~hikiko/nux/arb-srgba-shader

« back to all changes in this revision

Viewing changes to NuxCore/TinyXML/tinyxmlparser.cpp

Committer: Neil Jagdish Patel
Date: 2010-09-01 19:25:37 UTC
Revision ID: neil.patel@canonical.com-20100901192537-mfz7rm6q262pewg6

Import and build NuxCore

files added:
NuxCore

NuxCore/BitmapFormats.cpp

NuxCore/BitmapFormats.h

NuxCore/CMakeLists.txt

NuxCore/Character

NuxCore/Character/NAscii.cpp

NuxCore/Character/NAscii.h

NuxCore/Character/NTChar.h

NuxCore/Character/NUTF.cpp

NuxCore/Character/NUTF.h

NuxCore/Character/NUni.cpp

NuxCore/Character/NUni.h

NuxCore/Character/NUnicode.cpp

NuxCore/Character/NUnicode.h

NuxCore/Character/NUnicodeGNU.cpp

NuxCore/Character/NUnicodeGNU.h

NuxCore/Character/NUnicodePS3.cpp

NuxCore/Character/NUnicodePS3.h

NuxCore/Color.cpp

NuxCore/Color.h

NuxCore/ColorFunctions.cpp

NuxCore/ColorFunctions.h

NuxCore/CppReadme.txt

NuxCore/Crypto

NuxCore/Crypto/NHashFunctions.cpp

NuxCore/Crypto/NHashFunctions.h

NuxCore/Crypto/NMD5.cpp

NuxCore/Crypto/NMD5.h

NuxCore/Crypto/NSHA1.cpp

NuxCore/Crypto/NSHA1.h

NuxCore/Crypto/NSHA2.cpp

NuxCore/Crypto/NSHA2.h

NuxCore/Crypto/NUUID.cpp

NuxCore/Crypto/NUUID.h

NuxCore/DataStruct

NuxCore/DataStruct/NList.h

NuxCore/Error.cpp

NuxCore/Error.h

NuxCore/Exception.cpp

NuxCore/Exception.h

NuxCore/FileManager

NuxCore/FileManager/NFileManagerGNU.cpp

NuxCore/FileManager/NFileManagerGNU.h

NuxCore/FileManager/NFileManagerGeneric.cpp

NuxCore/FileManager/NFileManagerGeneric.h

NuxCore/FileManager/NFileManagerPS3.cpp

NuxCore/FileManager/NFileManagerPS3.h

NuxCore/FileManager/NFileManagerStandardAnsi.cpp

NuxCore/FileManager/NFileManagerStandardAnsi.h

NuxCore/FileManager/NFileManagerWindows.cpp

NuxCore/FileManager/NFileManagerWindows.h

NuxCore/FileManager/NSerializer.cpp

NuxCore/FileManager/NSerializer.h

NuxCore/FilePath.cpp

NuxCore/FilePath.h

NuxCore/Inalogic.h

NuxCore/Makefile.am

NuxCore/Math

NuxCore/Math/Bezier.cpp

NuxCore/Math/Bezier.h

NuxCore/Math/Complex.cpp

NuxCore/Math/Complex.h

NuxCore/Math/Constants.h

NuxCore/Math/Ease

NuxCore/Math/Ease/Back.as

NuxCore/Math/Ease/Bounce.as

NuxCore/Math/Ease/Circ.as

NuxCore/Math/Ease/Cubic.as

NuxCore/Math/Ease/Elastic.as

NuxCore/Math/Ease/Expo.as

NuxCore/Math/Ease/Linear.as

NuxCore/Math/Ease/Quad.as

NuxCore/Math/Ease/Quart.as

NuxCore/Math/Ease/Quint.as

NuxCore/Math/Ease/Sine.as

NuxCore/Math/Ease/easing_readme.txt

NuxCore/Math/Line2D.cpp

NuxCore/Math/Line2D.h

NuxCore/Math/Line3D.cpp

NuxCore/Math/Line3D.h

NuxCore/Math/MathFunctions.cpp

NuxCore/Math/MathFunctions.h

NuxCore/Math/MathInc.h

NuxCore/Math/MathUtility.h

NuxCore/Math/Matrix2.cpp

NuxCore/Math/Matrix2.h

NuxCore/Math/Matrix3.cpp

NuxCore/Math/Matrix3.h

NuxCore/Math/Matrix4.cpp

NuxCore/Math/Matrix4.h

NuxCore/Math/Point2D.cpp

NuxCore/Math/Point2D.h

NuxCore/Math/Point3D.cpp

NuxCore/Math/Point3D.h

NuxCore/Math/Quaternion.cpp

NuxCore/Math/Quaternion.h

NuxCore/Math/Spline.cpp

NuxCore/Math/Spline.h

NuxCore/Math/Trigonometry.cpp

NuxCore/Math/Trigonometry.h

NuxCore/Math/Tweening.cpp

NuxCore/Math/Tweening.h

NuxCore/Math/Vector2.cpp

NuxCore/Math/Vector2.h

NuxCore/Math/Vector3.cpp

NuxCore/Math/Vector3.h

NuxCore/Math/Vector4.cpp

NuxCore/Math/Vector4.h

NuxCore/Memory

NuxCore/Memory/NDefaultMemoryAllocator.cpp

NuxCore/Memory/NDefaultMemoryAllocator.h

NuxCore/Memory/NMemoryAllocator.cpp

NuxCore/Memory/NMemoryAllocator.h

NuxCore/Memory/NMemoryAllocatorInterface.cpp

NuxCore/Memory/NMemoryAllocatorInterface.h

NuxCore/Memory/NMemoryDebugHook.cpp

NuxCore/Memory/NMemoryDebugHook.h

NuxCore/Memory/NMemoryHook.cpp

NuxCore/Memory/NMemoryHook.h

NuxCore/NArray.cpp

NuxCore/NArray.h

NuxCore/NCPU.cpp

NuxCore/NCPU.h

NuxCore/NCRC32.cpp

NuxCore/NCRC32.h

NuxCore/NFile.cpp

NuxCore/NFile.h

NuxCore/NFileName.cpp

NuxCore/NFileName.h

NuxCore/NGlobalInitializer.cpp

NuxCore/NGlobalInitializer.h

NuxCore/NKernel.cpp

NuxCore/NKernel.h

NuxCore/NMacros.h

NuxCore/NMemory.cpp

NuxCore/NMemory.h

NuxCore/NNamespace.h

NuxCore/NObjectType.cpp

NuxCore/NObjectType.h

NuxCore/NOutputDevice.cpp

NuxCore/NOutputDevice.h

NuxCore/NParsing.cpp

NuxCore/NParsing.h

NuxCore/NPlatform.cpp

NuxCore/NPlatform.h

NuxCore/NPrintf.cpp

NuxCore/NPrintf.h

NuxCore/NProcess.cpp

NuxCore/NProcess.h

NuxCore/NStreamBuffer.cpp

NuxCore/NStreamBuffer.h

NuxCore/NString.cpp

NuxCore/NString.h

NuxCore/NStringConversion.h

NuxCore/NSystem.h

NuxCore/NSystemGNU.cpp

NuxCore/NSystemGNU.h

NuxCore/NSystemPS3.cpp

NuxCore/NSystemPS3.h

NuxCore/NSystemTypes.h

NuxCore/NSystemWindows.cpp

NuxCore/NSystemWindows.h

NuxCore/NTemplate.cpp

NuxCore/NTemplate.h

NuxCore/NThread.cpp

NuxCore/NThread.h

NuxCore/NThreadGNU.cpp

NuxCore/NThreadGNU.h

NuxCore/NThreadPS3.cpp

NuxCore/NThreadPS3.h

NuxCore/NTime.cpp

NuxCore/NTime.h

NuxCore/NUniqueIndex.cpp

NuxCore/NUniqueIndex.h

NuxCore/Plugin

NuxCore/Plugin/NPlugin.h

NuxCore/Plugin/NPluginInterface.h

NuxCore/Plugin/NPluginManager.cpp

NuxCore/Plugin/NPluginManager.h

NuxCore/Plugin/NPluging.cpp

NuxCore/Point.cpp

NuxCore/Point.h

NuxCore/Rect.cpp

NuxCore/Rect.h

NuxCore/Size.cpp

NuxCore/Size.h

NuxCore/SmartPtr

NuxCore/SmartPtr/NRefCount.cpp

NuxCore/SmartPtr/NRefCount.h

NuxCore/SmartPtr/NSmartPtr.cpp

NuxCore/SmartPtr/NSmartPtr.h

NuxCore/TinyXML

NuxCore/TinyXML/tinystr.cpp

NuxCore/TinyXML/tinystr.h

NuxCore/TinyXML/tinyxml.cpp

NuxCore/TinyXML/tinyxml.h

NuxCore/TinyXML/tinyxmlerror.cpp

NuxCore/TinyXML/tinyxmlparser.cpp

NuxCore/Win32Dialogs

NuxCore/Win32Dialogs/NWin32Clipboard.cpp

NuxCore/Win32Dialogs/NWin32Clipboard.h

NuxCore/Win32Dialogs/NWin32CustomDialog.cpp

NuxCore/Win32Dialogs/NWin32CustomDialog.h

NuxCore/Win32Dialogs/NWin32MessageBox.cpp

NuxCore/Win32Dialogs/NWin32MessageBox.h

NuxCore/Zip

NuxCore/Zip/crypt.h

NuxCore/Zip/ioapi.c

NuxCore/Zip/ioapi.h

NuxCore/Zip/iowin32.c

NuxCore/Zip/iowin32.h

NuxCore/Zip/unzip.c

NuxCore/Zip/unzip.h

NuxCore/Zip/zip.c

NuxCore/Zip/zip.h

NuxCore/Zip/zipstream

NuxCore/Zip/zipstream/zip_stream_test.cpp

NuxCore/Zip/zipstream/zip_stream_test.h

NuxCore/Zip/zipstream/zipstream.h

NuxCore/Zip/zipstream/zipstream.ipp

files modified:
.bzrignore

INSTALL

Makefile.am

configure.ac

Show diffs side-by-side

added added

removed removed

NuxCore/TinyXML/tinyxmlparser.cpp

www.sourceforge.net/projects/tinyxml

This software is provided 'as-is', without any express or implied

warranty. In no event will the authors be held liable for any

damages arising from the use of this software.

Permission is granted to anyone to use this software for any

purpose, including commercial applications, and to alter it and

redistribute it freely, subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must

not claim that you wrote the original software. If you use this

software in a product, an acknowledgment in the product documentation

would be appreciated but is not required.

2. Altered source versions must be plainly marked as such, and

must not be misrepresented as being the original software.

3. This notice may not be removed or altered from any source

distribution.

#include <ctype.h>

#include <stddef.h>

#include "tinyxml.h"

//#define DEBUG_PARSER

#if defined( DEBUG_PARSER )

# if defined( DEBUG ) && defined( _MSC_VER )

# include <windows.h>

# define TIXML_LOG OutputDebugString

# else

# define TIXML_LOG printf

# endif

#endif

// Note that "PutString" hardcodes the same list. This

// is less flexible than it appears. Changing the entries

// or order will break putstring.

// Table of the predefined XML entities. Each row is
// { entity text as it appears in the document, length of that text,
//   the single character it stands for }.
// GetEntity() compares the input against 'str' using 'strLength' and asserts
// that strlen(str) == strLength, so the text and the length must stay in sync.
TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] =
{
    { "&amp;",  5, '&' },
    { "&lt;",   4, '<' },
    { "&gt;",   4, '>' },
    { "&quot;", 6, '\"' },
    { "&apos;", 6, '\'' }
};

// Bunch of unicode info at:

// http://www.unicode.org/faq/utf_bom.html

// Including the basic of this table, which determines the #bytes in the

// sequence from the lead byte. 1 placed for invalid sequences --

// although the result will be junk, pass it through as much as possible.

// Beware of the non-characters in UTF-8:

// ef bb bf (Microsoft "lead bytes")

// ef bf be

// ef bf bf

// The three bytes ef bb bf (the Microsoft "lead bytes" listed above). The
// parser checks for this sequence at the start of a document to detect UTF-8,
// and skips it when skipping white space or stamping the cursor position.
const unsigned char TIXML_UTF_LEAD_0 = 0xefU;

const unsigned char TIXML_UTF_LEAD_1 = 0xbbU;

const unsigned char TIXML_UTF_LEAD_2 = 0xbfU;

// Maps a byte value (used as the index) to the number of bytes in the UTF-8
// sequence that starts with that byte. Bytes that cannot start a sequence are
// given length 1 so that bad input is passed through one byte at a time.
const int TiXmlBase::utf8ByteTable[256] =

{

// 0 1 2 3 4 5 6 7 8 9 a b c d e f

1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00

1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10

1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20

1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30

1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40

1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50

1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60

1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70 End of ASCII range

1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80 0x80 to 0xc1 invalid

1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x90

1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xa0

1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xb0

1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0 0xc2 to 0xdf 2 byte

2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0

3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xe0 0xe0 to 0xef 3 byte

4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid

};

// Encodes the code point 'input' as UTF-8.
//   input  - the value to encode.
//   output - destination buffer; up to 4 bytes are written, no terminator is
//            appended.
//   length - receives the number of bytes written (1 to 4), or 0 when 'input'
//            is 0x200000 or larger, in which case nothing is written.
void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
{
    const unsigned long BYTE_MASK = 0xBF;
    const unsigned long BYTE_MARK = 0x80;
    const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };

    if (input < 0x80)
        *length = 1;
    else if ( input < 0x800 )
        *length = 2;
    else if ( input < 0x10000 )
        *length = 3;
    else if ( input < 0x200000 )
        *length = 4;
    else
        { *length = 0; return; }    // This code won't convert this correctly anyway.

    // The bytes are produced last-to-first, so start one past the final byte.
    output += *length;

    // Scary scary fall throughs: every case deliberately continues into the
    // next one. Each continuation byte gets the low 6 bits of 'input' with the
    // top two bits forced to 10; the lead byte gets the length marker.
    switch (*length)
    {
        case 4:
            --output;
            *output = (char)((input | BYTE_MARK) & BYTE_MASK);
            input >>= 6;
        case 3:
            --output;
            *output = (char)((input | BYTE_MARK) & BYTE_MASK);
            input >>= 6;
        case 2:
            --output;
            *output = (char)((input | BYTE_MARK) & BYTE_MASK);
            input >>= 6;
        case 1:
            --output;
            *output = (char)(input | FIRST_BYTE_MARK[*length]);
    }
}


// Returns non-zero when 'anyByte' may be treated as a letter.
//
// This only really works for low ASCII: bytes below 127 are classified with
// isalpha(), and every byte from 127 upward is assumed to be a valid letter.
// Figuring out alphabetical vs. not across encodings is quite tricky, so a very
// conservative approach is taken. The encoding argument is ignored.
/*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
{
    if ( anyByte < 127 )
        return isalpha( anyByte );
    else
        return 1;   // What else to do? The unicode set is huge...get the english ones right.
}


// Returns non-zero when 'anyByte' may be treated as a letter or a digit.
//
// This only really works for low ASCII: bytes below 127 are classified with
// isalnum(), and every byte from 127 upward is assumed to be valid.
// Figuring out alphabetical vs. not across encodings is quite tricky, so a very
// conservative approach is taken. The encoding argument is ignored.
/*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
{
    if ( anyByte < 127 )
        return isalnum( anyByte );
    else
        return 1;   // What else to do? The unicode set is huge...get the english ones right.
}


class TiXmlParsingData

172

{

173

friend class TiXmlDocument;

174

public:

175

void Stamp( const char* now, TiXmlEncoding encoding );

176

177

const TiXmlCursor& Cursor() { return cursor; }

178

179

private:

180

// Only used by the document!

181

TiXmlParsingData( const char* start, int _tabsize, int row, int col )

182

{

183

assert( start );

184

stamp = start;

185

tabsize = _tabsize;

186

cursor.row = row;

187

cursor.col = col;

188

}

189

190

TiXmlCursor cursor;

191

const char* stamp;

192

int tabsize;

193

};

194

195

196

void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )

197

{

198

assert( now );

199

200

// Do nothing if the tabsize is 0.

201

if ( tabsize < 1 )

202

{

203

return;

204

}

205

206

// Get the current row, column.

207

int row = cursor.row;

208

int col = cursor.col;

209

const char* p = stamp;

210

assert( p );

211

212

while ( p < now )

213

{

214

// Treat p as unsigned, so we have a happy compiler.

215

const unsigned char* pU = (const unsigned char*)p;

216

217

// Code contributed by Fletcher Dunn: (modified by lee)

218

switch (*pU) {

219

case 0:

220

// We *should* never get here, but in case we do, don't

221

// advance past the terminating null character, ever

222

return;

223

224

case '\r':

225

// bump down to the next line

226

++row;

227

col = 0;

228

// Eat the character

229

++p;

230

231

// Check for \r\n sequence, and treat this as a single character

232

if (*p == '\n') {

233

++p;

234

}

235

break;

236

237

case '\n':

238

// bump down to the next line

239

++row;

240

col = 0;

241

242

// Eat the character

243

++p;

244

245

// Check for \n\r sequence, and treat this as a single

246

// character. (Yes, this bizarre thing does occur still

247

// on some arcane platforms...)

248

if (*p == '\r') {

249

++p;

250

}

251

break;

252

253

case '\t':

254

// Eat the character

255

++p;

256

257

// Skip to next tab stop

258

col = (col / tabsize + 1) * tabsize;

259

break;

260

261

case TIXML_UTF_LEAD_0:

262

if ( encoding == TIXML_ENCODING_UTF8 )

263

{

264

if ( *(p+1) && *(p+2) )

265

{

266

// In these cases, don't advance the column. These are

267

// 0-width spaces.

268

if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )

269

p += 3;

270

else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )

271

p += 3;

272

else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )

273

p += 3;

274

else

275

{ p +=3; ++col; } // A normal character.

276

}

277

}

278

else

279

{

280

++p;

281

++col;

282

}

283

break;

284

285

default:

286

if ( encoding == TIXML_ENCODING_UTF8 )

287

{

288

// Eat the 1 to 4 byte utf8 character.

289

int step = TiXmlBase::utf8ByteTable[*((const unsigned char*)p)];

290

if ( step == 0 )

291

step = 1; // Error case from bad encoding, but handle gracefully.

292

p += step;

293

294

// Just advance one column, of course.

295

++col;

296

}

297

else

298

{

299

++p;

300

++col;

301

}

302

break;

303

}

304

}

305

cursor.row = row;

306

cursor.col = col;

307

assert( cursor.row >= -1 );

308

assert( cursor.col >= -1 );

309

stamp = p;

310

assert( stamp );

311

}

312

313

314

const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )

315

{

316

if ( !p || !*p )

317

{

318

return 0;

319

}

320

if ( encoding == TIXML_ENCODING_UTF8 )

321

{

322

while ( *p )

323

{

324

const unsigned char* pU = (const unsigned char*)p;

325

326

// Skip the stupid Microsoft UTF-8 Byte order marks

327

if ( *(pU+0)==TIXML_UTF_LEAD_0

328

&& *(pU+1)==TIXML_UTF_LEAD_1

329

&& *(pU+2)==TIXML_UTF_LEAD_2 )

330

{

331

p += 3;

332

continue;

333

}

334

else if(*(pU+0)==TIXML_UTF_LEAD_0

335

&& *(pU+1)==0xbfU

336

&& *(pU+2)==0xbeU )

337

{

338

p += 3;

339

continue;

340

}

341

else if(*(pU+0)==TIXML_UTF_LEAD_0

342

&& *(pU+1)==0xbfU

343

&& *(pU+2)==0xbfU )

344

{

345

p += 3;

346

continue;

347

}

348

349

if ( IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' ) // Still using old rules for white space.

350

++p;

351

else

352

break;

353

}

354

}

355

else

356

{

357

while ( *p && IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )

358

++p;

359

}

360

361

return p;

362

}

363

364

#ifdef TIXML_USE_STL

365

/*static*/ bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag )

366

{

367

for( ;; )

368

{

369

if ( !in->good() ) return false;

370

371

int c = in->peek();

372

// At this scope, we can't get to a document. So fail silently.

373

if ( !IsWhiteSpace( c ) || c <= 0 )

374

return true;

375

376

*tag += (char) in->get();

377

}

378

}

379

380

// Copies characters from the stream 'in' onto the end of 'tag' until the next
// character is 'character', which is left unread.
// Returns true when 'character' was found; false when a value <= 0 is peeked
// or the stream stops being good first.
/*static*/ bool TiXmlBase::StreamTo( std::istream * in, int character, TIXML_STRING * tag )
{
    //assert( character > 0 && character < 128 );   // else it won't work in utf-8
    while ( in->good() )
    {
        int c = in->peek();
        if ( c == character )
            return true;
        if ( c <= 0 )       // Silent failure: can't get document at this scope
            return false;

        in->get();
        *tag += (char) c;
    }
    return false;
}

#endif

397

398

// One of TinyXML's more performance demanding functions. Try to keep the memory overhead down. The

399

// "assign" optimization removes over 10% of the execution time.

400

401

const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )

402

{

403

// Oddly, not supported on some comilers,

404

//name->clear();

405

// So use this:

406

*name = "";

407

assert( p );

408

409

// Names start with letters or underscores.

410

// Of course, in unicode, tinyxml has no idea what a letter *is*. The

411

// algorithm is generous.

412

413

// After that, they can be letters, underscores, numbers,

414

// hyphens, or colons. (Colons are valid ony for namespaces,

415

// but tinyxml can't tell namespaces from names.)

416

if ( p && *p

417

&& ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )

418

{

419

const char* start = p;

420

while( p && *p

421

&& ( IsAlphaNum( (unsigned char ) *p, encoding )

422

|| *p == '_'

423

|| *p == '-'

424

|| *p == '.'

425

|| *p == ':' ) )

426

{

427

//(*name) += *p; // expensive

428

++p;

429

}

430

if ( p-start > 0 ) {

431

name->assign( start, p-start );

432

}

433

return p;

434

}

435

return 0;

436

}

437

438

const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )

439

{

440

// Presume an entity, and pull it out.

441

TIXML_STRING ent;

442

int i;

443

*length = 0;

444

445

if ( *(p+1) && *(p+1) == '#' && *(p+2) )

446

{

447

unsigned long ucs = 0;

448

ptrdiff_t delta = 0;

449

unsigned mult = 1;

450

451

if ( *(p+2) == 'x' )

452

{

453

// Hexadecimal.

454

if ( !*(p+3) ) return 0;

455

456

const char* q = p+3;

457

q = strchr( q, ';' );

458

459

if ( !q || !*q ) return 0;

460

461

delta = q-p;

462

--q;

463

464

while ( *q != 'x' )

465

{

466

if ( *q >= '0' && *q <= '9' )

467

ucs += mult * (*q - '0');

468

else if ( *q >= 'a' && *q <= 'f' )

469

ucs += mult * (*q - 'a' + 10);

470

else if ( *q >= 'A' && *q <= 'F' )

471

ucs += mult * (*q - 'A' + 10 );

472

else

473

return 0;

474

mult *= 16;

475

--q;

476

}

477

}

478

else

479

{

480

// Decimal.

481

if ( !*(p+2) ) return 0;

482

483

const char* q = p+2;

484

q = strchr( q, ';' );

485

486

if ( !q || !*q ) return 0;

487

488

delta = q-p;

489

--q;

490

491

while ( *q != '#' )

492

{

493

if ( *q >= '0' && *q <= '9' )

494

ucs += mult * (*q - '0');

495

else

496

return 0;

497

mult *= 10;

498

--q;

499

}

500

}

501

if ( encoding == TIXML_ENCODING_UTF8 )

502

{

503

// convert the UCS to UTF-8

504

ConvertUTF32ToUTF8( ucs, value, length );

505

}

506

else

507

{

508

*value = (char)ucs;

509

*length = 1;

510

}

511

return p + delta + 1;

512

}

513

514

// Now try to match it.

515

for( i=0; i<NUM_ENTITY; ++i )

516

{

517

if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )

518

{

519

assert( strlen( entity[i].str ) == entity[i].strLength );

520

*value = entity[i].chr;

521

*length = 1;

522

return ( p + entity[i].strLength );

523

}

524

}

525

526

// So it wasn't an entity, its unrecognized, or something like that.

527

*value = *p; // Don't put back the last one, since we return it!

528

//*length = 1; // Leave unrecognized entities - this doesn't really work.

529

// Just writes strange XML.

530

return p+1;

531

}

532

533

534

bool TiXmlBase::StringEqual( const char* p,

535

const char* tag,

536

bool ignoreCase,

537

TiXmlEncoding encoding )

538

{

539

assert( p );

540

assert( tag );

541

if ( !p || !*p )

542

{

543

assert( 0 );

544

return false;

545

}

546

547

const char* q = p;

548

549

if ( ignoreCase )

550

{

551

while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )

552

{

553

++q;

554

++tag;

555

}

556

557

if ( *tag == 0 )

558

return true;

559

}

560

else

561

{

562

while ( *q && *tag && *q == *tag )

563

{

564

++q;

565

++tag;

566

}

567

568

if ( *tag == 0 ) // Have we found the end of the tag, and everything equal?

569

return true;

570

}

571

return false;

572

}

573

574

const char* TiXmlBase::ReadText( const char* p,

575

TIXML_STRING * text,

576

bool trimWhiteSpace,

577

const char* endTag,

578

bool caseInsensitive,

579

TiXmlEncoding encoding )

580

{

581

*text = "";

582

if ( !trimWhiteSpace // certain tags always keep whitespace

583

|| !condenseWhiteSpace ) // if true, whitespace is always kept

584

{

585

// Keep all the white space.

586

while ( p && *p

587

&& !StringEqual( p, endTag, caseInsensitive, encoding )

588

)

589

{

590

int len;

591

char cArr[4] = { 0, 0, 0, 0 };

592

p = GetChar( p, cArr, &len, encoding );

593

text->append( cArr, len );

594

}

595

}

596

else

597

{

598

bool whitespace = false;

599

600

// Remove leading white space:

601

p = SkipWhiteSpace( p, encoding );

602

while ( p && *p

603

&& !StringEqual( p, endTag, caseInsensitive, encoding ) )

604

{

605

if ( *p == '\r' || *p == '\n' )

606

{

607

whitespace = true;

608

++p;

609

}

610

else if ( IsWhiteSpace( *p ) )

611

{

612

whitespace = true;

613

++p;

614

}

615

else

616

{

617

// If we've found whitespace, add it before the

618

// new character. Any whitespace just becomes a space.

619

if ( whitespace )

620

{

621

(*text) += ' ';

622

whitespace = false;

623

}

624

int len;

625

char cArr[4] = { 0, 0, 0, 0 };

626

p = GetChar( p, cArr, &len, encoding );

627

if ( len == 1 )

628

(*text) += cArr[0]; // more efficient

629

else

630

text->append( cArr, len );

631

}

632

}

633

}

634

if ( p )

635

p += strlen( endTag );

636

return p;

637

}

638

639

#ifdef TIXML_USE_STL

640

641

void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag )

642

{

643

// The basic issue with a document is that we don't know what we're

644

// streaming. Read something presumed to be a tag (and hope), then

645

// identify it, and call the appropriate stream method on the tag.

646

647

// This "pre-streaming" will never read the closing ">" so the

648

// sub-tag can orient itself.

649

650

if ( !StreamTo( in, '<', tag ) )

651

{

652

SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );

653

return;

654

}

655

656

while ( in->good() )

657

{

658

int tagIndex = (int) tag->length();

659

while ( in->good() && in->peek() != '>' )

660

{

661

int c = in->get();

662

if ( c <= 0 )

663

{

664

SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );

665

break;

666

}

667

(*tag) += (char) c;

668

}

669

670

if ( in->good() )

671

{

672

// We now have something we presume to be a node of

673

// some sort. Identify it, and call the node to

674

// continue streaming.

675

TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );

676

677

if ( node )

678

{

679

node->StreamIn( in, tag );

680

bool isElement = node->ToElement() != 0;

681

delete node;

682

node = 0;

683

684

// If this is the root element, we're done. Parsing will be

685

// done by the >> operator.

686

if ( isElement )

687

{

688

return;

689

}

690

}

691

else

692

{

693

SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );

694

return;

695

}

696

}

697

}

698

// We should have returned sooner.

699

SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );

700

}

701

702

#endif

703

704

const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )

705

{

706

ClearError();

707

708

// Parse away, at the document level. Since a document

709

// contains nothing but other tags, most of what happens

710

// here is skipping white space.

711

if ( !p || !*p )

712

{

713

SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );

714

return 0;

715

}

716

717

// Note that, for a document, this needs to come

718

// before the while space skip, so that parsing

719

// starts from the pointer we are given.

720

location.Clear();

721

if ( prevData )

722

{

723

location.row = prevData->cursor.row;

724

location.col = prevData->cursor.col;

725

}

726

else

727

{

728

location.row = 0;

729

location.col = 0;

730

}

731

TiXmlParsingData data( p, TabSize(), location.row, location.col );

732

location = data.Cursor();

733

734

if ( encoding == TIXML_ENCODING_UNKNOWN )

735

{

736

// Check for the Microsoft UTF-8 lead bytes.

737

const unsigned char* pU = (const unsigned char*)p;

738

if ( *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0

739

&& *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1

740

&& *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )

741

{

742

encoding = TIXML_ENCODING_UTF8;

743

useMicrosoftBOM = true;

744

}

745

}

746

747

p = SkipWhiteSpace( p, encoding );

748

if ( !p )

749

{

750

SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );

751

return 0;

752

}

753

754

while ( p && *p )

755

{

756

TiXmlNode* node = Identify( p, encoding );

757

if ( node )

758

{

759

p = node->Parse( p, &data, encoding );

760

LinkEndChild( node );

761

}

762

else

763

{

764

break;

765

}

766

767

// Did we get encoding info?

768

if ( encoding == TIXML_ENCODING_UNKNOWN

769

&& node->ToDeclaration() )

770

{

771

TiXmlDeclaration* dec = node->ToDeclaration();

772

const char* enc = dec->Encoding();

773

assert( enc );

774

775

if ( *enc == 0 )

776

encoding = TIXML_ENCODING_UTF8;

777

else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )

778

encoding = TIXML_ENCODING_UTF8;

779

else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )

780

encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice

781

else

782

encoding = TIXML_ENCODING_LEGACY;

783

}

784

785

p = SkipWhiteSpace( p, encoding );

786

}

787

788

// Was this empty?

789

if ( !firstChild ) {

790

SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );

791

return 0;

792

}

793

794

// All is well.

795

return p;

796

}

797

798

void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )

799

{

800

// The first error in a chain is more accurate - don't set again!

801

if ( error )

802

return;

803

804

assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );

805

error = true;

806

errorId = err;

807

errorDesc = errorString[ errorId ];

808

809

errorLocation.Clear();

810

if ( pError && data )

811

{

812

data->Stamp( pError, encoding );

813

errorLocation = data->Cursor();

814

}

815

}

816

817

818

// Looks at the text at 'p' (after skipping white space), decides what kind of
// node starts there and returns a newly allocated, still empty node of that
// kind with its parent set to this node. The caller owns the returned node.
// Returns 0 when 'p' is null/empty or the first non-white-space character is
// not '<'.
TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
{
    TiXmlNode* returnNode = 0;

    p = SkipWhiteSpace( p, encoding );
    if( !p || !*p || *p != '<' )
    {
        return 0;
    }

    // From here on p points at '<', so no further white space skipping or
    // null checks are needed.
    TiXmlDocument* doc = GetDocument();

    // What is this thing?
    // - Elements start with a letter or underscore, but xml is reserved.
    // - Comments: <!--
    // - Declaration: <?xml
    // - Everything else is unknown to tinyxml.
    //

    const char* xmlHeader = { "<?xml" };
    const char* commentHeader = { "<!--" };
    const char* dtdHeader = { "<!" };
    const char* cdataHeader = { "<![CDATA[" };

    // Order matters: "<!--" and "<![CDATA[" must be tested before the shorter
    // "<!" prefix they both share.
    if ( StringEqual( p, xmlHeader, true, encoding ) )
    {
        #ifdef DEBUG_PARSER
            TIXML_LOG( "XML parsing Declaration\n" );
        #endif
        returnNode = new TiXmlDeclaration();
    }
    else if ( StringEqual( p, commentHeader, false, encoding ) )
    {
        #ifdef DEBUG_PARSER
            TIXML_LOG( "XML parsing Comment\n" );
        #endif
        returnNode = new TiXmlComment();
    }
    else if ( StringEqual( p, cdataHeader, false, encoding ) )
    {
        #ifdef DEBUG_PARSER
            TIXML_LOG( "XML parsing CDATA\n" );
        #endif
        TiXmlText* text = new TiXmlText( "" );
        text->SetCDATA( true );
        returnNode = text;
    }
    else if ( StringEqual( p, dtdHeader, false, encoding ) )
    {
        #ifdef DEBUG_PARSER
            TIXML_LOG( "XML parsing Unknown(1)\n" );
        #endif
        returnNode = new TiXmlUnknown();
    }
    else if (    IsAlpha( *(p+1), encoding )
              || *(p+1) == '_' )
    {
        #ifdef DEBUG_PARSER
            TIXML_LOG( "XML parsing Element\n" );
        #endif
        returnNode = new TiXmlElement( "" );
    }
    else
    {
        #ifdef DEBUG_PARSER
            TIXML_LOG( "XML parsing Unknown(2)\n" );
        #endif
        returnNode = new TiXmlUnknown();
    }

    if ( returnNode )
    {
        // Set the parent, so it can report errors
        returnNode->parent = this;
    }
    else
    {
        if ( doc )
            doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN );
    }
    return returnNode;
}


#ifdef TIXML_USE_STL

908

909

void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag)

910

{

911

// We're called with some amount of pre-parsing. That is, some of "this"

912

// element is in "tag". Go ahead and stream to the closing ">"

913

while( in->good() )

914

{

915

int c = in->get();

916

if ( c <= 0 )

917

{

918

TiXmlDocument* document = GetDocument();

919

if ( document )

920

document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );

921

return;

922

}

923

(*tag) += (char) c ;

924

925

if ( c == '>' )

926

break;

927

}

928

929

if ( tag->length() < 3 ) return;

930

931

// Okay...if we are a "/>" tag, then we're done. We've read a complete tag.

932

// If not, identify and stream.

933

934

if ( tag->at( tag->length() - 1 ) == '>'

935

&& tag->at( tag->length() - 2 ) == '/' )

936

{

937

// All good!

938

return;

939

}

940

else if ( tag->at( tag->length() - 1 ) == '>' )

941

{

942

// There is more. Could be:

943

// text

944

// cdata text (which looks like another node)

945

// closing tag

946

// another node.

947

for ( ;; )

948

{

949

StreamWhiteSpace( in, tag );

950

951

// Do we have text?

952

if ( in->good() && in->peek() != '<' )

953

{

954

// Yep, text.

955

TiXmlText text( "" );

956

text.StreamIn( in, tag );

957

958

// What follows text is a closing tag or another node.

959

// Go around again and figure it out.

960

continue;

961

}

962

963

// We now have either a closing tag...or another node.

964

// We should be at a "<", regardless.

965

if ( !in->good() ) return;

966

assert( in->peek() == '<' );

967

int tagIndex = (int) tag->length();

968

969

bool closingTag = false;

970

bool firstCharFound = false;

971

972

for( ;; )

973

{

974

if ( !in->good() )

975

return;

976

977

int c = in->peek();

978

if ( c <= 0 )

979

{

980

TiXmlDocument* document = GetDocument();

981

if ( document )

982

document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );

983

return;

984

}

985

986

if ( c == '>' )

987

break;

988

989

*tag += (char) c;

990

in->get();

991

992

// Early out if we find the CDATA id.

993

if ( c == '[' && tag->size() >= 9 )

994

{

995

size_t len = tag->size();

996

const char* start = tag->c_str() + len - 9;

997

if ( strcmp( start, "<![CDATA[" ) == 0 ) {

998

assert( !closingTag );

999

break;

1000

}

1001

}

1002

1003

if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )

1004

{

1005

firstCharFound = true;

1006

if ( c == '/' )

1007

closingTag = true;

1008

}

1009

}

1010

// If it was a closing tag, then read in the closing '>' to clean up the input stream.

1011

// If it was not, the streaming will be done by the tag.

1012

if ( closingTag )

1013

{

1014

if ( !in->good() )

1015

return;

1016

1017

int c = in->get();

1018

if ( c <= 0 )

1019

{

1020

TiXmlDocument* document = GetDocument();

1021

if ( document )

1022

document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );

1023

return;

1024

}

1025

assert( c == '>' );

1026

*tag += (char) c;

1027

1028

// We are done, once we've found our closing tag.

1029

return;

1030

}

1031

else

1032

{

1033

// If not a closing tag, id it, and stream.

1034

const char* tagloc = tag->c_str() + tagIndex;

1035

TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );

1036

if ( !node )

1037

return;

1038

node->StreamIn( in, tag );

1039

delete node;

1040

node = 0;

1041

1042

// No return: go around from the beginning: text, closing tag, or node.

1043

}

1044

}

1045

}

1046

}

1047

#endif

1048

1049

const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )

1050

{

1051

p = SkipWhiteSpace( p, encoding );

1052

TiXmlDocument* document = GetDocument();

1053

1054

if ( !p || !*p )

1055

{

1056

if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );

1057

return 0;

1058

}

1059

1060

if ( data )

1061

{

1062

data->Stamp( p, encoding );

1063

location = data->Cursor();

1064

}

1065

1066

if ( *p != '<' )

1067

{

1068

if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );

1069

return 0;

1070

}

1071

1072

p = SkipWhiteSpace( p+1, encoding );

1073

1074

// Read the name.

1075

const char* pErr = p;

1076

1077

p = ReadName( p, &value, encoding );

1078

if ( !p || !*p )

1079

{

1080

if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );

1081

return 0;

1082

}

1083

1084

TIXML_STRING endTag ("</");

1085

endTag += value;

1086

endTag += ">";

1087

1088

// Check for and read attributes. Also look for an empty

1089

// tag or an end tag.

1090

while ( p && *p )

1091

{

1092

pErr = p;

1093

p = SkipWhiteSpace( p, encoding );

1094

if ( !p || !*p )

1095

{

1096

if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );

1097

return 0;

1098

}

1099

if ( *p == '/' )

1100

{

1101

++p;

1102

// Empty tag.

1103

if ( *p != '>' )

1104

{

1105

if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );

1106

return 0;

1107

}

1108

return (p+1);

1109

}

1110

else if ( *p == '>' )

1111

{

1112

// Done with attributes (if there were any.)

1113

// Read the value -- which can include other

1114

// elements -- read the end tag, and return.

1115

++p;

1116

p = ReadValue( p, data, encoding ); // Note this is an Element method, and will set the error if one happens.

1117

if ( !p || !*p ) {

1118

// We were looking for the end tag, but found nothing.

1119

// Fix for [ 1663758 ] Failure to report error on bad XML

1120

if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );

1121

return 0;

1122

}

1123

1124

// We should find the end tag now

1125

if ( StringEqual( p, endTag.c_str(), false, encoding ) )

1126

{

1127

p += endTag.length();

1128

return p;

1129

}

1130

else

1131

{

1132

if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );

1133

return 0;

1134

}

1135

}

1136

else

1137

{

1138

// Try to read an attribute:

1139

TiXmlAttribute* attrib = new TiXmlAttribute();

1140

if ( !attrib )

1141

{

1142

if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding );

1143

return 0;

1144

}

1145

1146

attrib->SetDocument( document );

1147

pErr = p;

1148

p = attrib->Parse( p, data, encoding );

1149

1150

if ( !p || !*p )

1151

{

1152

if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );

1153

delete attrib;

1154

return 0;

1155

}

1156

1157

// Handle the strange case of double attributes:

1158

#ifdef TIXML_USE_STL

1159

TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );

1160

#else

1161

TiXmlAttribute* node = attributeSet.Find( attrib->Name() );

1162

#endif

1163

if ( node )

1164

{

1165

node->SetValue( attrib->Value() );

1166

delete attrib;

1167

return 0;

1168

}

1169

1170

attributeSet.Add( attrib );

1171

}

1172

}

1173

return p;

1174

}

1175

1176

1177

const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )

1178

{

1179

TiXmlDocument* document = GetDocument();

1180

1181

// Read in text and elements in any order.

1182

const char* pWithWhiteSpace = p;

1183

p = SkipWhiteSpace( p, encoding );

1184

1185

while ( p && *p )

1186

{

1187

if ( *p != '<' )

1188

{

1189

// Take what we have, make a text element.

1190

TiXmlText* textNode = new TiXmlText( "" );

1191

1192

if ( !textNode )

1193

{

1194

if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding );

1195

return 0;

1196

}

1197

1198

if ( TiXmlBase::IsWhiteSpaceCondensed() )

1199

{

1200

p = textNode->Parse( p, data, encoding );

1201

}

1202

else

1203

{

1204

// Special case: we want to keep the white space

1205

// so that leading spaces aren't removed.

1206

p = textNode->Parse( pWithWhiteSpace, data, encoding );

1207

}

1208

1209

if ( !textNode->Blank() )

1210

LinkEndChild( textNode );

1211

else

1212

delete textNode;

1213

}

1214

else

1215

{

1216

// We hit a '<'

1217

// Have we hit a new element or an end tag? This could also be

1218

// a TiXmlText in the "CDATA" style.

1219

if ( StringEqual( p, "</", false, encoding ) )

1220

{

1221

return p;

1222

}

1223

else

1224

{

1225

TiXmlNode* node = Identify( p, encoding );

1226

if ( node )

1227

{

1228

p = node->Parse( p, data, encoding );

1229

LinkEndChild( node );

1230

}

1231

else

1232

{

1233

return 0;

1234

}

1235

}

1236

}

1237

pWithWhiteSpace = p;

1238

p = SkipWhiteSpace( p, encoding );

1239

}

1240

1241

if ( !p )

1242

{

1243

if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );

1244

}

1245

return p;

1246

}

1247

1248

1249

#ifdef TIXML_USE_STL

1250

void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag )

1251

{

1252

while ( in->good() )

1253

{

1254

int c = in->get();

1255

if ( c <= 0 )

1256

{

1257

TiXmlDocument* document = GetDocument();

1258

if ( document )

1259

document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );

1260

return;

1261

}

1262

(*tag) += (char) c;

1263

1264

if ( c == '>' )

1265

{

1266

// All is well.

1267

return;

1268

}

1269

}

1270

}

1271

#endif

1272

1273

1274

const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )

1275

{

1276

TiXmlDocument* document = GetDocument();

1277

p = SkipWhiteSpace( p, encoding );

1278

1279

if ( data )

1280

{

1281

data->Stamp( p, encoding );

1282

location = data->Cursor();

1283

}

1284

if ( !p || !*p || *p != '<' )

1285

{

1286

if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );

1287

return 0;

1288

}

1289

++p;

1290

value = "";

1291

1292

while ( p && *p && *p != '>' )

1293

{

1294

value += *p;

1295

++p;

1296

}

1297

1298

if ( !p )

1299

{

1300

if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );

1301

}

1302

if ( *p == '>' )

1303

return p+1;

1304

return p;

1305

}

1306

1307

#ifdef TIXML_USE_STL

1308

void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag )

1309

{

1310

while ( in->good() )

1311

{

1312

int c = in->get();

1313

if ( c <= 0 )

1314

{

1315

TiXmlDocument* document = GetDocument();

1316

if ( document )

1317

document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );

1318

return;

1319

}

1320

1321

(*tag) += (char) c;

1322

1323

if ( c == '>'

1324

&& tag->at( tag->length() - 2 ) == '-'

1325

&& tag->at( tag->length() - 3 ) == '-' )

1326

{

1327

// All is well.

1328

return;

1329

}

1330

}

1331

}

1332

#endif

1333

1334

1335

const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )

1336

{

1337

TiXmlDocument* document = GetDocument();

1338

value = "";

1339

1340

p = SkipWhiteSpace( p, encoding );

1341

1342

if ( data )

1343

{

1344

data->Stamp( p, encoding );

1345

location = data->Cursor();

1346

}

1347

const char* startTag = "<!--";

1348

const char* endTag = "-->";

1349

1350

if ( !StringEqual( p, startTag, false, encoding ) )

1351

{

1352

document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );

1353

return 0;

1354

}

1355

p += strlen( startTag );

1356

1357

// [ 1475201 ] TinyXML parses entities in comments

1358

// Oops - ReadText doesn't work, because we don't want to parse the entities.

1359

// p = ReadText( p, &value, false, endTag, false, encoding );

1360

1361

// from the XML spec:

1362

1363

[Definition: Comments may appear anywhere in a document outside other markup; in addition,

1364

they may appear within the document type declaration at places allowed by the grammar.

1365

They are not part of the document's character data; an XML processor MAY, but need not,

1366

make it possible for an application to retrieve the text of comments. For compatibility,

1367

the string "--" (double-hyphen) MUST NOT occur within comments.] Parameter entity

1368

references MUST NOT be recognized within comments.

1369

1370

An example of a comment:

1371

1372

1373

1374

1375

value = "";

1376

// Keep all the white space.

1377

while ( p && *p && !StringEqual( p, endTag, false, encoding ) )

1378

{

1379

value.append( p, 1 );

1380

++p;

1381

}

1382

if ( p )

1383

p += strlen( endTag );

1384

1385

return p;

1386

}

1387

1388

1389

const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )

1390

{

1391

p = SkipWhiteSpace( p, encoding );

1392

if ( !p || !*p ) return 0;

1393

1394

// int tabsize = 4;

1395

// if ( document )

1396

// tabsize = document->TabSize();

1397

1398

if ( data )

1399

{

1400

data->Stamp( p, encoding );

1401

location = data->Cursor();

1402

}

1403

// Read the name, the '=' and the value.

1404

const char* pErr = p;

1405

p = ReadName( p, &name, encoding );

1406

if ( !p || !*p )

1407

{

1408

if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );

1409

return 0;

1410

}

1411

p = SkipWhiteSpace( p, encoding );

1412

if ( !p || !*p || *p != '=' )

1413

{

1414

if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );

1415

return 0;

1416

}

1417

1418

++p; // skip '='

1419

p = SkipWhiteSpace( p, encoding );

1420

if ( !p || !*p )

1421

{

1422

if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );

1423

return 0;

1424

}

1425

1426

const char* end;

1427

const char SINGLE_QUOTE = '\'';

1428

const char DOUBLE_QUOTE = '\"';

1429

1430

if ( *p == SINGLE_QUOTE )

1431

{

1432

++p;

1433

end = "\'"; // single quote in string

1434

p = ReadText( p, &value, false, end, false, encoding );

1435

}

1436

else if ( *p == DOUBLE_QUOTE )

1437

{

1438

++p;

1439

end = "\""; // double quote in string

1440

p = ReadText( p, &value, false, end, false, encoding );

1441

}

1442

else

1443

{

1444

// All attribute values should be in single or double quotes.

1445

// But this is such a common error that the parser will try

1446

// its best, even without them.

1447

value = "";

1448

while ( p && *p // existence

1449

&& !IsWhiteSpace( *p ) && *p != '\n' && *p != '\r' // whitespace

1450

&& *p != '/' && *p != '>' ) // tag end

1451

{

1452

if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE ) {

1453

// [ 1451649 ] Attribute values with trailing quotes not handled correctly

1454

// We did not have an opening quote but seem to have a

1455

// closing one. Give up and throw an error.

1456

if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );

1457

return 0;

1458

}

1459

value += *p;

1460

++p;

1461

}

1462

}

1463

return p;

1464

}

1465

1466

#ifdef TIXML_USE_STL

1467

void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag )

1468

{

1469

while ( in->good() )

1470

{

1471

int c = in->peek();

1472

if ( !cdata && (c == '<' ) )

1473

{

1474

return;

1475

}

1476

if ( c <= 0 )

1477

{

1478

TiXmlDocument* document = GetDocument();

1479

if ( document )

1480

document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );

1481

return;

1482

}

1483

1484

(*tag) += (char) c;

1485

in->get(); // "commits" the peek made above

1486

1487

if ( cdata && c == '>' && tag->size() >= 3 ) {

1488

size_t len = tag->size();

1489

if ( (*tag)[len-2] == ']' && (*tag)[len-3] == ']' ) {

1490

// terminator of cdata.

1491

return;

1492

}

1493

}

1494

}

1495

}

1496

#endif

1497

1498

const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )

1499

{

1500

value = "";

1501

TiXmlDocument* document = GetDocument();

1502

1503

if ( data )

1504

{

1505

data->Stamp( p, encoding );

1506

location = data->Cursor();

1507

}

1508

1509

const char* const startTag = "<![CDATA[";

1510

const char* const endTag = "]]>";

1511

1512

if ( cdata || StringEqual( p, startTag, false, encoding ) )

1513

{

1514

cdata = true;

1515

1516

if ( !StringEqual( p, startTag, false, encoding ) )

1517

{

1518

document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );

1519

return 0;

1520

}

1521

p += strlen( startTag );

1522

1523

// Keep all the white space, ignore the encoding, etc.

1524

while ( p && *p

1525

&& !StringEqual( p, endTag, false, encoding )

1526

)

1527

{

1528

value += *p;

1529

++p;

1530

}

1531

1532

TIXML_STRING dummy;

1533

p = ReadText( p, &dummy, false, endTag, false, encoding );

1534

return p;

1535

}

1536

else

1537

{

1538

bool ignoreWhite = true;

1539

1540

const char* end = "<";

1541

p = ReadText( p, &value, ignoreWhite, end, false, encoding );

1542

if ( p )

1543

return p-1; // don't truncate the '<'

1544

return 0;

1545

}

1546

}

1547

1548

#ifdef TIXML_USE_STL

1549

void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag )

1550

{

1551

while ( in->good() )

1552

{

1553

int c = in->get();

1554

if ( c <= 0 )

1555

{

1556

TiXmlDocument* document = GetDocument();

1557

if ( document )

1558

document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );

1559

return;

1560

}

1561

(*tag) += (char) c;

1562

1563

if ( c == '>' )

1564

{

1565

// All is well.

1566

return;

1567

}

1568

}

1569

}

1570

#endif

1571

1572

const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )

1573

{

1574

p = SkipWhiteSpace( p, _encoding );

1575

// Find the beginning, find the end, and look for

1576

// the stuff in-between.

1577

TiXmlDocument* document = GetDocument();

1578

if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )

1579

{

1580

if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );

1581

return 0;

1582

}

1583

if ( data )

1584

{

1585

data->Stamp( p, _encoding );

1586

location = data->Cursor();

1587

}

1588

p += 5;

1589

1590

version = "";

1591

encoding = "";

1592

standalone = "";

1593

1594

while ( p && *p )

1595

{

1596

if ( *p == '>' )

1597

{

1598

++p;

1599

return p;

1600

}

1601

1602

p = SkipWhiteSpace( p, _encoding );

1603

if ( StringEqual( p, "version", true, _encoding ) )

1604

{

1605

TiXmlAttribute attrib;

1606

p = attrib.Parse( p, data, _encoding );

1607

version = attrib.Value();

1608

}

1609

else if ( StringEqual( p, "encoding", true, _encoding ) )

1610

{

1611

TiXmlAttribute attrib;

1612

p = attrib.Parse( p, data, _encoding );

1613

encoding = attrib.Value();

1614

}

1615

else if ( StringEqual( p, "standalone", true, _encoding ) )

1616

{

1617

TiXmlAttribute attrib;

1618

p = attrib.Parse( p, data, _encoding );

1619

standalone = attrib.Value();

1620

}

1621

else

1622

{

1623

// Read over whatever it is.

1624

while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )

1625

++p;

1626

}

1627

}

1628

return 0;

1629

}

1630

1631

bool TiXmlText::Blank() const

1632

{

1633

for ( unsigned i=0; i<value.length(); i++ )

1634

if ( !IsWhiteSpace( value[i] ) )

1635

return false;

1636

return true;

1637

}

1638

Older »