~ubuntu-branches/ubuntu/quantal/libxml2/quantal-updates : revision 61

1

/*

2

* parserInternals.c : Internal routines (and obsolete ones) needed for the

3

* XML and HTML parsers.

4

*

5

* See Copyright for the status of this software.

6

*

7

* daniel@veillard.com

8

*/

9

10

#define IN_LIBXML

11

#include "libxml.h"

12

13

#if defined(WIN32) && !defined (__CYGWIN__)

14

#define XML_DIR_SEP '\\'

15

#else

16

#define XML_DIR_SEP '/'

17

#endif

18

19

#include <string.h>

20

#ifdef HAVE_CTYPE_H

21

#include <ctype.h>

22

#endif

23

#ifdef HAVE_STDLIB_H

24

#include <stdlib.h>

25

#endif

26

#ifdef HAVE_SYS_STAT_H

27

#include <sys/stat.h>

28

#endif

29

#ifdef HAVE_FCNTL_H

30

#include <fcntl.h>

31

#endif

32

#ifdef HAVE_UNISTD_H

33

#include <unistd.h>

34

#endif

35

#ifdef HAVE_ZLIB_H

36

#include <zlib.h>

37

#endif

38

39

#include <libxml/xmlmemory.h>

40

#include <libxml/tree.h>

41

#include <libxml/parser.h>

42

#include <libxml/parserInternals.h>

43

#include <libxml/valid.h>

44

#include <libxml/entities.h>

45

#include <libxml/xmlerror.h>

46

#include <libxml/encoding.h>

47

#include <libxml/valid.h>

48

#include <libxml/xmlIO.h>

49

#include <libxml/uri.h>

50

#include <libxml/dict.h>

51

#include <libxml/SAX.h>

52

#ifdef LIBXML_CATALOG_ENABLED

53

#include <libxml/catalog.h>

54

#endif

55

#include <libxml/globals.h>

56

#include <libxml/chvalid.h>

57

58

/*

59

* Various global defaults for parsing

60

*/

61

62

/**

63

* xmlCheckVersion:

64

* @version: the include version number

65

*

66

* check the compiled lib version against the include one.

67

* This can warn or immediately kill the application

68

*/

69

void

70

xmlCheckVersion(int version) {

71

int myversion = (int) LIBXML_VERSION;

72

73

xmlInitParser();

74

75

if ((myversion / 10000) != (version / 10000)) {

76

xmlGenericError(xmlGenericErrorContext,

77

"Fatal: program compiled against libxml %d using libxml %d\n",

78

(version / 10000), (myversion / 10000));

79

fprintf(stderr,

80

"Fatal: program compiled against libxml %d using libxml %d\n",

81

(version / 10000), (myversion / 10000));

82

}

83

if ((myversion / 100) < (version / 100)) {

84

xmlGenericError(xmlGenericErrorContext,

85

"Warning: program compiled against libxml %d using older %d\n",

86

(version / 100), (myversion / 100));

87

}

88

}

89

90

91

/************************************************************************

92

* *

93

* Some factorized error routines *

94

* *

95

************************************************************************/

96

97

98

/**

99

* xmlErrMemory:

100

* @ctxt: an XML parser context

101

* @extra: extra informations

102

*

103

* Handle a redefinition of attribute error

104

*/

105

void

106

xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)

107

{

108

if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&

109

(ctxt->instate == XML_PARSER_EOF))

110

return;

111

if (ctxt != NULL) {

112

ctxt->errNo = XML_ERR_NO_MEMORY;

113

ctxt->instate = XML_PARSER_EOF;

114

ctxt->disableSAX = 1;

115

}

116

if (extra)

117

__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,

118

XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,

119

NULL, NULL, 0, 0,

120

"Memory allocation failed : %s\n", extra);

121

else

122

__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,

123

XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,

124

NULL, NULL, 0, 0, "Memory allocation failed\n");

125

}

126

127

/**

128

* __xmlErrEncoding:

129

* @ctxt: an XML parser context

130

* @xmlerr: the error number

131

* @msg: the error message

132

* @str1: an string info

133

* @str2: an string info

134

*

135

* Handle an encoding error

136

*/

137

void

138

__xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr,

139

const char *msg, const xmlChar * str1, const xmlChar * str2)

140

{

141

if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&

142

(ctxt->instate == XML_PARSER_EOF))

143

return;

144

if (ctxt != NULL)

145

ctxt->errNo = xmlerr;

146

__xmlRaiseError(NULL, NULL, NULL,

147

ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL,

148

NULL, 0, (const char *) str1, (const char *) str2,

149

NULL, 0, 0, msg, str1, str2);

150

if (ctxt != NULL) {

151

ctxt->wellFormed = 0;

152

if (ctxt->recovery == 0)

153

ctxt->disableSAX = 1;

154

}

155

}

156

157

/**

158

* xmlErrInternal:

159

* @ctxt: an XML parser context

160

* @msg: the error message

161

* @str: error informations

162

*

163

* Handle an internal error

164

*/

165

static void

166

xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str)

167

{

168

if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&

169

(ctxt->instate == XML_PARSER_EOF))

170

return;

171

if (ctxt != NULL)

172

ctxt->errNo = XML_ERR_INTERNAL_ERROR;

173

__xmlRaiseError(NULL, NULL, NULL,

174

ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR,

175

XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL,

176

0, 0, msg, str);

177

if (ctxt != NULL) {

178

ctxt->wellFormed = 0;

179

if (ctxt->recovery == 0)

180

ctxt->disableSAX = 1;

181

}

182

}

183

184

/**

185

* xmlErrEncodingInt:

186

* @ctxt: an XML parser context

187

* @error: the error number

188

* @msg: the error message

189

* @val: an integer value

190

*

191

* n encoding error

192

*/

193

static void

194

xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,

195

const char *msg, int val)

196

{

197

if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&

198

(ctxt->instate == XML_PARSER_EOF))

199

return;

200

if (ctxt != NULL)

201

ctxt->errNo = error;

202

__xmlRaiseError(NULL, NULL, NULL,

203

ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,

204

NULL, 0, NULL, NULL, NULL, val, 0, msg, val);

205

if (ctxt != NULL) {

206

ctxt->wellFormed = 0;

207

if (ctxt->recovery == 0)

208

ctxt->disableSAX = 1;

209

}

210

}

211

212

/**
 * xmlIsLetter:
 * @c:  a Unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 1 if @c is a BaseChar or an Ideographic, 0 otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}

225

226

/************************************************************************

227

* *

228

* Input handling functions for progressive parsing *

229

* *

230

************************************************************************/

231

232

/* #define DEBUG_INPUT */

233

/* #define DEBUG_STACK */

234

/* #define DEBUG_PUSH */

235

236

237

/* we need to keep enough input to show errors in context */

238

#define LINE_LEN 80

239

240

#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/*
 * check_buffer:
 * @in:  an XML parser input with a non-NULL buf and buf->buffer
 *
 * Debug-only consistency check, compiled when DEBUG_INPUT is defined.
 * Reports through the generic error handler when base does not match the
 * buffer content, or when cur lies outside [base, base + use], then
 * prints a one-line dump of the buffer state.
 */
static void
check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Pointers are printed with %p: casting them to int truncates on
     * platforms where pointers are wider than int.  The remaining values
     * are cast so that each argument matches its conversion exactly.
     */
    xmlGenericError(xmlGenericErrorContext,
            "buffer %p : content %p, cur %d, use %u, size %u\n",
            (void *) in, (void *) in->buf->buffer->content,
            (int) (in->cur - in->base),
            (unsigned int) in->buf->buffer->use,
            (unsigned int) in->buf->buffer->size);
}

#else
#define CHECK_BUFFER(in)
#endif

265

266

267

/**

268

* xmlParserInputRead:

269

* @in: an XML parser input

270

* @len: an indicative size for the lookahead

271

*

272

* This function refresh the input for the parser. It doesn't try to

273

* preserve pointers to the input buffer, and discard already read data

274

*

275

* Returns the number of xmlChars read, or -1 in case of error, 0 indicate the

276

* end of this entity

277

*/

278

int

279

xmlParserInputRead(xmlParserInputPtr in, int len) {

280

int ret;

281

int used;

282

int indx;

283

284

if (in == NULL) return(-1);

285

#ifdef DEBUG_INPUT

286

xmlGenericError(xmlGenericErrorContext, "Read\n");

287

#endif

288

if (in->buf == NULL) return(-1);

289

if (in->base == NULL) return(-1);

290

if (in->cur == NULL) return(-1);

291

if (in->buf->buffer == NULL) return(-1);

292

if (in->buf->readcallback == NULL) return(-1);

293

294

CHECK_BUFFER(in);

295

296

used = in->cur - in->buf->buffer->content;

297

ret = xmlBufferShrink(in->buf->buffer, used);

298

if (ret > 0) {

299

in->cur -= ret;

300

in->consumed += ret;

301

}

302

ret = xmlParserInputBufferRead(in->buf, len);

303

if (in->base != in->buf->buffer->content) {

304

/*

305

* the buffer has been reallocated

306

*/

307

indx = in->cur - in->base;

308

in->base = in->buf->buffer->content;

309

in->cur = &in->buf->buffer->content[indx];

310

}

311

in->end = &in->buf->buffer->content[in->buf->buffer->use];

312

313

CHECK_BUFFER(in);

314

315

return(ret);

316

}

317

318

/**

319

* xmlParserInputGrow:

320

* @in: an XML parser input

321

* @len: an indicative size for the lookahead

322

*

323

* This function increase the input for the parser. It tries to

324

* preserve pointers to the input buffer, and keep already read data

325

*

326

* Returns the number of xmlChars read, or -1 in case of error, 0 indicate the

327

* end of this entity

328

*/

329

int

330

xmlParserInputGrow(xmlParserInputPtr in, int len) {

331

int ret;

332

int indx;

333

334

if (in == NULL) return(-1);

335

#ifdef DEBUG_INPUT

336

xmlGenericError(xmlGenericErrorContext, "Grow\n");

337

#endif

338

if (in->buf == NULL) return(-1);

339

if (in->base == NULL) return(-1);

340

if (in->cur == NULL) return(-1);

341

if (in->buf->buffer == NULL) return(-1);

342

343

CHECK_BUFFER(in);

344

345

indx = in->cur - in->base;

346

if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {

347

348

CHECK_BUFFER(in);

349

350

return(0);

351

}

352

if (in->buf->readcallback != NULL)

353

ret = xmlParserInputBufferGrow(in->buf, len);

354

else

355

return(0);

356

357

/*

358

* NOTE : in->base may be a "dangling" i.e. freed pointer in this

359

* block, but we use it really as an integer to do some

360

* pointer arithmetic. Insure will raise it as a bug but in

361

* that specific case, that's not !

362

*/

363

if (in->base != in->buf->buffer->content) {

364

/*

365

* the buffer has been reallocated

366

*/

367

indx = in->cur - in->base;

368

in->base = in->buf->buffer->content;

369

in->cur = &in->buf->buffer->content[indx];

370

}

371

in->end = &in->buf->buffer->content[in->buf->buffer->use];

372

373

CHECK_BUFFER(in);

374

375

return(ret);

376

}

377

378

/**

379

* xmlParserInputShrink:

380

* @in: an XML parser input

381

*

382

* This function removes used input for the parser.

383

*/

384

void

385

xmlParserInputShrink(xmlParserInputPtr in) {

386

int used;

387

int ret;

388

int indx;

389

390

#ifdef DEBUG_INPUT

391

xmlGenericError(xmlGenericErrorContext, "Shrink\n");

392

#endif

393

if (in == NULL) return;

394

if (in->buf == NULL) return;

395

if (in->base == NULL) return;

396

if (in->cur == NULL) return;

397

if (in->buf->buffer == NULL) return;

398

399

CHECK_BUFFER(in);

400

401

used = in->cur - in->buf->buffer->content;

402

/*

403

* Do not shrink on large buffers whose only a tiny fraction

404

* was consumed

405

*/

406

if (used > INPUT_CHUNK) {

407

ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);

408

if (ret > 0) {

409

in->cur -= ret;

410

in->consumed += ret;

411

}

412

in->end = &in->buf->buffer->content[in->buf->buffer->use];

413

}

414

415

CHECK_BUFFER(in);

416

417

if (in->buf->buffer->use > INPUT_CHUNK) {

418

return;

419

}

420

xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);

421

if (in->base != in->buf->buffer->content) {

422

/*

423

* the buffer has been reallocated

424

*/

425

indx = in->cur - in->base;

426

in->base = in->buf->buffer->content;

427

in->cur = &in->buf->buffer->content[indx];

428

}

429

in->end = &in->buf->buffer->content[in->buf->buffer->use];

430

431

CHECK_BUFFER(in);

432

}

433

434

/************************************************************************

435

* *

436

* UTF8 character input and related functions *

437

* *

438

************************************************************************/

439

440

/**

441

* xmlNextChar:

442

* @ctxt: the XML parser context

443

*

444

* Skip to the next char input char.

445

*/

446

447

void

448

xmlNextChar(xmlParserCtxtPtr ctxt)

449

{

450

if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) ||

451

(ctxt->input == NULL))

452

return;

453

454

if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {

455

if ((*ctxt->input->cur == 0) &&

456

(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&

457

(ctxt->instate != XML_PARSER_COMMENT)) {

458

/*

459

* If we are at the end of the current entity and

460

* the context allows it, we pop consumed entities

461

* automatically.

462

* the auto closing should be blocked in other cases

463

*/

464

xmlPopInput(ctxt);

465

} else {

466

const unsigned char *cur;

467

unsigned char c;

468

469

/*

470

* 2.11 End-of-Line Handling

471

* the literal two-character sequence "#xD#xA" or a standalone

472

* literal #xD, an XML processor must pass to the application

473

* the single character #xA.

474

*/

475

if (*(ctxt->input->cur) == '\n') {

476

ctxt->input->line++; ctxt->input->col = 1;

477

} else

478

ctxt->input->col++;

479

480

/*

481

* We are supposed to handle UTF8, check it's valid

482

* From rfc2044: encoding of the Unicode values on UTF-8:

483

*

484

* UCS-4 range (hex.) UTF-8 octet sequence (binary)

485

* 0000 0000-0000 007F 0xxxxxxx

486

* 0000 0080-0000 07FF 110xxxxx 10xxxxxx

487

* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx

488

*

489

* Check for the 0x110000 limit too

490

*/

491

cur = ctxt->input->cur;

492

493

c = *cur;

494

if (c & 0x80) {

495

if (c == 0xC0)

496

goto encoding_error;

497

if (cur[1] == 0) {

498

xmlParserInputGrow(ctxt->input, INPUT_CHUNK);

499

cur = ctxt->input->cur;

500

}

501

if ((cur[1] & 0xc0) != 0x80)

502

goto encoding_error;

503

if ((c & 0xe0) == 0xe0) {

504

unsigned int val;

505

506

if (cur[2] == 0) {

507

xmlParserInputGrow(ctxt->input, INPUT_CHUNK);

508

cur = ctxt->input->cur;

509

}

510

if ((cur[2] & 0xc0) != 0x80)

511

goto encoding_error;

512

if ((c & 0xf0) == 0xf0) {

513

if (cur[3] == 0) {

514

xmlParserInputGrow(ctxt->input, INPUT_CHUNK);

515

cur = ctxt->input->cur;

516

}

517

if (((c & 0xf8) != 0xf0) ||

518

((cur[3] & 0xc0) != 0x80))

519

goto encoding_error;

520

/* 4-byte code */

521

ctxt->input->cur += 4;

522

val = (cur[0] & 0x7) << 18;

523

val |= (cur[1] & 0x3f) << 12;

524

val |= (cur[2] & 0x3f) << 6;

525

val |= cur[3] & 0x3f;

526

} else {

527

/* 3-byte code */

528

ctxt->input->cur += 3;

529

val = (cur[0] & 0xf) << 12;

530

val |= (cur[1] & 0x3f) << 6;

531

val |= cur[2] & 0x3f;

532

}

533

if (((val > 0xd7ff) && (val < 0xe000)) ||

534

((val > 0xfffd) && (val < 0x10000)) ||

535

(val >= 0x110000)) {

536

xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,

537

"Char 0x%X out of allowed range\n",

538

val);

539

}

540

} else

541

/* 2-byte code */

542

ctxt->input->cur += 2;

543

} else

544

/* 1-byte code */

545

ctxt->input->cur++;

546

547

ctxt->nbChars++;

548

if (*ctxt->input->cur == 0)

549

xmlParserInputGrow(ctxt->input, INPUT_CHUNK);

550

}

551

} else {

552

/*

553

* Assume it's a fixed length encoding (1) with

554

* a compatible encoding for the ASCII set, since

555

* XML constructs only use < 128 chars

556

*/

557

558

if (*(ctxt->input->cur) == '\n') {

559

ctxt->input->line++; ctxt->input->col = 1;

560

} else

561

ctxt->input->col++;

562

ctxt->input->cur++;

563

ctxt->nbChars++;

564

if (*ctxt->input->cur == 0)

565

xmlParserInputGrow(ctxt->input, INPUT_CHUNK);

566

}

567

if ((*ctxt->input->cur == '%') && (!ctxt->html))

568

xmlParserHandlePEReference(ctxt);

569

if ((*ctxt->input->cur == 0) &&

570

(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))

571

xmlPopInput(ctxt);

572

return;

573

encoding_error:

574

/*

575

* If we detect an UTF8 error that probably mean that the

576

* input encoding didn't get properly advertised in the

577

* declaration header. Report the error and switch the encoding

578

* to ISO-Latin-1 (if you don't like this policy, just declare the

579

* encoding !)

580

*/

581

if ((ctxt == NULL) || (ctxt->input == NULL) ||

582

(ctxt->input->end - ctxt->input->cur < 4)) {

583

__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,

584

"Input is not proper UTF-8, indicate encoding !\n",

585

NULL, NULL);

586

} else {

587

char buffer[150];

588

589

snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",

590

ctxt->input->cur[0], ctxt->input->cur[1],

591

ctxt->input->cur[2], ctxt->input->cur[3]);

592

__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,

593

"Input is not proper UTF-8, indicate encoding !\n%s",

594

BAD_CAST buffer, NULL);

595

}

596

ctxt->charset = XML_CHAR_ENCODING_8859_1;

597

ctxt->input->cur++;

598

return;

599

}

600

601

/**

602

* xmlCurrentChar:

603

* @ctxt: the XML parser context

604

* @len: pointer to the length of the char read

605

*

606

* The current char value, if using UTF-8 this may actually span multiple

607

* bytes in the input buffer. Implement the end of line normalization:

608

* 2.11 End-of-Line Handling

609

* Wherever an external parsed entity or the literal entity value

610

* of an internal parsed entity contains either the literal two-character

611

* sequence "#xD#xA" or a standalone literal #xD, an XML processor

612

* must pass to the application the single character #xA.

613

* This behavior can conveniently be produced by normalizing all

614

* line breaks to #xA on input, before parsing.)

615

*

616

* Returns the current char value and its length

617

*/

618

619

int

620

xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {

621

if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);

622

if (ctxt->instate == XML_PARSER_EOF)

623

return(0);

624

625

if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {

626

*len = 1;

627

return((int) *ctxt->input->cur);

628

}

629

if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {

630

/*

631

* We are supposed to handle UTF8, check it's valid

632

* From rfc2044: encoding of the Unicode values on UTF-8:

633

*

634

* UCS-4 range (hex.) UTF-8 octet sequence (binary)

635

* 0000 0000-0000 007F 0xxxxxxx

636

* 0000 0080-0000 07FF 110xxxxx 10xxxxxx

637

* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx

638

*

639

* Check for the 0x110000 limit too

640

*/

641

const unsigned char *cur = ctxt->input->cur;

642

unsigned char c;

643

unsigned int val;

644

645

c = *cur;

646

if (c & 0x80) {

647

if (((c & 0x40) == 0) || (c == 0xC0))

648

goto encoding_error;

649

if (cur[1] == 0) {

650

xmlParserInputGrow(ctxt->input, INPUT_CHUNK);

651

cur = ctxt->input->cur;

652

}

653

if ((cur[1] & 0xc0) != 0x80)

654

goto encoding_error;

655

if ((c & 0xe0) == 0xe0) {

656

if (cur[2] == 0) {

657

xmlParserInputGrow(ctxt->input, INPUT_CHUNK);

658

cur = ctxt->input->cur;

659

}

660

if ((cur[2] & 0xc0) != 0x80)

661

goto encoding_error;

662

if ((c & 0xf0) == 0xf0) {

663

if (cur[3] == 0) {

664

xmlParserInputGrow(ctxt->input, INPUT_CHUNK);

665

cur = ctxt->input->cur;

666

}

667

if (((c & 0xf8) != 0xf0) ||

668

((cur[3] & 0xc0) != 0x80))

669

goto encoding_error;

670

/* 4-byte code */

671

*len = 4;

672

val = (cur[0] & 0x7) << 18;

673

val |= (cur[1] & 0x3f) << 12;

674

val |= (cur[2] & 0x3f) << 6;

675

val |= cur[3] & 0x3f;

676

if (val < 0x10000)

677

goto encoding_error;

678

} else {

679

/* 3-byte code */

680

*len = 3;

681

val = (cur[0] & 0xf) << 12;

682

val |= (cur[1] & 0x3f) << 6;

683

val |= cur[2] & 0x3f;

684

if (val < 0x800)

685

goto encoding_error;

686

}

687

} else {

688

/* 2-byte code */

689

*len = 2;

690

val = (cur[0] & 0x1f) << 6;

691

val |= cur[1] & 0x3f;

692

if (val < 0x80)

693

goto encoding_error;

694

}

695

if (!IS_CHAR(val)) {

696

xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,

697

"Char 0x%X out of allowed range\n", val);

698

}

699

return(val);

700

} else {

701

/* 1-byte code */

702

*len = 1;

703

if (*ctxt->input->cur == 0)

704

xmlParserInputGrow(ctxt->input, INPUT_CHUNK);

705

if ((*ctxt->input->cur == 0) &&

706

(ctxt->input->end > ctxt->input->cur)) {

707

xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,

708

"Char 0x0 out of allowed range\n", 0);

709

}

710

if (*ctxt->input->cur == 0xD) {

711

if (ctxt->input->cur[1] == 0xA) {

712

ctxt->nbChars++;

713

ctxt->input->cur++;

714

}

715

return(0xA);

716

}

717

return((int) *ctxt->input->cur);

718

}

719

}

720

/*

721

* Assume it's a fixed length encoding (1) with

722

* a compatible encoding for the ASCII set, since

723

* XML constructs only use < 128 chars

724

*/

725

*len = 1;

726

if (*ctxt->input->cur == 0xD) {

727

if (ctxt->input->cur[1] == 0xA) {

728

ctxt->nbChars++;

729

ctxt->input->cur++;

730

}

731

return(0xA);

732

}

733

return((int) *ctxt->input->cur);

734

encoding_error:

735

/*

736

* An encoding problem may arise from a truncated input buffer

737

* splitting a character in the middle. In that case do not raise

738

* an error but return 0 to endicate an end of stream problem

739

*/

740

if (ctxt->input->end - ctxt->input->cur < 4) {

741

*len = 0;

742

return(0);

743

}

744

745

/*

746

* If we detect an UTF8 error that probably mean that the

747

* input encoding didn't get properly advertised in the

748

* declaration header. Report the error and switch the encoding

749

* to ISO-Latin-1 (if you don't like this policy, just declare the

750

* encoding !)

751

*/

752

{

753

char buffer[150];

754

755

snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",

756

ctxt->input->cur[0], ctxt->input->cur[1],

757

ctxt->input->cur[2], ctxt->input->cur[3]);

758

__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,

759

"Input is not proper UTF-8, indicate encoding !\n%s",

760

BAD_CAST buffer, NULL);

761

}

762

ctxt->charset = XML_CHAR_ENCODING_8859_1;

763

*len = 1;

764

return((int) *ctxt->input->cur);

765

}

766

767

/**

768

* xmlStringCurrentChar:

769

* @ctxt: the XML parser context

770

* @cur: pointer to the beginning of the char

771

* @len: pointer to the length of the char read

772

*

773

* The current char value, if using UTF-8 this may actually span multiple

774

* bytes in the input buffer.

775

*

776

* Returns the current char value and its length

777

*/

778

779

int

780

xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)

781

{

782

if ((len == NULL) || (cur == NULL)) return(0);

783

if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {

784

/*

785

* We are supposed to handle UTF8, check it's valid

786

* From rfc2044: encoding of the Unicode values on UTF-8:

787

*

788

* UCS-4 range (hex.) UTF-8 octet sequence (binary)

789

* 0000 0000-0000 007F 0xxxxxxx

790

* 0000 0080-0000 07FF 110xxxxx 10xxxxxx

791

* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx

792

*

793

* Check for the 0x110000 limit too

794

*/

795

unsigned char c;

796

unsigned int val;

797

798

c = *cur;

799

if (c & 0x80) {

800

if ((cur[1] & 0xc0) != 0x80)

801

goto encoding_error;

802

if ((c & 0xe0) == 0xe0) {

803

804

if ((cur[2] & 0xc0) != 0x80)

805

goto encoding_error;

806

if ((c & 0xf0) == 0xf0) {

807

if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))

808

goto encoding_error;

809

/* 4-byte code */

810

*len = 4;

811

val = (cur[0] & 0x7) << 18;

812

val |= (cur[1] & 0x3f) << 12;

813

val |= (cur[2] & 0x3f) << 6;

814

val |= cur[3] & 0x3f;

815

} else {

816

/* 3-byte code */

817

*len = 3;

818

val = (cur[0] & 0xf) << 12;

819

val |= (cur[1] & 0x3f) << 6;

820

val |= cur[2] & 0x3f;

821

}

822

} else {

823

/* 2-byte code */

824

*len = 2;

825

val = (cur[0] & 0x1f) << 6;

826

val |= cur[1] & 0x3f;

827

}

828

if (!IS_CHAR(val)) {

829

xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,

830

"Char 0x%X out of allowed range\n", val);

831

}

832

return (val);

833

} else {

834

/* 1-byte code */

835

*len = 1;

836

return ((int) *cur);

837

}

838

}

839

/*

840

* Assume it's a fixed length encoding (1) with

841

* a compatible encoding for the ASCII set, since

842

* XML constructs only use < 128 chars

843

*/

844

*len = 1;

845

return ((int) *cur);

846

encoding_error:

847

848

/*

849

* An encoding problem may arise from a truncated input buffer

850

* splitting a character in the middle. In that case do not raise

851

* an error but return 0 to endicate an end of stream problem

852

*/

853

if ((ctxt == NULL) || (ctxt->input == NULL) ||

854

(ctxt->input->end - ctxt->input->cur < 4)) {

855

*len = 0;

856

return(0);

857

}

858

/*

859

* If we detect an UTF8 error that probably mean that the

860

* input encoding didn't get properly advertised in the

861

* declaration header. Report the error and switch the encoding

862

* to ISO-Latin-1 (if you don't like this policy, just declare the

863

* encoding !)

864

*/

865

{

866

char buffer[150];

867

868

snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",

869

ctxt->input->cur[0], ctxt->input->cur[1],

870

ctxt->input->cur[2], ctxt->input->cur[3]);

871

__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,

872

"Input is not proper UTF-8, indicate encoding !\n%s",

873

BAD_CAST buffer, NULL);

874

}

875

*len = 1;

876

return ((int) *cur);

877

}

878

879

/**

880

* xmlCopyCharMultiByte:

881

* @out: pointer to an array of xmlChar

882

* @val: the char value

883

*

884

* append the char value in the array

885

*

886

* Returns the number of xmlChar written

887

*/

888

int

889

xmlCopyCharMultiByte(xmlChar *out, int val) {

890

if (out == NULL) return(0);

891

/*

892

* We are supposed to handle UTF8, check it's valid

893

* From rfc2044: encoding of the Unicode values on UTF-8:

894

*

895

* UCS-4 range (hex.) UTF-8 octet sequence (binary)

896

* 0000 0000-0000 007F 0xxxxxxx

897

* 0000 0080-0000 07FF 110xxxxx 10xxxxxx

898

* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx

899

*/

900

if (val >= 0x80) {

901

xmlChar *savedout = out;

902

int bits;

903

if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }

904

else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}

905

else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }

906

else {

907

xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR,

908

"Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",

909

val);

910

return(0);

911

}

912

for ( ; bits >= 0; bits-= 6)

913

*out++= ((val >> bits) & 0x3F) | 0x80 ;

914

return (out - savedout);

915

}

916

*out = (xmlChar) val;

917

return 1;

918

}

919

920

/**

921

* xmlCopyChar:

922

* @len: Ignored, compatibility

923

* @out: pointer to an array of xmlChar

924

* @val: the char value

925

*

926

* append the char value in the array

927

*

928

* Returns the number of xmlChar written

929

*/

930

931

int

932

xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {

933

if (out == NULL) return(0);

934

/* the len parameter is ignored */

935

if (val >= 0x80) {

936

return(xmlCopyCharMultiByte (out, val));

937

}

938

*out = (xmlChar) val;

939

return 1;

940

}

941

942

/************************************************************************

943

* *

944

* Commodity functions to switch encodings *

945

* *

946

************************************************************************/

947

948

/* defined in encoding.c, not public */

949

int

950

xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,

951

xmlBufferPtr in, int len);

952

953

static int

954

xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,

955

xmlCharEncodingHandlerPtr handler, int len);

956

static int

957

xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,

958

xmlCharEncodingHandlerPtr handler, int len);

959

/**

960

* xmlSwitchEncoding:

961

* @ctxt: the parser context

962

* @enc: the encoding value (number)

963

*

964

* change the input functions when discovering the character encoding

965

* of a given entity.

966

*

967

* Returns 0 in case of success, -1 otherwise

968

*/

969

int

970

xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)

971

{

972

xmlCharEncodingHandlerPtr handler;

973

int len = -1;

974

975

if (ctxt == NULL) return(-1);

976

switch (enc) {

977

case XML_CHAR_ENCODING_ERROR:

978

__xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,

979

"encoding unknown\n", NULL, NULL);

980

return(-1);

981

case XML_CHAR_ENCODING_NONE:

982

/* let's assume it's UTF-8 without the XML decl */

983

ctxt->charset = XML_CHAR_ENCODING_UTF8;

984

return(0);

985

case XML_CHAR_ENCODING_UTF8:

986

/* default encoding, no conversion should be needed */

987

ctxt->charset = XML_CHAR_ENCODING_UTF8;

988

989

/*

990

* Errata on XML-1.0 June 20 2001

991

* Specific handling of the Byte Order Mark for

992

* UTF-8

993

*/

994

if ((ctxt->input != NULL) &&

995

(ctxt->input->cur[0] == 0xEF) &&

996

(ctxt->input->cur[1] == 0xBB) &&

997

(ctxt->input->cur[2] == 0xBF)) {

998

ctxt->input->cur += 3;

999

}

1000

return(0);

1001

case XML_CHAR_ENCODING_UTF16LE:

1002

case XML_CHAR_ENCODING_UTF16BE:

1003

/*The raw input characters are encoded

1004

*in UTF-16. As we expect this function

1005

*to be called after xmlCharEncInFunc, we expect

1006

*ctxt->input->cur to contain UTF-8 encoded characters.

1007

*So the raw UTF16 Byte Order Mark

1008

*has also been converted into

1009

*an UTF-8 BOM. Let's skip that BOM.

1010

*/

1011

if ((ctxt->input != NULL) && (ctxt->input->cur != NULL) &&

1012

(ctxt->input->cur[0] == 0xEF) &&

1013

(ctxt->input->cur[1] == 0xBB) &&

1014

(ctxt->input->cur[2] == 0xBF)) {

1015

ctxt->input->cur += 3;

1016

}

1017

len = 90;

1018

break;

1019

case XML_CHAR_ENCODING_UCS2:

1020

len = 90;

1021

break;

1022

case XML_CHAR_ENCODING_UCS4BE:

1023

case XML_CHAR_ENCODING_UCS4LE:

1024

case XML_CHAR_ENCODING_UCS4_2143:

1025

case XML_CHAR_ENCODING_UCS4_3412:

1026

len = 180;

1027

break;

1028

case XML_CHAR_ENCODING_EBCDIC:

1029

case XML_CHAR_ENCODING_8859_1:

1030

case XML_CHAR_ENCODING_8859_2:

1031

case XML_CHAR_ENCODING_8859_3:

1032

case XML_CHAR_ENCODING_8859_4:

1033

case XML_CHAR_ENCODING_8859_5:

1034

case XML_CHAR_ENCODING_8859_6:

1035

case XML_CHAR_ENCODING_8859_7:

1036

case XML_CHAR_ENCODING_8859_8:

1037

case XML_CHAR_ENCODING_8859_9:

1038

case XML_CHAR_ENCODING_ASCII:

1039

case XML_CHAR_ENCODING_2022_JP:

1040

case XML_CHAR_ENCODING_SHIFT_JIS:

1041

case XML_CHAR_ENCODING_EUC_JP:

1042

len = 45;

1043

break;

1044

}

1045

handler = xmlGetCharEncodingHandler(enc);

1046

if (handler == NULL) {

1047

/*

1048

* Default handlers.

1049

*/

1050

switch (enc) {

1051

case XML_CHAR_ENCODING_ASCII:

1052

/* default encoding, no conversion should be needed */

1053

ctxt->charset = XML_CHAR_ENCODING_UTF8;

1054

return(0);

1055

case XML_CHAR_ENCODING_UTF16LE:

1056

break;

1057

case XML_CHAR_ENCODING_UTF16BE:

1058

break;

1059

case XML_CHAR_ENCODING_UCS4LE:

1060

__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,

1061

"encoding not supported %s\n",

1062

BAD_CAST "USC4 little endian", NULL);

1063

break;

1064

case XML_CHAR_ENCODING_UCS4BE:

1065

__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,

1066

"encoding not supported %s\n",

1067

BAD_CAST "USC4 big endian", NULL);

1068

break;

1069

case XML_CHAR_ENCODING_EBCDIC:

1070

__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,

1071

"encoding not supported %s\n",

1072

BAD_CAST "EBCDIC", NULL);

1073

break;

1074

case XML_CHAR_ENCODING_UCS4_2143:

1075

__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,

1076

"encoding not supported %s\n",

1077

BAD_CAST "UCS4 2143", NULL);

1078

break;

1079

case XML_CHAR_ENCODING_UCS4_3412:

1080

__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,

1081

"encoding not supported %s\n",

1082

BAD_CAST "UCS4 3412", NULL);

1083

break;

1084

case XML_CHAR_ENCODING_UCS2:

1085

__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,

1086

"encoding not supported %s\n",

1087

BAD_CAST "UCS2", NULL);

1088

break;

1089

case XML_CHAR_ENCODING_8859_1:

1090

case XML_CHAR_ENCODING_8859_2:

1091

case XML_CHAR_ENCODING_8859_3:

1092

case XML_CHAR_ENCODING_8859_4:

1093

case XML_CHAR_ENCODING_8859_5:

1094

case XML_CHAR_ENCODING_8859_6:

1095

case XML_CHAR_ENCODING_8859_7:

1096

case XML_CHAR_ENCODING_8859_8:

1097

case XML_CHAR_ENCODING_8859_9:

1098

/*

1099

* We used to keep the internal content in the

1100

* document encoding however this turns being unmaintainable

1101

* So xmlGetCharEncodingHandler() will return non-null

1102

* values for this now.

1103

*/

1104

if ((ctxt->inputNr == 1) &&

1105

(ctxt->encoding == NULL) &&

1106

(ctxt->input != NULL) &&

1107

(ctxt->input->encoding != NULL)) {

1108

ctxt->encoding = xmlStrdup(ctxt->input->encoding);

1109

}

1110

ctxt->charset = enc;

1111

return(0);

1112

case XML_CHAR_ENCODING_2022_JP:

1113

__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,

1114

"encoding not supported %s\n",

1115

BAD_CAST "ISO-2022-JP", NULL);

1116

break;

1117

case XML_CHAR_ENCODING_SHIFT_JIS:

1118

__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,

1119

"encoding not supported %s\n",

1120

BAD_CAST "Shift_JIS", NULL);

1121

break;

1122

case XML_CHAR_ENCODING_EUC_JP:

1123

__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,

1124

"encoding not supported %s\n",

1125

BAD_CAST "EUC-JP", NULL);

1126

break;

1127

default:

1128

break;

1129

}

1130

}

1131

if (handler == NULL)

1132

return(-1);

1133

ctxt->charset = XML_CHAR_ENCODING_UTF8;

1134

return(xmlSwitchToEncodingInt(ctxt, handler, len));

1135

}

1136

1137

/**

1138

* xmlSwitchInputEncoding:

1139

* @ctxt: the parser context

1140

* @input: the input stream

1141

* @handler: the encoding handler

1142

* @len: the number of bytes to convert for the first line or -1

1143

*

1144

* change the input functions when discovering the character encoding

1145

* of a given entity.

1146

*

1147

* Returns 0 in case of success, -1 otherwise

1148

*/

1149

static int

1150

xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,

1151

xmlCharEncodingHandlerPtr handler, int len)

1152

{

1153

int nbchars;

1154

1155

if (handler == NULL)

1156

return (-1);

1157

if (input == NULL)

1158

return (-1);

1159

if (input->buf != NULL) {

1160

if (input->buf->encoder != NULL) {

1161

/*

1162

* Check in case the auto encoding detetection triggered

1163

* in already.

1164

*/

1165

if (input->buf->encoder == handler)

1166

return (0);

1167

1168

/*

1169

* "UTF-16" can be used for both LE and BE

1170

if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name,

1171

BAD_CAST "UTF-16", 6)) &&

1172

(!xmlStrncmp(BAD_CAST handler->name,

1173

BAD_CAST "UTF-16", 6))) {

1174

return(0);

1175

}

1176

*/

1177

1178

/*

1179

* Note: this is a bit dangerous, but that's what it

1180

* takes to use nearly compatible signature for different

1181

* encodings.

1182

*/

1183

xmlCharEncCloseFunc(input->buf->encoder);

1184

input->buf->encoder = handler;

1185

return (0);

1186

}

1187

input->buf->encoder = handler;

1188

1189

/*

1190

* Is there already some content down the pipe to convert ?

1191

*/

1192

if ((input->buf->buffer != NULL) && (input->buf->buffer->use > 0)) {

1193

int processed;

1194

unsigned int use;

1195

1196

/*

1197

* Specific handling of the Byte Order Mark for

1198

* UTF-16

1199

*/

1200

if ((handler->name != NULL) &&

1201

(!strcmp(handler->name, "UTF-16LE") ||

1202

!strcmp(handler->name, "UTF-16")) &&

1203

(input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) {

1204

input->cur += 2;

1205

}

1206

if ((handler->name != NULL) &&

1207

(!strcmp(handler->name, "UTF-16BE")) &&

1208

(input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) {

1209

input->cur += 2;

1210

}

1211

/*

1212

* Errata on XML-1.0 June 20 2001

1213

* Specific handling of the Byte Order Mark for

1214

* UTF-8

1215

*/

1216

if ((handler->name != NULL) &&

1217

(!strcmp(handler->name, "UTF-8")) &&

1218

(input->cur[0] == 0xEF) &&

1219

(input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) {

1220

input->cur += 3;

1221

}

1222

1223

/*

1224

* Shrink the current input buffer.

1225

* Move it as the raw buffer and create a new input buffer

1226

*/

1227

processed = input->cur - input->base;

1228

xmlBufferShrink(input->buf->buffer, processed);

1229

input->buf->raw = input->buf->buffer;

1230

input->buf->buffer = xmlBufferCreate();

1231

input->buf->rawconsumed = processed;

1232

use = input->buf->raw->use;

1233

1234

if (ctxt->html) {

1235

/*

1236

* convert as much as possible of the buffer

1237

*/

1238

nbchars = xmlCharEncInFunc(input->buf->encoder,

1239

input->buf->buffer,

1240

input->buf->raw);

1241

} else {

1242

/*

1243

* convert just enough to get

1244

* '<?xml version="1.0" encoding="xxx"?>'

1245

* parsed with the autodetected encoding

1246

* into the parser reading buffer.

1247

*/

1248

nbchars = xmlCharEncFirstLineInt(input->buf->encoder,

1249

input->buf->buffer,

1250

input->buf->raw,

1251

len);

1252

}

1253

if (nbchars < 0) {

1254

xmlErrInternal(ctxt,

1255

"switching encoding: encoder error\n",

1256

NULL);

1257

return (-1);

1258

}

1259

input->buf->rawconsumed += use - input->buf->raw->use;

1260

input->base = input->cur = input->buf->buffer->content;

1261

input->end = &input->base[input->buf->buffer->use];

1262

1263

}

1264

return (0);

1265

} else if (input->length == 0) {

1266

/*

1267

* When parsing a static memory array one must know the

1268

* size to be able to convert the buffer.

1269

*/

1270

xmlErrInternal(ctxt, "switching encoding : no input\n", NULL);

1271

return (-1);

1272

}

1273

return (0);

1274

}

1275

1276

/**

1277

* xmlSwitchInputEncoding:

1278

* @ctxt: the parser context

1279

* @input: the input stream

1280

* @handler: the encoding handler

1281

*

1282

* change the input functions when discovering the character encoding

1283

* of a given entity.

1284

*

1285

* Returns 0 in case of success, -1 otherwise

1286

*/

1287

int

1288

xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,

1289

xmlCharEncodingHandlerPtr handler) {

1290

return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1));

1291

}

1292

1293

/**

1294

* xmlSwitchToEncodingInt:

1295

* @ctxt: the parser context

1296

* @handler: the encoding handler

1297

* @len: the lenght to convert or -1

1298

*

1299

* change the input functions when discovering the character encoding

1300

* of a given entity, and convert only @len bytes of the output, this

1301

* is needed on auto detect to allows any declared encoding later to

1302

* convert the actual content after the xmlDecl

1303

*

1304

* Returns 0 in case of success, -1 otherwise

1305

*/

1306

static int

1307

xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,

1308

xmlCharEncodingHandlerPtr handler, int len) {

1309

int ret = 0;

1310

1311

if (handler != NULL) {

1312

if (ctxt->input != NULL) {

1313

ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len);

1314

} else {

1315

xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n",

1316

NULL);

1317

return(-1);

1318

}

1319

/*

1320

* The parsing is now done in UTF8 natively

1321

*/

1322

ctxt->charset = XML_CHAR_ENCODING_UTF8;

1323

} else

1324

return(-1);

1325

return(ret);

1326

}

1327

1328

/**

1329

* xmlSwitchToEncoding:

1330

* @ctxt: the parser context

1331

* @handler: the encoding handler

1332

*

1333

* change the input functions when discovering the character encoding

1334

* of a given entity.

1335

*

1336

* Returns 0 in case of success, -1 otherwise

1337

*/

1338

int

1339

xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)

1340

{

1341

return (xmlSwitchToEncodingInt(ctxt, handler, -1));

1342

}

1343

1344

/************************************************************************

1345

* *

1346

* Commodity functions to handle entities processing *

1347

* *

1348

************************************************************************/

1349

1350

/**

1351

* xmlFreeInputStream:

1352

* @input: an xmlParserInputPtr

1353

*

1354

* Free up an input stream.

1355

*/

1356

void

1357

xmlFreeInputStream(xmlParserInputPtr input) {

1358

if (input == NULL) return;

1359

1360

if (input->filename != NULL) xmlFree((char *) input->filename);

1361

if (input->directory != NULL) xmlFree((char *) input->directory);

1362

if (input->encoding != NULL) xmlFree((char *) input->encoding);

1363

if (input->version != NULL) xmlFree((char *) input->version);

1364

if ((input->free != NULL) && (input->base != NULL))

1365

input->free((xmlChar *) input->base);

1366

if (input->buf != NULL)

1367

xmlFreeParserInputBuffer(input->buf);

1368

xmlFree(input);

1369

}

1370

1371

/**

1372

* xmlNewInputStream:

1373

* @ctxt: an XML parser context

1374

*

1375

* Create a new input stream structure.

1376

*

1377

* Returns the new input stream or NULL

1378

*/

1379

xmlParserInputPtr

1380

xmlNewInputStream(xmlParserCtxtPtr ctxt) {

1381

xmlParserInputPtr input;

1382

1383

input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));

1384

if (input == NULL) {

1385

xmlErrMemory(ctxt, "couldn't allocate a new input stream\n");

1386

return(NULL);

1387

}

1388

memset(input, 0, sizeof(xmlParserInput));

1389

input->line = 1;

1390

input->col = 1;

1391

input->standalone = -1;

1392

1393

/*

1394

* If the context is NULL the id cannot be initialized, but that

1395

* should not happen while parsing which is the situation where

1396

* the id is actually needed.

1397

*/

1398

if (ctxt != NULL)

1399

input->id = ctxt->input_id++;

1400

1401

return(input);

1402

}

1403

1404

/**

1405

* xmlNewIOInputStream:

1406

* @ctxt: an XML parser context

1407

* @input: an I/O Input

1408

* @enc: the charset encoding if known

1409

*

1410

* Create a new input stream structure encapsulating the @input into

1411

* a stream suitable for the parser.

1412

*

1413

* Returns the new input stream or NULL

1414

*/

1415

xmlParserInputPtr

1416

xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,

1417

xmlCharEncoding enc) {

1418

xmlParserInputPtr inputStream;

1419

1420

if (input == NULL) return(NULL);

1421

if (xmlParserDebugEntities)

1422

xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");

1423

inputStream = xmlNewInputStream(ctxt);

1424

if (inputStream == NULL) {

1425

return(NULL);

1426

}

1427

inputStream->filename = NULL;

1428

inputStream->buf = input;

1429

inputStream->base = inputStream->buf->buffer->content;

1430

inputStream->cur = inputStream->buf->buffer->content;

1431

inputStream->end = &inputStream->base[inputStream->buf->buffer->use];

1432

if (enc != XML_CHAR_ENCODING_NONE) {

1433

xmlSwitchEncoding(ctxt, enc);

1434

}

1435

1436

return(inputStream);

1437

}

1438

1439

/**

1440

* xmlNewEntityInputStream:

1441

* @ctxt: an XML parser context

1442

* @entity: an Entity pointer

1443

*

1444

* Create a new input stream based on an xmlEntityPtr

1445

*

1446

* Returns the new input stream or NULL

1447

*/

1448

xmlParserInputPtr

1449

xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {

1450

xmlParserInputPtr input;

1451

1452

if (entity == NULL) {

1453

xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n",

1454

NULL);

1455

return(NULL);

1456

}

1457

if (xmlParserDebugEntities)

1458

xmlGenericError(xmlGenericErrorContext,

1459

"new input from entity: %s\n", entity->name);

1460

if (entity->content == NULL) {

1461

switch (entity->etype) {

1462

case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:

1463

xmlErrInternal(ctxt, "Cannot parse entity %s\n",

1464

entity->name);

1465

break;

1466

case XML_EXTERNAL_GENERAL_PARSED_ENTITY:

1467

case XML_EXTERNAL_PARAMETER_ENTITY:

1468

return(xmlLoadExternalEntity((char *) entity->URI,

1469

(char *) entity->ExternalID, ctxt));

1470

case XML_INTERNAL_GENERAL_ENTITY:

1471

xmlErrInternal(ctxt,

1472

"Internal entity %s without content !\n",

1473

entity->name);

1474

break;

1475

case XML_INTERNAL_PARAMETER_ENTITY:

1476

xmlErrInternal(ctxt,

1477

"Internal parameter entity %s without content !\n",

1478

entity->name);

1479

break;

1480

case XML_INTERNAL_PREDEFINED_ENTITY:

1481

xmlErrInternal(ctxt,

1482

"Predefined entity %s without content !\n",

1483

entity->name);

1484

break;

1485

}

1486

return(NULL);

1487

}

1488

input = xmlNewInputStream(ctxt);

1489

if (input == NULL) {

1490

return(NULL);

1491

}

1492

if (entity->URI != NULL)

1493

input->filename = (char *) xmlStrdup((xmlChar *) entity->URI);

1494

input->base = entity->content;

1495

input->cur = entity->content;

1496

input->length = entity->length;

1497

input->end = &entity->content[input->length];

1498

return(input);

1499

}

1500

1501

/**

1502

* xmlNewStringInputStream:

1503

* @ctxt: an XML parser context

1504

* @buffer: an memory buffer

1505

*

1506

* Create a new input stream based on a memory buffer.

1507

* Returns the new input stream

1508

*/

1509

xmlParserInputPtr

1510

xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {

1511

xmlParserInputPtr input;

1512

1513

if (buffer == NULL) {

1514

xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n",

1515

NULL);

1516

return(NULL);

1517

}

1518

if (xmlParserDebugEntities)

1519

xmlGenericError(xmlGenericErrorContext,

1520

"new fixed input: %.30s\n", buffer);

1521

input = xmlNewInputStream(ctxt);

1522

if (input == NULL) {

1523

xmlErrMemory(ctxt, "couldn't allocate a new input stream\n");

1524

return(NULL);

1525

}

1526

input->base = buffer;

1527

input->cur = buffer;

1528

input->length = xmlStrlen(buffer);

1529

input->end = &buffer[input->length];

1530

return(input);

1531

}

1532

1533

/**

1534

* xmlNewInputFromFile:

1535

* @ctxt: an XML parser context

1536

* @filename: the filename to use as entity

1537

*

1538

* Create a new input stream based on a file or an URL.

1539

*

1540

* Returns the new input stream or NULL in case of error

1541

*/

1542

xmlParserInputPtr

1543

xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {

1544

xmlParserInputBufferPtr buf;

1545

xmlParserInputPtr inputStream;

1546

char *directory = NULL;

1547

xmlChar *URI = NULL;

1548

1549

if (xmlParserDebugEntities)

1550

xmlGenericError(xmlGenericErrorContext,

1551

"new input from file: %s\n", filename);

1552

if (ctxt == NULL) return(NULL);

1553

buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);

1554

if (buf == NULL) {

1555

if (filename == NULL)

1556

__xmlLoaderErr(ctxt,

1557

"failed to load external entity: NULL filename \n",

1558

NULL);

1559

else

1560

__xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n",

1561

(const char *) filename);

1562

return(NULL);

1563

}

1564

1565

inputStream = xmlNewInputStream(ctxt);

1566

if (inputStream == NULL)

1567

return(NULL);

1568

1569

inputStream->buf = buf;

1570

inputStream = xmlCheckHTTPInput(ctxt, inputStream);

1571

if (inputStream == NULL)

1572

return(NULL);

1573

1574

if (inputStream->filename == NULL)

1575

URI = xmlStrdup((xmlChar *) filename);

1576

else

1577

URI = xmlStrdup((xmlChar *) inputStream->filename);

1578

directory = xmlParserGetDirectory((const char *) URI);

1579

if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename);

1580

inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI);

1581

if (URI != NULL) xmlFree((char *) URI);

1582

inputStream->directory = directory;

1583

1584

inputStream->base = inputStream->buf->buffer->content;

1585

inputStream->cur = inputStream->buf->buffer->content;

1586

inputStream->end = &inputStream->base[inputStream->buf->buffer->use];

1587

if ((ctxt->directory == NULL) && (directory != NULL))

1588

ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);

1589

return(inputStream);

1590

}

1591

1592

/************************************************************************

1593

* *

1594

* Commodity functions to handle parser contexts *

1595

* *

1596

************************************************************************/

1597

1598

/**

1599

* xmlInitParserCtxt:

1600

* @ctxt: an XML parser context

1601

*

1602

* Initialize a parser context

1603

*

1604

* Returns 0 in case of success and -1 in case of error

1605

*/

1606

1607

int

1608

xmlInitParserCtxt(xmlParserCtxtPtr ctxt)

1609

{

1610

xmlParserInputPtr input;

1611

1612

if(ctxt==NULL) {

1613

xmlErrInternal(NULL, "Got NULL parser context\n", NULL);

1614

return(-1);

1615

}

1616

1617

xmlDefaultSAXHandlerInit();

1618

1619

if (ctxt->dict == NULL)

1620

ctxt->dict = xmlDictCreate();

1621

if (ctxt->dict == NULL) {

1622

xmlErrMemory(NULL, "cannot initialize parser context\n");

1623

return(-1);

1624

}

1625

if (ctxt->sax == NULL)

1626

ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));

1627

if (ctxt->sax == NULL) {

1628

xmlErrMemory(NULL, "cannot initialize parser context\n");

1629

return(-1);

1630

}

1631

else

1632

xmlSAXVersion(ctxt->sax, 2);

1633

1634

ctxt->maxatts = 0;

1635

ctxt->atts = NULL;

1636

/* Allocate the Input stack */

1637

if (ctxt->inputTab == NULL) {

1638

ctxt->inputTab = (xmlParserInputPtr *)

1639

xmlMalloc(5 * sizeof(xmlParserInputPtr));

1640

ctxt->inputMax = 5;

1641

}

1642

if (ctxt->inputTab == NULL) {

1643

xmlErrMemory(NULL, "cannot initialize parser context\n");

1644

ctxt->inputNr = 0;

1645

ctxt->inputMax = 0;

1646

ctxt->input = NULL;

1647

return(-1);

1648

}

1649

while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */

1650

xmlFreeInputStream(input);

1651

}

1652

ctxt->inputNr = 0;

1653

ctxt->input = NULL;

1654

1655

ctxt->version = NULL;

1656

ctxt->encoding = NULL;

1657

ctxt->standalone = -1;

1658

ctxt->hasExternalSubset = 0;

1659

ctxt->hasPErefs = 0;

1660

ctxt->html = 0;

1661

ctxt->external = 0;

1662

ctxt->instate = XML_PARSER_START;

1663

ctxt->token = 0;

1664

ctxt->directory = NULL;

1665

1666

/* Allocate the Node stack */

1667

if (ctxt->nodeTab == NULL) {

1668

ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));

1669

ctxt->nodeMax = 10;

1670

}

1671

if (ctxt->nodeTab == NULL) {

1672

xmlErrMemory(NULL, "cannot initialize parser context\n");

1673

ctxt->nodeNr = 0;

1674

ctxt->nodeMax = 0;

1675

ctxt->node = NULL;

1676

ctxt->inputNr = 0;

1677

ctxt->inputMax = 0;

1678

ctxt->input = NULL;

1679

return(-1);

1680

}

1681

ctxt->nodeNr = 0;

1682

ctxt->node = NULL;

1683

1684

/* Allocate the Name stack */

1685

if (ctxt->nameTab == NULL) {

1686

ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));

1687

ctxt->nameMax = 10;

1688

}

1689

if (ctxt->nameTab == NULL) {

1690

xmlErrMemory(NULL, "cannot initialize parser context\n");

1691

ctxt->nodeNr = 0;

1692

ctxt->nodeMax = 0;

1693

ctxt->node = NULL;

1694

ctxt->inputNr = 0;

1695

ctxt->inputMax = 0;

1696

ctxt->input = NULL;

1697

ctxt->nameNr = 0;

1698

ctxt->nameMax = 0;

1699

ctxt->name = NULL;

1700

return(-1);

1701

}

1702

ctxt->nameNr = 0;

1703

ctxt->name = NULL;

1704

1705

/* Allocate the space stack */

1706

if (ctxt->spaceTab == NULL) {

1707

ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));

1708

ctxt->spaceMax = 10;

1709

}

1710

if (ctxt->spaceTab == NULL) {

1711

xmlErrMemory(NULL, "cannot initialize parser context\n");

1712

ctxt->nodeNr = 0;

1713

ctxt->nodeMax = 0;

1714

ctxt->node = NULL;

1715

ctxt->inputNr = 0;

1716

ctxt->inputMax = 0;

1717

ctxt->input = NULL;

1718

ctxt->nameNr = 0;

1719

ctxt->nameMax = 0;

1720

ctxt->name = NULL;

1721

ctxt->spaceNr = 0;

1722

ctxt->spaceMax = 0;

1723

ctxt->space = NULL;

1724

return(-1);

1725

}

1726

ctxt->spaceNr = 1;

1727

ctxt->spaceMax = 10;

1728

ctxt->spaceTab[0] = -1;

1729

ctxt->space = &ctxt->spaceTab[0];

1730

ctxt->userData = ctxt;

1731

ctxt->myDoc = NULL;

1732

ctxt->wellFormed = 1;

1733

ctxt->nsWellFormed = 1;

1734

ctxt->valid = 1;

1735

ctxt->loadsubset = xmlLoadExtDtdDefaultValue;

1736

ctxt->validate = xmlDoValidityCheckingDefaultValue;

1737

ctxt->pedantic = xmlPedanticParserDefaultValue;

1738

ctxt->linenumbers = xmlLineNumbersDefaultValue;

1739

ctxt->keepBlanks = xmlKeepBlanksDefaultValue;

1740

if (ctxt->keepBlanks == 0)

1741

ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;

1742

1743

ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;

1744

ctxt->vctxt.userData = ctxt;

1745

ctxt->vctxt.error = xmlParserValidityError;

1746

ctxt->vctxt.warning = xmlParserValidityWarning;

1747

if (ctxt->validate) {

1748

if (xmlGetWarningsDefaultValue == 0)

1749

ctxt->vctxt.warning = NULL;

1750

else

1751

ctxt->vctxt.warning = xmlParserValidityWarning;

1752

ctxt->vctxt.nodeMax = 0;

1753

}

1754

ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;

1755

ctxt->record_info = 0;

1756

ctxt->nbChars = 0;

1757

ctxt->checkIndex = 0;

1758

ctxt->inSubset = 0;

1759

ctxt->errNo = XML_ERR_OK;

1760

ctxt->depth = 0;

1761

ctxt->charset = XML_CHAR_ENCODING_UTF8;

1762

ctxt->catalogs = NULL;

1763

ctxt->nbentities = 0;

1764

ctxt->input_id = 1;

1765

xmlInitNodeInfoSeq(&ctxt->node_seq);

1766

return(0);

1767

}

1768

1769

/**

1770

* xmlFreeParserCtxt:

1771

* @ctxt: an XML parser context

1772

*

1773

* Free all the memory used by a parser context. However the parsed

1774

* document in ctxt->myDoc is not freed.

1775

*/

1776

1777

void

1778

xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)

1779

{

1780

xmlParserInputPtr input;

1781

1782

if (ctxt == NULL) return;

1783

1784

while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */

1785

xmlFreeInputStream(input);

1786

}

1787

if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);

1788

if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);

1789

if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);

1790

if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);

1791

if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);

1792

if (ctxt->version != NULL) xmlFree((char *) ctxt->version);

1793

if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);

1794

if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);

1795

if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);

1796

#ifdef LIBXML_SAX1_ENABLED

1797

if ((ctxt->sax != NULL) &&

1798

(ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))

1799

#else

1800

if (ctxt->sax != NULL)

1801

#endif /* LIBXML_SAX1_ENABLED */

1802

xmlFree(ctxt->sax);

1803

if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);

1804

if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);

1805

if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);

1806

if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);

1807

if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab);

1808

if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);

1809

if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);

1810

if (ctxt->attsDefault != NULL)

1811

xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);

1812

if (ctxt->attsSpecial != NULL)

1813

xmlHashFree(ctxt->attsSpecial, NULL);

1814

if (ctxt->freeElems != NULL) {

1815

xmlNodePtr cur, next;

1816

1817

cur = ctxt->freeElems;

1818

while (cur != NULL) {

1819

next = cur->next;

1820

xmlFree(cur);

1821

cur = next;

1822

}

1823

}

1824

if (ctxt->freeAttrs != NULL) {

1825

xmlAttrPtr cur, next;

1826

1827

cur = ctxt->freeAttrs;

1828

while (cur != NULL) {

1829

next = cur->next;

1830

xmlFree(cur);

1831

cur = next;

1832

}

1833

}

1834

/*

1835

* cleanup the error strings

1836

*/

1837

if (ctxt->lastError.message != NULL)

1838

xmlFree(ctxt->lastError.message);

1839

if (ctxt->lastError.file != NULL)

1840

xmlFree(ctxt->lastError.file);

1841

if (ctxt->lastError.str1 != NULL)

1842

xmlFree(ctxt->lastError.str1);

1843

if (ctxt->lastError.str2 != NULL)

1844

xmlFree(ctxt->lastError.str2);

1845

if (ctxt->lastError.str3 != NULL)

1846

xmlFree(ctxt->lastError.str3);

1847

1848

#ifdef LIBXML_CATALOG_ENABLED

1849

if (ctxt->catalogs != NULL)

1850

xmlCatalogFreeLocal(ctxt->catalogs);

1851

#endif

1852

xmlFree(ctxt);

1853

}

1854

1855

/**

1856

* xmlNewParserCtxt:

1857

*

1858

* Allocate and initialize a new parser context.

1859

*

1860

* Returns the xmlParserCtxtPtr or NULL

1861

*/

1862

1863

xmlParserCtxtPtr

1864

xmlNewParserCtxt(void)

1865

{

1866

xmlParserCtxtPtr ctxt;

1867

1868

ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));

1869

if (ctxt == NULL) {

1870

xmlErrMemory(NULL, "cannot allocate parser context\n");

1871

return(NULL);

1872

}

1873

memset(ctxt, 0, sizeof(xmlParserCtxt));

1874

if (xmlInitParserCtxt(ctxt) < 0) {

1875

xmlFreeParserCtxt(ctxt);

1876

return(NULL);

1877

}

1878

return(ctxt);

1879

}

1880

1881

/************************************************************************

1882

* *

1883

* Handling of node informations *

1884

* *

1885

************************************************************************/

1886

1887

/**

1888

* xmlClearParserCtxt:

1889

* @ctxt: an XML parser context

1890

*

1891

* Clear (release owned resources) and reinitialize a parser context

1892

*/

1893

1894

void

1895

xmlClearParserCtxt(xmlParserCtxtPtr ctxt)

1896

{

1897

if (ctxt==NULL)

1898

return;

1899

xmlClearNodeInfoSeq(&ctxt->node_seq);

1900

xmlCtxtReset(ctxt);

1901

}

1902

1903

1904

/**

1905

* xmlParserFindNodeInfo:

1906

* @ctx: an XML parser context

1907

* @node: an XML node within the tree

1908

*

1909

* Find the parser node info struct for a given node

1910

*

1911

* Returns an xmlParserNodeInfo block pointer or NULL

1912

*/

1913

const xmlParserNodeInfo *

1914

xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node)

1915

{

1916

unsigned long pos;

1917

1918

if ((ctx == NULL) || (node == NULL))

1919

return (NULL);

1920

/* Find position where node should be at */

1921

pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);

1922

if (pos < ctx->node_seq.length

1923

&& ctx->node_seq.buffer[pos].node == node)

1924

return &ctx->node_seq.buffer[pos];

1925

else

1926

return NULL;

1927

}

1928

1929

1930

/**

1931

* xmlInitNodeInfoSeq:

1932

* @seq: a node info sequence pointer

1933

*

1934

* -- Initialize (set to initial state) node info sequence

1935

*/

1936

void

1937

xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)

1938

{

1939

if (seq == NULL)

1940

return;

1941

seq->length = 0;

1942

seq->maximum = 0;

1943

seq->buffer = NULL;

1944

}

1945

1946

/**

1947

* xmlClearNodeInfoSeq:

1948

* @seq: a node info sequence pointer

1949

*

1950

* -- Clear (release memory and reinitialize) node

1951

* info sequence

1952

*/

1953

void

1954

xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)

1955

{

1956

if (seq == NULL)

1957

return;

1958

if (seq->buffer != NULL)

1959

xmlFree(seq->buffer);

1960

xmlInitNodeInfoSeq(seq);

1961

}

1962

1963

/**

1964

* xmlParserFindNodeInfoIndex:

1965

* @seq: a node info sequence pointer

1966

* @node: an XML node pointer

1967

*

1968

*

1969

* xmlParserFindNodeInfoIndex : Find the index that the info record for

1970

* the given node is or should be at in a sorted sequence

1971

*

1972

* Returns a long indicating the position of the record

1973

*/

1974

unsigned long

1975

xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,

1976

const xmlNodePtr node)

1977

{

1978

unsigned long upper, lower, middle;

1979

int found = 0;

1980

1981

if ((seq == NULL) || (node == NULL))

1982

return ((unsigned long) -1);

1983

1984

/* Do a binary search for the key */

1985

lower = 1;

1986

upper = seq->length;

1987

middle = 0;

1988

while (lower <= upper && !found) {

1989

middle = lower + (upper - lower) / 2;

1990

if (node == seq->buffer[middle - 1].node)

1991

found = 1;

1992

else if (node < seq->buffer[middle - 1].node)

1993

upper = middle - 1;

1994

else

1995

lower = middle + 1;

1996

}

1997

1998

/* Return position */

1999

if (middle == 0 || seq->buffer[middle - 1].node < node)

2000

return middle;

2001

else

2002

return middle - 1;

2003

}

2004

2005

2006

/**

2007

* xmlParserAddNodeInfo:

2008

* @ctxt: an XML parser context

2009

* @info: a node info sequence pointer

2010

*

2011

* Insert node info record into the sorted sequence

2012

*/

2013

void

2014

xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,

2015

const xmlParserNodeInfoPtr info)

2016

{

2017

unsigned long pos;

2018

2019

if ((ctxt == NULL) || (info == NULL)) return;

2020

2021

/* Find pos and check to see if node is already in the sequence */

2022

pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)

2023

info->node);

2024

2025

if ((pos < ctxt->node_seq.length) &&

2026

(ctxt->node_seq.buffer != NULL) &&

2027

(ctxt->node_seq.buffer[pos].node == info->node)) {

2028

ctxt->node_seq.buffer[pos] = *info;

2029

}

2030

2031

/* Otherwise, we need to add new node to buffer */

2032

else {

2033

if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) {

2034

xmlParserNodeInfo *tmp_buffer;

2035

unsigned int byte_size;

2036

2037

if (ctxt->node_seq.maximum == 0)

2038

ctxt->node_seq.maximum = 2;

2039

byte_size = (sizeof(*ctxt->node_seq.buffer) *

2040

(2 * ctxt->node_seq.maximum));

2041

2042

if (ctxt->node_seq.buffer == NULL)

2043

tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);

2044

else

2045

tmp_buffer =

2046

(xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,

2047

byte_size);

2048

2049

if (tmp_buffer == NULL) {

2050

xmlErrMemory(ctxt, "failed to allocate buffer\n");

2051

return;

2052

}

2053

ctxt->node_seq.buffer = tmp_buffer;

2054

ctxt->node_seq.maximum *= 2;

2055

}

2056

2057

/* If position is not at end, move elements out of the way */

2058

if (pos != ctxt->node_seq.length) {

2059

unsigned long i;

2060

2061

for (i = ctxt->node_seq.length; i > pos; i--)

2062

ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];

2063

}

2064

2065

/* Copy element and increase length */

2066

ctxt->node_seq.buffer[pos] = *info;

2067

ctxt->node_seq.length++;

2068

}

2069

}

2070

2071

/************************************************************************

2072

* *

2073

* Default settings *

2074

* *

2075

************************************************************************/

2076

/**

2077

* xmlPedanticParserDefault:

2078

* @val: int 0 or 1

2079

*

2080

* Set and return the previous value for enabling pedantic warnings.

2081

*

2082

* Returns the last value for 0 for no substitution, 1 for substitution.

2083

*/

2084

2085

int

2086

xmlPedanticParserDefault(int val) {

2087

int old = xmlPedanticParserDefaultValue;

2088

2089

xmlPedanticParserDefaultValue = val;

2090

return(old);

2091

}

2092

2093

/**

2094

* xmlLineNumbersDefault:

2095

* @val: int 0 or 1

2096

*

2097

* Set and return the previous value for enabling line numbers in elements

2098

* contents. This may break on old application and is turned off by default.

2099

*

2100

* Returns the last value for 0 for no substitution, 1 for substitution.

2101

*/

2102

2103

int

2104

xmlLineNumbersDefault(int val) {

2105

int old = xmlLineNumbersDefaultValue;

2106

2107

xmlLineNumbersDefaultValue = val;

2108

return(old);

2109

}

2110

2111

/**

2112

* xmlSubstituteEntitiesDefault:

2113

* @val: int 0 or 1

2114

*

2115

* Set and return the previous value for default entity support.

2116

* Initially the parser always keep entity references instead of substituting

2117

* entity values in the output. This function has to be used to change the

2118

* default parser behavior

2119

* SAX::substituteEntities() has to be used for changing that on a file by

2120

* file basis.

2121

*

2122

* Returns the last value for 0 for no substitution, 1 for substitution.

2123

*/

2124

2125

int

2126

xmlSubstituteEntitiesDefault(int val) {

2127

int old = xmlSubstituteEntitiesDefaultValue;

2128

2129

xmlSubstituteEntitiesDefaultValue = val;

2130

return(old);

2131

}

2132

2133

/**

2134

* xmlKeepBlanksDefault:

2135

* @val: int 0 or 1

2136

*

2137

* Set and return the previous value for default blanks text nodes support.

2138

* The 1.x version of the parser used an heuristic to try to detect

2139

* ignorable white spaces. As a result the SAX callback was generating

2140

* xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when

2141

* using the DOM output text nodes containing those blanks were not generated.

2142

* The 2.x and later version will switch to the XML standard way and

2143

* ignorableWhitespace() are only generated when running the parser in

2144

* validating mode and when the current element doesn't allow CDATA or

2145

* mixed content.

2146

* This function is provided as a way to force the standard behavior

2147

* on 1.X libs and to switch back to the old mode for compatibility when

2148

* running 1.X client code on 2.X . Upgrade of 1.X code should be done

2149

* by using xmlIsBlankNode() commodity function to detect the "empty"

2150

* nodes generated.

2151

* This value also affect autogeneration of indentation when saving code

2152

* if blanks sections are kept, indentation is not generated.

2153

*

2154

* Returns the last value for 0 for no substitution, 1 for substitution.

2155

*/

2156

2157

int

2158

xmlKeepBlanksDefault(int val) {

2159

int old = xmlKeepBlanksDefaultValue;

2160

2161

xmlKeepBlanksDefaultValue = val;

2162

if (!val) xmlIndentTreeOutput = 1;

2163

return(old);

2164

}

2165

2166

#define bottom_parserInternals

2167

#include "elfgcchack.h"