2
* parserInternals.c : Internal routines (and obsolete ones) needed for the
3
* XML and HTML parsers.
5
* See Copyright for the status of this software.
13
#if defined(WIN32) && !defined (__CYGWIN__)
14
#define XML_DIR_SEP '\\'
16
#define XML_DIR_SEP '/'
26
#ifdef HAVE_SYS_STAT_H
39
#include <libxml/xmlmemory.h>
40
#include <libxml/tree.h>
41
#include <libxml/parser.h>
42
#include <libxml/parserInternals.h>
43
#include <libxml/valid.h>
44
#include <libxml/entities.h>
45
#include <libxml/xmlerror.h>
46
#include <libxml/encoding.h>
47
#include <libxml/valid.h>
48
#include <libxml/xmlIO.h>
49
#include <libxml/uri.h>
50
#include <libxml/dict.h>
51
#include <libxml/SAX.h>
52
#ifdef LIBXML_CATALOG_ENABLED
53
#include <libxml/catalog.h>
55
#include <libxml/globals.h>
56
#include <libxml/chvalid.h>
62
* Various global defaults for parsing
67
* @version: the include version number
69
* check the compiled lib version against the include one.
70
* This can warn or immediately kill the application
73
xmlCheckVersion(int version) {
74
int myversion = (int) LIBXML_VERSION;
78
if ((myversion / 10000) != (version / 10000)) {
79
xmlGenericError(xmlGenericErrorContext,
80
"Fatal: program compiled against libxml %d using libxml %d\n",
81
(version / 10000), (myversion / 10000));
83
"Fatal: program compiled against libxml %d using libxml %d\n",
84
(version / 10000), (myversion / 10000));
86
if ((myversion / 100) < (version / 100)) {
87
xmlGenericError(xmlGenericErrorContext,
88
"Warning: program compiled against libxml %d using older %d\n",
89
(version / 100), (myversion / 100));
94
/************************************************************************
96
* Some factorized error routines *
98
************************************************************************/
103
* @ctxt: an XML parser context
104
* @extra: extra information
106
* Handle a memory allocation error
109
xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
111
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
112
(ctxt->instate == XML_PARSER_EOF))
115
ctxt->errNo = XML_ERR_NO_MEMORY;
116
ctxt->instate = XML_PARSER_EOF;
117
ctxt->disableSAX = 1;
120
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
121
XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
123
"Memory allocation failed : %s\n", extra);
125
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
126
XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,
127
NULL, NULL, 0, 0, "Memory allocation failed\n");
132
* @ctxt: an XML parser context
133
* @xmlerr: the error number
134
* @msg: the error message
135
* @str1: a string info
136
* @str2: a string info
138
* Handle an encoding error
141
__xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr,
142
const char *msg, const xmlChar * str1, const xmlChar * str2)
144
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
145
(ctxt->instate == XML_PARSER_EOF))
148
ctxt->errNo = xmlerr;
149
__xmlRaiseError(NULL, NULL, NULL,
150
ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL,
151
NULL, 0, (const char *) str1, (const char *) str2,
152
NULL, 0, 0, msg, str1, str2);
154
ctxt->wellFormed = 0;
155
if (ctxt->recovery == 0)
156
ctxt->disableSAX = 1;
162
* @ctxt: an XML parser context
163
* @msg: the error message
164
* @str: error information
166
* Handle an internal error
169
xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str)
171
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
172
(ctxt->instate == XML_PARSER_EOF))
175
ctxt->errNo = XML_ERR_INTERNAL_ERROR;
176
__xmlRaiseError(NULL, NULL, NULL,
177
ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR,
178
XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL,
181
ctxt->wellFormed = 0;
182
if (ctxt->recovery == 0)
183
ctxt->disableSAX = 1;
189
* @ctxt: an XML parser context
190
* @error: the error number
191
* @msg: the error message
192
* @val: an integer value
197
xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
198
const char *msg, int val)
200
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
201
(ctxt->instate == XML_PARSER_EOF))
205
__xmlRaiseError(NULL, NULL, NULL,
206
ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
207
NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
209
ctxt->wellFormed = 0;
210
if (ctxt->recovery == 0)
211
ctxt->disableSAX = 1;
217
* @c: a Unicode character (int)
219
* Check whether the character is allowed by the production
220
* [84] Letter ::= BaseChar | Ideographic
222
* Returns 0 if not, non-zero otherwise
226
return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c));
229
/************************************************************************
231
* Input handling functions for progressive parsing *
233
************************************************************************/
235
/* #define DEBUG_INPUT */
236
/* #define DEBUG_STACK */
237
/* #define DEBUG_PUSH */
240
/* we need to keep enough input to show errors in context */
244
#define CHECK_BUFFER(in) check_buffer(in)
247
void check_buffer(xmlParserInputPtr in) {
248
if (in->base != xmlBufContent(in->buf->buffer)) {
249
xmlGenericError(xmlGenericErrorContext,
250
"xmlParserInput: base mismatch problem\n");
252
if (in->cur < in->base) {
253
xmlGenericError(xmlGenericErrorContext,
254
"xmlParserInput: cur < base problem\n");
256
if (in->cur > in->base + xmlBufUse(in->buf->buffer)) {
257
xmlGenericError(xmlGenericErrorContext,
258
"xmlParserInput: cur > base + use problem\n");
260
xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d\n",
261
(int) in, (int) xmlBufContent(in->buf->buffer), in->cur - in->base,
262
xmlBufUse(in->buf->buffer));
266
#define CHECK_BUFFER(in)
271
* xmlParserInputRead:
272
* @in: an XML parser input
273
* @len: an indicative size for the lookahead
275
* This function was internal and is deprecated.
277
* Returns -1 as this is an error to use it.
280
xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
285
* xmlParserInputGrow:
286
* @in: an XML parser input
287
* @len: an indicative size for the lookahead
289
* This function increases the input for the parser. It tries to
290
* preserve pointers to the input buffer, and keep already read data
292
* Returns the number of chars read, or -1 in case of error; 0 indicates the
296
xmlParserInputGrow(xmlParserInputPtr in, int len) {
299
const xmlChar *content;
301
if ((in == NULL) || (len < 0)) return(-1);
303
xmlGenericError(xmlGenericErrorContext, "Grow\n");
305
if (in->buf == NULL) return(-1);
306
if (in->base == NULL) return(-1);
307
if (in->cur == NULL) return(-1);
308
if (in->buf->buffer == NULL) return(-1);
312
indx = in->cur - in->base;
313
if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
319
if (in->buf->readcallback != NULL) {
320
ret = xmlParserInputBufferGrow(in->buf, len);
325
* NOTE : in->base may be a "dangling" i.e. freed pointer in this
326
* block, but we use it really as an integer to do some
327
* pointer arithmetic. Insure will raise it as a bug but in
328
* that specific case, that's not !
331
content = xmlBufContent(in->buf->buffer);
332
if (in->base != content) {
334
* the buffer has been reallocated
336
indx = in->cur - in->base;
338
in->cur = &content[indx];
340
in->end = xmlBufEnd(in->buf->buffer);
348
* xmlParserInputShrink:
349
* @in: an XML parser input
351
* This function removes used input for the parser.
354
xmlParserInputShrink(xmlParserInputPtr in) {
358
const xmlChar *content;
361
xmlGenericError(xmlGenericErrorContext, "Shrink\n");
363
if (in == NULL) return;
364
if (in->buf == NULL) return;
365
if (in->base == NULL) return;
366
if (in->cur == NULL) return;
367
if (in->buf->buffer == NULL) return;
371
used = in->cur - xmlBufContent(in->buf->buffer);
373
* Do not shrink on large buffers whose only a tiny fraction
376
if (used > INPUT_CHUNK) {
377
ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
382
in->end = xmlBufEnd(in->buf->buffer);
387
if (xmlBufUse(in->buf->buffer) > INPUT_CHUNK) {
390
xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
391
content = xmlBufContent(in->buf->buffer);
392
if (in->base != content) {
394
* the buffer has been reallocated
396
indx = in->cur - in->base;
398
in->cur = &content[indx];
400
in->end = xmlBufEnd(in->buf->buffer);
405
/************************************************************************
407
* UTF8 character input and related functions *
409
************************************************************************/
413
* @ctxt: the XML parser context
415
* Skip to the next input char.
419
xmlNextChar(xmlParserCtxtPtr ctxt)
421
if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) ||
422
(ctxt->input == NULL))
425
if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
426
if ((*ctxt->input->cur == 0) &&
427
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
428
(ctxt->instate != XML_PARSER_COMMENT)) {
430
* If we are at the end of the current entity and
431
* the context allows it, we pop consumed entities
433
* the auto closing should be blocked in other cases
437
const unsigned char *cur;
441
* 2.11 End-of-Line Handling
442
* the literal two-character sequence "#xD#xA" or a standalone
443
* literal #xD, an XML processor must pass to the application
444
* the single character #xA.
446
if (*(ctxt->input->cur) == '\n') {
447
ctxt->input->line++; ctxt->input->col = 1;
452
* We are supposed to handle UTF8, check it's valid
453
* From rfc2044: encoding of the Unicode values on UTF-8:
455
* UCS-4 range (hex.) UTF-8 octet sequence (binary)
456
* 0000 0000-0000 007F 0xxxxxxx
457
* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
458
* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
460
* Check for the 0x110000 limit too
462
cur = ctxt->input->cur;
469
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
470
cur = ctxt->input->cur;
472
if ((cur[1] & 0xc0) != 0x80)
474
if ((c & 0xe0) == 0xe0) {
478
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
479
cur = ctxt->input->cur;
481
if ((cur[2] & 0xc0) != 0x80)
483
if ((c & 0xf0) == 0xf0) {
485
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
486
cur = ctxt->input->cur;
488
if (((c & 0xf8) != 0xf0) ||
489
((cur[3] & 0xc0) != 0x80))
492
ctxt->input->cur += 4;
493
val = (cur[0] & 0x7) << 18;
494
val |= (cur[1] & 0x3f) << 12;
495
val |= (cur[2] & 0x3f) << 6;
496
val |= cur[3] & 0x3f;
499
ctxt->input->cur += 3;
500
val = (cur[0] & 0xf) << 12;
501
val |= (cur[1] & 0x3f) << 6;
502
val |= cur[2] & 0x3f;
504
if (((val > 0xd7ff) && (val < 0xe000)) ||
505
((val > 0xfffd) && (val < 0x10000)) ||
507
xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
508
"Char 0x%X out of allowed range\n",
513
ctxt->input->cur += 2;
519
if (*ctxt->input->cur == 0)
520
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
524
* Assume it's a fixed length encoding (1) with
525
* a compatible encoding for the ASCII set, since
526
* XML constructs only use < 128 chars
529
if (*(ctxt->input->cur) == '\n') {
530
ctxt->input->line++; ctxt->input->col = 1;
535
if (*ctxt->input->cur == 0)
536
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
538
if ((*ctxt->input->cur == '%') && (!ctxt->html))
539
xmlParserHandlePEReference(ctxt);
540
if ((*ctxt->input->cur == 0) &&
541
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
546
* If we detect a UTF-8 error, that probably means that the
547
* input encoding didn't get properly advertised in the
548
* declaration header. Report the error and switch the encoding
549
* to ISO-Latin-1 (if you don't like this policy, just declare the
552
if ((ctxt == NULL) || (ctxt->input == NULL) ||
553
(ctxt->input->end - ctxt->input->cur < 4)) {
554
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
555
"Input is not proper UTF-8, indicate encoding !\n",
560
snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
561
ctxt->input->cur[0], ctxt->input->cur[1],
562
ctxt->input->cur[2], ctxt->input->cur[3]);
563
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
564
"Input is not proper UTF-8, indicate encoding !\n%s",
565
BAD_CAST buffer, NULL);
567
ctxt->charset = XML_CHAR_ENCODING_8859_1;
574
* @ctxt: the XML parser context
575
* @len: pointer to the length of the char read
577
* The current char value, if using UTF-8 this may actually span multiple
578
* bytes in the input buffer. Implement the end of line normalization:
579
* 2.11 End-of-Line Handling
580
* Wherever an external parsed entity or the literal entity value
581
* of an internal parsed entity contains either the literal two-character
582
* sequence "#xD#xA" or a standalone literal #xD, an XML processor
583
* must pass to the application the single character #xA.
584
* This behavior can conveniently be produced by normalizing all
585
* line breaks to #xA on input, before parsing.)
587
* Returns the current char value and its length
591
xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
592
if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
593
if (ctxt->instate == XML_PARSER_EOF)
596
if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
598
return((int) *ctxt->input->cur);
600
if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
602
* We are supposed to handle UTF8, check it's valid
603
* From rfc2044: encoding of the Unicode values on UTF-8:
605
* UCS-4 range (hex.) UTF-8 octet sequence (binary)
606
* 0000 0000-0000 007F 0xxxxxxx
607
* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
608
* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
610
* Check for the 0x110000 limit too
612
const unsigned char *cur = ctxt->input->cur;
618
if (((c & 0x40) == 0) || (c == 0xC0))
621
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
622
cur = ctxt->input->cur;
624
if ((cur[1] & 0xc0) != 0x80)
626
if ((c & 0xe0) == 0xe0) {
628
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
629
cur = ctxt->input->cur;
631
if ((cur[2] & 0xc0) != 0x80)
633
if ((c & 0xf0) == 0xf0) {
635
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
636
cur = ctxt->input->cur;
638
if (((c & 0xf8) != 0xf0) ||
639
((cur[3] & 0xc0) != 0x80))
643
val = (cur[0] & 0x7) << 18;
644
val |= (cur[1] & 0x3f) << 12;
645
val |= (cur[2] & 0x3f) << 6;
646
val |= cur[3] & 0x3f;
652
val = (cur[0] & 0xf) << 12;
653
val |= (cur[1] & 0x3f) << 6;
654
val |= cur[2] & 0x3f;
661
val = (cur[0] & 0x1f) << 6;
662
val |= cur[1] & 0x3f;
667
xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
668
"Char 0x%X out of allowed range\n", val);
674
if (*ctxt->input->cur == 0)
675
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
676
if ((*ctxt->input->cur == 0) &&
677
(ctxt->input->end > ctxt->input->cur)) {
678
xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
679
"Char 0x0 out of allowed range\n", 0);
681
if (*ctxt->input->cur == 0xD) {
682
if (ctxt->input->cur[1] == 0xA) {
688
return((int) *ctxt->input->cur);
692
* Assume it's a fixed length encoding (1) with
693
* a compatible encoding for the ASCII set, since
694
* XML constructs only use < 128 chars
697
if (*ctxt->input->cur == 0xD) {
698
if (ctxt->input->cur[1] == 0xA) {
704
return((int) *ctxt->input->cur);
707
* An encoding problem may arise from a truncated input buffer
708
* splitting a character in the middle. In that case do not raise
709
* an error but return 0 to indicate an end of stream problem
711
if (ctxt->input->end - ctxt->input->cur < 4) {
717
* If we detect a UTF-8 error, that probably means that the
718
* input encoding didn't get properly advertised in the
719
* declaration header. Report the error and switch the encoding
720
* to ISO-Latin-1 (if you don't like this policy, just declare the
726
snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
727
ctxt->input->cur[0], ctxt->input->cur[1],
728
ctxt->input->cur[2], ctxt->input->cur[3]);
729
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
730
"Input is not proper UTF-8, indicate encoding !\n%s",
731
BAD_CAST buffer, NULL);
733
ctxt->charset = XML_CHAR_ENCODING_8859_1;
735
return((int) *ctxt->input->cur);
739
* xmlStringCurrentChar:
740
* @ctxt: the XML parser context
741
* @cur: pointer to the beginning of the char
742
* @len: pointer to the length of the char read
744
* The current char value, if using UTF-8 this may actually span multiple
745
* bytes in the input buffer.
747
* Returns the current char value and its length
751
xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
753
if ((len == NULL) || (cur == NULL)) return(0);
754
if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
756
* We are supposed to handle UTF8, check it's valid
757
* From rfc2044: encoding of the Unicode values on UTF-8:
759
* UCS-4 range (hex.) UTF-8 octet sequence (binary)
760
* 0000 0000-0000 007F 0xxxxxxx
761
* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
762
* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
764
* Check for the 0x110000 limit too
771
if ((cur[1] & 0xc0) != 0x80)
773
if ((c & 0xe0) == 0xe0) {
775
if ((cur[2] & 0xc0) != 0x80)
777
if ((c & 0xf0) == 0xf0) {
778
if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
782
val = (cur[0] & 0x7) << 18;
783
val |= (cur[1] & 0x3f) << 12;
784
val |= (cur[2] & 0x3f) << 6;
785
val |= cur[3] & 0x3f;
789
val = (cur[0] & 0xf) << 12;
790
val |= (cur[1] & 0x3f) << 6;
791
val |= cur[2] & 0x3f;
796
val = (cur[0] & 0x1f) << 6;
797
val |= cur[1] & 0x3f;
800
xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
801
"Char 0x%X out of allowed range\n", val);
811
* Assume it's a fixed length encoding (1) with
812
* a compatible encoding for the ASCII set, since
813
* XML constructs only use < 128 chars
820
* An encoding problem may arise from a truncated input buffer
821
* splitting a character in the middle. In that case do not raise
822
* an error but return 0 to indicate an end of stream problem
824
if ((ctxt == NULL) || (ctxt->input == NULL) ||
825
(ctxt->input->end - ctxt->input->cur < 4)) {
830
* If we detect a UTF-8 error, that probably means that the
831
* input encoding didn't get properly advertised in the
832
* declaration header. Report the error and switch the encoding
833
* to ISO-Latin-1 (if you don't like this policy, just declare the
839
snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
840
ctxt->input->cur[0], ctxt->input->cur[1],
841
ctxt->input->cur[2], ctxt->input->cur[3]);
842
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
843
"Input is not proper UTF-8, indicate encoding !\n%s",
844
BAD_CAST buffer, NULL);
851
* xmlCopyCharMultiByte:
852
* @out: pointer to an array of xmlChar
853
* @val: the char value
855
* append the char value in the array
857
* Returns the number of xmlChar written
860
xmlCopyCharMultiByte(xmlChar *out, int val) {
861
if (out == NULL) return(0);
863
* We are supposed to handle UTF8, check it's valid
864
* From rfc2044: encoding of the Unicode values on UTF-8:
866
* UCS-4 range (hex.) UTF-8 octet sequence (binary)
867
* 0000 0000-0000 007F 0xxxxxxx
868
* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
869
* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
872
xmlChar *savedout = out;
874
if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
875
else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
876
else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
878
xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR,
879
"Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
883
for ( ; bits >= 0; bits-= 6)
884
*out++= ((val >> bits) & 0x3F) | 0x80 ;
885
return (out - savedout);
887
*out = (xmlChar) val;
893
* @len: Ignored, compatibility
894
* @out: pointer to an array of xmlChar
895
* @val: the char value
897
* append the char value in the array
899
* Returns the number of xmlChar written
903
xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
904
if (out == NULL) return(0);
905
/* the len parameter is ignored */
907
return(xmlCopyCharMultiByte (out, val));
909
*out = (xmlChar) val;
913
/************************************************************************
915
* Commodity functions to switch encodings *
917
************************************************************************/
920
xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
921
xmlCharEncodingHandlerPtr handler, int len);
923
xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
924
xmlCharEncodingHandlerPtr handler, int len);
927
* @ctxt: the parser context
928
* @enc: the encoding value (number)
930
* change the input functions when discovering the character encoding
933
* Returns 0 in case of success, -1 otherwise
936
xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
938
xmlCharEncodingHandlerPtr handler;
941
if (ctxt == NULL) return(-1);
943
case XML_CHAR_ENCODING_ERROR:
944
__xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,
945
"encoding unknown\n", NULL, NULL);
947
case XML_CHAR_ENCODING_NONE:
948
/* let's assume it's UTF-8 without the XML decl */
949
ctxt->charset = XML_CHAR_ENCODING_UTF8;
951
case XML_CHAR_ENCODING_UTF8:
952
/* default encoding, no conversion should be needed */
953
ctxt->charset = XML_CHAR_ENCODING_UTF8;
956
* Errata on XML-1.0 June 20 2001
957
* Specific handling of the Byte Order Mark for
960
if ((ctxt->input != NULL) &&
961
(ctxt->input->cur[0] == 0xEF) &&
962
(ctxt->input->cur[1] == 0xBB) &&
963
(ctxt->input->cur[2] == 0xBF)) {
964
ctxt->input->cur += 3;
967
case XML_CHAR_ENCODING_UTF16LE:
968
case XML_CHAR_ENCODING_UTF16BE:
969
/*The raw input characters are encoded
970
*in UTF-16. As we expect this function
971
*to be called after xmlCharEncInFunc, we expect
972
*ctxt->input->cur to contain UTF-8 encoded characters.
973
*So the raw UTF16 Byte Order Mark
974
*has also been converted into
975
*an UTF-8 BOM. Let's skip that BOM.
977
if ((ctxt->input != NULL) && (ctxt->input->cur != NULL) &&
978
(ctxt->input->cur[0] == 0xEF) &&
979
(ctxt->input->cur[1] == 0xBB) &&
980
(ctxt->input->cur[2] == 0xBF)) {
981
ctxt->input->cur += 3;
985
case XML_CHAR_ENCODING_UCS2:
988
case XML_CHAR_ENCODING_UCS4BE:
989
case XML_CHAR_ENCODING_UCS4LE:
990
case XML_CHAR_ENCODING_UCS4_2143:
991
case XML_CHAR_ENCODING_UCS4_3412:
994
case XML_CHAR_ENCODING_EBCDIC:
995
case XML_CHAR_ENCODING_8859_1:
996
case XML_CHAR_ENCODING_8859_2:
997
case XML_CHAR_ENCODING_8859_3:
998
case XML_CHAR_ENCODING_8859_4:
999
case XML_CHAR_ENCODING_8859_5:
1000
case XML_CHAR_ENCODING_8859_6:
1001
case XML_CHAR_ENCODING_8859_7:
1002
case XML_CHAR_ENCODING_8859_8:
1003
case XML_CHAR_ENCODING_8859_9:
1004
case XML_CHAR_ENCODING_ASCII:
1005
case XML_CHAR_ENCODING_2022_JP:
1006
case XML_CHAR_ENCODING_SHIFT_JIS:
1007
case XML_CHAR_ENCODING_EUC_JP:
1011
handler = xmlGetCharEncodingHandler(enc);
1012
if (handler == NULL) {
1017
case XML_CHAR_ENCODING_ASCII:
1018
/* default encoding, no conversion should be needed */
1019
ctxt->charset = XML_CHAR_ENCODING_UTF8;
1021
case XML_CHAR_ENCODING_UTF16LE:
1023
case XML_CHAR_ENCODING_UTF16BE:
1025
case XML_CHAR_ENCODING_UCS4LE:
1026
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1027
"encoding not supported %s\n",
1028
BAD_CAST "USC4 little endian", NULL);
1030
case XML_CHAR_ENCODING_UCS4BE:
1031
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1032
"encoding not supported %s\n",
1033
BAD_CAST "USC4 big endian", NULL);
1035
case XML_CHAR_ENCODING_EBCDIC:
1036
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1037
"encoding not supported %s\n",
1038
BAD_CAST "EBCDIC", NULL);
1040
case XML_CHAR_ENCODING_UCS4_2143:
1041
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1042
"encoding not supported %s\n",
1043
BAD_CAST "UCS4 2143", NULL);
1045
case XML_CHAR_ENCODING_UCS4_3412:
1046
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1047
"encoding not supported %s\n",
1048
BAD_CAST "UCS4 3412", NULL);
1050
case XML_CHAR_ENCODING_UCS2:
1051
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1052
"encoding not supported %s\n",
1053
BAD_CAST "UCS2", NULL);
1055
case XML_CHAR_ENCODING_8859_1:
1056
case XML_CHAR_ENCODING_8859_2:
1057
case XML_CHAR_ENCODING_8859_3:
1058
case XML_CHAR_ENCODING_8859_4:
1059
case XML_CHAR_ENCODING_8859_5:
1060
case XML_CHAR_ENCODING_8859_6:
1061
case XML_CHAR_ENCODING_8859_7:
1062
case XML_CHAR_ENCODING_8859_8:
1063
case XML_CHAR_ENCODING_8859_9:
1065
* We used to keep the internal content in the
1066
* document encoding; however, this turned out to be unmaintainable.
1067
* So xmlGetCharEncodingHandler() will return non-null
1068
* values for this now.
1070
if ((ctxt->inputNr == 1) &&
1071
(ctxt->encoding == NULL) &&
1072
(ctxt->input != NULL) &&
1073
(ctxt->input->encoding != NULL)) {
1074
ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1076
ctxt->charset = enc;
1078
case XML_CHAR_ENCODING_2022_JP:
1079
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1080
"encoding not supported %s\n",
1081
BAD_CAST "ISO-2022-JP", NULL);
1083
case XML_CHAR_ENCODING_SHIFT_JIS:
1084
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1085
"encoding not supported %s\n",
1086
BAD_CAST "Shift_JIS", NULL);
1088
case XML_CHAR_ENCODING_EUC_JP:
1089
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1090
"encoding not supported %s\n",
1091
BAD_CAST "EUC-JP", NULL);
1097
if (handler == NULL)
1099
ctxt->charset = XML_CHAR_ENCODING_UTF8;
1100
return(xmlSwitchToEncodingInt(ctxt, handler, len));
1104
* xmlSwitchInputEncodingInt:
1105
* @ctxt: the parser context
1106
* @input: the input stream
1107
* @handler: the encoding handler
1108
* @len: the number of bytes to convert for the first line or -1
1110
* change the input functions when discovering the character encoding
1111
* of a given entity.
1113
* Returns 0 in case of success, -1 otherwise
1116
xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1117
xmlCharEncodingHandlerPtr handler, int len)
1121
if (handler == NULL)
1125
if (input->buf != NULL) {
1126
if (input->buf->encoder != NULL) {
1128
* Check in case the auto encoding detection triggered
1131
if (input->buf->encoder == handler)
1135
* "UTF-16" can be used for both LE and BE
1136
if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name,
1137
BAD_CAST "UTF-16", 6)) &&
1138
(!xmlStrncmp(BAD_CAST handler->name,
1139
BAD_CAST "UTF-16", 6))) {
1145
* Note: this is a bit dangerous, but that's what it
1146
* takes to use nearly compatible signature for different
1149
xmlCharEncCloseFunc(input->buf->encoder);
1150
input->buf->encoder = handler;
1153
input->buf->encoder = handler;
1156
* Is there already some content down the pipe to convert ?
1158
if (xmlBufIsEmpty(input->buf->buffer) == 0) {
1163
* Specific handling of the Byte Order Mark for
1166
if ((handler->name != NULL) &&
1167
(!strcmp(handler->name, "UTF-16LE") ||
1168
!strcmp(handler->name, "UTF-16")) &&
1169
(input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) {
1172
if ((handler->name != NULL) &&
1173
(!strcmp(handler->name, "UTF-16BE")) &&
1174
(input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) {
1178
* Errata on XML-1.0 June 20 2001
1179
* Specific handling of the Byte Order Mark for
1182
if ((handler->name != NULL) &&
1183
(!strcmp(handler->name, "UTF-8")) &&
1184
(input->cur[0] == 0xEF) &&
1185
(input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) {
1190
* Shrink the current input buffer.
1191
* Move it as the raw buffer and create a new input buffer
1193
processed = input->cur - input->base;
1194
xmlBufShrink(input->buf->buffer, processed);
1195
input->buf->raw = input->buf->buffer;
1196
input->buf->buffer = xmlBufCreate();
1197
input->buf->rawconsumed = processed;
1198
use = xmlBufUse(input->buf->raw);
1202
* convert as much as possible of the buffer
1204
nbchars = xmlCharEncInput(input->buf, 1);
1207
* convert just enough to get
1208
* '<?xml version="1.0" encoding="xxx"?>'
1209
* parsed with the autodetected encoding
1210
* into the parser reading buffer.
1212
nbchars = xmlCharEncFirstLineInput(input->buf, len);
1215
xmlErrInternal(ctxt,
1216
"switching encoding: encoder error\n",
1220
input->buf->rawconsumed += use - xmlBufUse(input->buf->raw);
1221
xmlBufResetInput(input->buf->buffer, input);
1224
} else if (input->length == 0) {
1226
* When parsing a static memory array one must know the
1227
* size to be able to convert the buffer.
1229
xmlErrInternal(ctxt, "switching encoding : no input\n", NULL);
1236
* xmlSwitchInputEncoding:
1237
* @ctxt: the parser context
1238
* @input: the input stream
1239
* @handler: the encoding handler
1241
* change the input functions when discovering the character encoding
1242
* of a given entity.
1244
* Returns 0 in case of success, -1 otherwise
1247
xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1248
xmlCharEncodingHandlerPtr handler) {
1249
return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1));
1253
* xmlSwitchToEncodingInt:
1254
* @ctxt: the parser context
1255
* @handler: the encoding handler
1256
* @len: the length to convert or -1
1258
* change the input functions when discovering the character encoding
1259
* of a given entity, and convert only @len bytes of the output, this
1260
* is needed on auto detect to allow any declared encoding later to
1261
* convert the actual content after the xmlDecl
1263
* Returns 0 in case of success, -1 otherwise
1266
xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
1267
xmlCharEncodingHandlerPtr handler, int len) {
1270
if (handler != NULL) {
1271
if (ctxt->input != NULL) {
1272
ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len);
1274
xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n",
1279
* The parsing is now done in UTF8 natively
1281
ctxt->charset = XML_CHAR_ENCODING_UTF8;
1288
* xmlSwitchToEncoding:
1289
* @ctxt: the parser context
1290
* @handler: the encoding handler
1292
* change the input functions when discovering the character encoding
1293
* of a given entity.
1295
* Returns 0 in case of success, -1 otherwise
1298
xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1300
return (xmlSwitchToEncodingInt(ctxt, handler, -1));
1303
/************************************************************************
1305
* Commodity functions to handle entities processing *
1307
************************************************************************/
1310
* xmlFreeInputStream:
1311
* @input: an xmlParserInputPtr
1313
* Free up an input stream.
1316
xmlFreeInputStream(xmlParserInputPtr input) {
1317
if (input == NULL) return;
1319
if (input->filename != NULL) xmlFree((char *) input->filename);
1320
if (input->directory != NULL) xmlFree((char *) input->directory);
1321
if (input->encoding != NULL) xmlFree((char *) input->encoding);
1322
if (input->version != NULL) xmlFree((char *) input->version);
1323
if ((input->free != NULL) && (input->base != NULL))
1324
input->free((xmlChar *) input->base);
1325
if (input->buf != NULL)
1326
xmlFreeParserInputBuffer(input->buf);
1331
* xmlNewInputStream:
1332
* @ctxt: an XML parser context
1334
* Create a new input stream structure.
1336
* Returns the new input stream or NULL
1339
xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1340
xmlParserInputPtr input;
1342
input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1343
if (input == NULL) {
1344
xmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
1347
memset(input, 0, sizeof(xmlParserInput));
1350
input->standalone = -1;
1353
* If the context is NULL the id cannot be initialized, but that
1354
* should not happen while parsing which is the situation where
1355
* the id is actually needed.
1358
input->id = ctxt->input_id++;
1364
* xmlNewIOInputStream:
1365
* @ctxt: an XML parser context
1366
* @input: an I/O Input
1367
* @enc: the charset encoding if known
1369
* Create a new input stream structure encapsulating the @input into
1370
* a stream suitable for the parser.
1372
* Returns the new input stream or NULL
1375
xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1376
xmlCharEncoding enc) {
1377
xmlParserInputPtr inputStream;
1379
if (input == NULL) return(NULL);
1380
if (xmlParserDebugEntities)
1381
xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1382
inputStream = xmlNewInputStream(ctxt);
1383
if (inputStream == NULL) {
1386
inputStream->filename = NULL;
1387
inputStream->buf = input;
1388
xmlBufResetInput(inputStream->buf->buffer, inputStream);
1390
if (enc != XML_CHAR_ENCODING_NONE) {
1391
xmlSwitchEncoding(ctxt, enc);
1394
return(inputStream);
1398
* xmlNewEntityInputStream:
1399
* @ctxt: an XML parser context
1400
* @entity: an Entity pointer
1402
* Create a new input stream based on an xmlEntityPtr
1404
* Returns the new input stream or NULL
1407
xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1408
xmlParserInputPtr input;
1410
if (entity == NULL) {
1411
xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n",
1415
if (xmlParserDebugEntities)
1416
xmlGenericError(xmlGenericErrorContext,
1417
"new input from entity: %s\n", entity->name);
1418
if (entity->content == NULL) {
1419
switch (entity->etype) {
1420
case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1421
xmlErrInternal(ctxt, "Cannot parse entity %s\n",
1424
case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1425
case XML_EXTERNAL_PARAMETER_ENTITY:
1426
return(xmlLoadExternalEntity((char *) entity->URI,
1427
(char *) entity->ExternalID, ctxt));
1428
case XML_INTERNAL_GENERAL_ENTITY:
1429
xmlErrInternal(ctxt,
1430
"Internal entity %s without content !\n",
1433
case XML_INTERNAL_PARAMETER_ENTITY:
1434
xmlErrInternal(ctxt,
1435
"Internal parameter entity %s without content !\n",
1438
case XML_INTERNAL_PREDEFINED_ENTITY:
1439
xmlErrInternal(ctxt,
1440
"Predefined entity %s without content !\n",
1446
input = xmlNewInputStream(ctxt);
1447
if (input == NULL) {
1450
if (entity->URI != NULL)
1451
input->filename = (char *) xmlStrdup((xmlChar *) entity->URI);
1452
input->base = entity->content;
1453
input->cur = entity->content;
1454
input->length = entity->length;
1455
input->end = &entity->content[input->length];
1460
* xmlNewStringInputStream:
1461
* @ctxt: an XML parser context
1462
* @buffer: a memory buffer
1464
* Create a new input stream based on a memory buffer.
1465
* Returns the new input stream
1468
xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1469
xmlParserInputPtr input;
1471
if (buffer == NULL) {
1472
xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n",
1476
if (xmlParserDebugEntities)
1477
xmlGenericError(xmlGenericErrorContext,
1478
"new fixed input: %.30s\n", buffer);
1479
input = xmlNewInputStream(ctxt);
1480
if (input == NULL) {
1481
xmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
1484
input->base = buffer;
1485
input->cur = buffer;
1486
input->length = xmlStrlen(buffer);
1487
input->end = &buffer[input->length];
1492
* xmlNewInputFromFile:
1493
* @ctxt: an XML parser context
1494
* @filename: the filename to use as entity
1496
* Create a new input stream based on a file or a URL.
1498
* Returns the new input stream or NULL in case of error
1501
xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1502
xmlParserInputBufferPtr buf;
1503
xmlParserInputPtr inputStream;
1504
char *directory = NULL;
1505
xmlChar *URI = NULL;
1507
if (xmlParserDebugEntities)
1508
xmlGenericError(xmlGenericErrorContext,
1509
"new input from file: %s\n", filename);
1510
if (ctxt == NULL) return(NULL);
1511
buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1513
if (filename == NULL)
1514
__xmlLoaderErr(ctxt,
1515
"failed to load external entity: NULL filename \n",
1518
__xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n",
1519
(const char *) filename);
1523
inputStream = xmlNewInputStream(ctxt);
1524
if (inputStream == NULL)
1527
inputStream->buf = buf;
1528
inputStream = xmlCheckHTTPInput(ctxt, inputStream);
1529
if (inputStream == NULL)
1532
if (inputStream->filename == NULL)
1533
URI = xmlStrdup((xmlChar *) filename);
1535
URI = xmlStrdup((xmlChar *) inputStream->filename);
1536
directory = xmlParserGetDirectory((const char *) URI);
1537
if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename);
1538
inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI);
1539
if (URI != NULL) xmlFree((char *) URI);
1540
inputStream->directory = directory;
1542
xmlBufResetInput(inputStream->buf->buffer, inputStream);
1543
if ((ctxt->directory == NULL) && (directory != NULL))
1544
ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1545
return(inputStream);
1548
/************************************************************************
1550
* Commodity functions to handle parser contexts *
1552
************************************************************************/
1555
/*
 * xmlInitParserCtxt: bring a parser context to its initial state.
 * The visible code creates the dictionary and SAX handler when absent,
 * allocates the input/node/name/space stacks when absent, frees any
 * inputs still on the input stack, and then loads the per-context
 * settings from the library-wide default values.
 * NOTE(review): several lines of this function (tests, returns, braces,
 * short assignments) are not visible in this excerpt.
 */
* xmlInitParserCtxt:
1556
* @ctxt: an XML parser context
1558
* Initialize a parser context
1560
* Returns 0 in case of success and -1 in case of error
1564
xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1566
xmlParserInputPtr input;
1569
/* Error report for a NULL context; the guarding test is not visible here. */
xmlErrInternal(NULL, "Got NULL parser context\n", NULL);
1573
xmlDefaultSAXHandlerInit();
1575
/* Reuse an existing dictionary, otherwise create one. */
if (ctxt->dict == NULL)
1576
ctxt->dict = xmlDictCreate();
1577
if (ctxt->dict == NULL) {
1578
xmlErrMemory(NULL, "cannot initialize parser context\n");
1581
xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT);
1583
/* Reuse an existing SAX handler block, otherwise allocate one. */
if (ctxt->sax == NULL)
1584
ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1585
if (ctxt->sax == NULL) {
1586
xmlErrMemory(NULL, "cannot initialize parser context\n");
1590
/* Fill the handler with the version 2 SAX callbacks. */
xmlSAXVersion(ctxt->sax, 2);
1594
/* Allocate the Input stack */
1595
if (ctxt->inputTab == NULL) {
1596
ctxt->inputTab = (xmlParserInputPtr *)
1597
xmlMalloc(5 * sizeof(xmlParserInputPtr));
1600
if (ctxt->inputTab == NULL) {
1601
xmlErrMemory(NULL, "cannot initialize parser context\n");
1607
/* Drain and free any inputs left on a reused context. */
while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1608
xmlFreeInputStream(input);
1613
ctxt->version = NULL;
1614
ctxt->encoding = NULL;
1615
ctxt->standalone = -1;
1616
ctxt->hasExternalSubset = 0;
1617
ctxt->hasPErefs = 0;
1620
ctxt->instate = XML_PARSER_START;
1622
ctxt->directory = NULL;
1624
/* Allocate the Node stack */
1625
if (ctxt->nodeTab == NULL) {
1626
ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1629
if (ctxt->nodeTab == NULL) {
1630
xmlErrMemory(NULL, "cannot initialize parser context\n");
1642
/* Allocate the Name stack */
1643
if (ctxt->nameTab == NULL) {
1644
ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1647
if (ctxt->nameTab == NULL) {
1648
xmlErrMemory(NULL, "cannot initialize parser context\n");
1663
/* Allocate the space stack */
1664
if (ctxt->spaceTab == NULL) {
1665
ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
1666
ctxt->spaceMax = 10;
1668
if (ctxt->spaceTab == NULL) {
1669
xmlErrMemory(NULL, "cannot initialize parser context\n");
1685
/* Seed the space stack: slot 0 holds -1 and is the current entry. */
ctxt->spaceMax = 10;
1686
ctxt->spaceTab[0] = -1;
1687
ctxt->space = &ctxt->spaceTab[0];
1688
/* The context itself is the default user data for callbacks. */
ctxt->userData = ctxt;
1690
ctxt->wellFormed = 1;
1691
ctxt->nsWellFormed = 1;
1693
/* Per-context options start from the library-wide defaults. */
ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
1694
ctxt->validate = xmlDoValidityCheckingDefaultValue;
1695
ctxt->pedantic = xmlPedanticParserDefaultValue;
1696
ctxt->linenumbers = xmlLineNumbersDefaultValue;
1697
ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
1698
/* When blanks are not kept, route them to the ignorable-whitespace callback. */
if (ctxt->keepBlanks == 0)
1699
ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
1701
/* Validation context reports through the parser's validity callbacks. */
ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
1702
ctxt->vctxt.userData = ctxt;
1703
ctxt->vctxt.error = xmlParserValidityError;
1704
ctxt->vctxt.warning = xmlParserValidityWarning;
1705
if (ctxt->validate) {
1706
/* Validity warnings are dropped when warnings are globally disabled. */
if (xmlGetWarningsDefaultValue == 0)
1707
ctxt->vctxt.warning = NULL;
1709
ctxt->vctxt.warning = xmlParserValidityWarning;
1710
ctxt->vctxt.nodeMax = 0;
1712
ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1713
ctxt->record_info = 0;
1715
ctxt->checkIndex = 0;
1717
ctxt->errNo = XML_ERR_OK;
1719
ctxt->charset = XML_CHAR_ENCODING_UTF8;
1720
ctxt->catalogs = NULL;
1721
/* Entity accounting counters start at zero. */
ctxt->nbentities = 0;
1722
ctxt->sizeentities = 0;
1723
ctxt->sizeentcopy = 0;
1725
xmlInitNodeInfoSeq(&ctxt->node_seq);
1730
/*
 * xmlFreeParserCtxt: release the resources held by a parser context.
 * A NULL context is ignored. The visible code frees the pending inputs,
 * the parser stacks and tables, the dictionary, the attribute hash
 * tables, the stored last-error strings and (when catalog support is
 * compiled in) the local catalogs.
 * NOTE(review): the loop bodies walking freeElems/freeAttrs, the statement
 * guarded by the ctxt->sax test and the end of the function are not
 * visible in this excerpt.
 */
* xmlFreeParserCtxt:
1731
* @ctxt: an XML parser context
1733
* Free all the memory used by a parser context. However the parsed
1734
* document in ctxt->myDoc is not freed.
1738
xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1740
xmlParserInputPtr input;
1742
if (ctxt == NULL) return;
1744
/* Pop and free every input still on the input stack. */
while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1745
xmlFreeInputStream(input);
1747
if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
1748
if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
1749
if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1750
if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
1751
if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1752
if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1753
if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1754
if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1755
if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1756
/* With SAX1 support the shared xmlDefaultSAXHandler is excluded by the test. */
#ifdef LIBXML_SAX1_ENABLED
1757
if ((ctxt->sax != NULL) &&
1758
(ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
1760
if (ctxt->sax != NULL)
1761
#endif /* LIBXML_SAX1_ENABLED */
1763
if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1764
if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
1765
if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
1766
if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
1767
if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab);
1768
if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
1769
if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
1770
/* attsDefault entries are released with xmlFree; attsSpecial gets no deallocator. */
if (ctxt->attsDefault != NULL)
1771
xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
1772
if (ctxt->attsSpecial != NULL)
1773
xmlHashFree(ctxt->attsSpecial, NULL);
1774
/* Walk the list headed by ctxt->freeElems (loop body not visible here). */
if (ctxt->freeElems != NULL) {
1775
xmlNodePtr cur, next;
1777
cur = ctxt->freeElems;
1778
while (cur != NULL) {
1784
/* Walk the list headed by ctxt->freeAttrs (loop body not visible here). */
if (ctxt->freeAttrs != NULL) {
1785
xmlAttrPtr cur, next;
1787
cur = ctxt->freeAttrs;
1788
while (cur != NULL) {
1795
* cleanup the error strings
1797
if (ctxt->lastError.message != NULL)
1798
xmlFree(ctxt->lastError.message);
1799
if (ctxt->lastError.file != NULL)
1800
xmlFree(ctxt->lastError.file);
1801
if (ctxt->lastError.str1 != NULL)
1802
xmlFree(ctxt->lastError.str1);
1803
if (ctxt->lastError.str2 != NULL)
1804
xmlFree(ctxt->lastError.str2);
1805
if (ctxt->lastError.str3 != NULL)
1806
xmlFree(ctxt->lastError.str3);
1808
/* Local catalogs exist only when catalog support is compiled in. */
#ifdef LIBXML_CATALOG_ENABLED
1809
if (ctxt->catalogs != NULL)
1810
xmlCatalogFreeLocal(ctxt->catalogs);
1818
* Allocate and initialize a new parser context.
1820
* Returns the xmlParserCtxtPtr or NULL
1824
xmlNewParserCtxt(void)
1826
xmlParserCtxtPtr ctxt;
1828
ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1830
xmlErrMemory(NULL, "cannot allocate parser context\n");
1833
memset(ctxt, 0, sizeof(xmlParserCtxt));
1834
if (xmlInitParserCtxt(ctxt) < 0) {
1835
xmlFreeParserCtxt(ctxt);
1841
/************************************************************************
1843
* Handling of node information *
1845
************************************************************************/
1848
/*
 * xmlClearParserCtxt: clear a parser context so it can be reused.
 * The only statement visible here clears the context's node info
 * sequence.
 * NOTE(review): the rest of the body (any NULL check and the
 * reinitialization step the description mentions) is not visible in
 * this excerpt - confirm against the full file.
 */
* xmlClearParserCtxt:
1849
* @ctxt: an XML parser context
1851
* Clear (release owned resources) and reinitialize a parser context
1855
xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1859
/* Release the node info buffer and reset the sequence. */
xmlClearNodeInfoSeq(&ctxt->node_seq);
1865
* xmlParserFindNodeInfo:
1866
* @ctx: an XML parser context
1867
* @node: an XML node within the tree
1869
* Find the parser node info struct for a given node
1871
* Returns an xmlParserNodeInfo block pointer or NULL
1873
const xmlParserNodeInfo *
1874
xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node)
1878
if ((ctx == NULL) || (node == NULL))
1880
/* Find position where node should be at */
1881
pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
1882
if (pos < ctx->node_seq.length
1883
&& ctx->node_seq.buffer[pos].node == node)
1884
return &ctx->node_seq.buffer[pos];
1891
/*
 * xmlInitNodeInfoSeq: put a node info sequence into its initial state.
 * NOTE(review): the function body is not visible in this excerpt.
 * Other code in this file treats a sequence with maximum == 0 and
 * buffer == NULL as empty, so presumably this resets length, maximum
 * and buffer - confirm against the full file.
 */
* xmlInitNodeInfoSeq:
1892
* @seq: a node info sequence pointer
1894
* -- Initialize (set to initial state) node info sequence
1897
xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1907
* xmlClearNodeInfoSeq:
1908
* @seq: a node info sequence pointer
1910
* -- Clear (release memory and reinitialize) node
1914
xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1918
if (seq->buffer != NULL)
1919
xmlFree(seq->buffer);
1920
xmlInitNodeInfoSeq(seq);
1924
* xmlParserFindNodeInfoIndex:
1925
* @seq: a node info sequence pointer
1926
* @node: an XML node pointer
1929
* xmlParserFindNodeInfoIndex : Find the index that the info record for
1930
* the given node is or should be at in a sorted sequence
1932
* Returns a long indicating the position of the record
1935
xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
1936
const xmlNodePtr node)
1938
unsigned long upper, lower, middle;
1941
if ((seq == NULL) || (node == NULL))
1942
return ((unsigned long) -1);
1944
/* Do a binary search for the key */
1946
upper = seq->length;
1948
while (lower <= upper && !found) {
1949
middle = lower + (upper - lower) / 2;
1950
if (node == seq->buffer[middle - 1].node)
1952
else if (node < seq->buffer[middle - 1].node)
1958
/* Return position */
1959
if (middle == 0 || seq->buffer[middle - 1].node < node)
1967
* xmlParserAddNodeInfo:
1968
* @ctxt: an XML parser context
1969
* @info: a node info sequence pointer
1971
* Insert node info record into the sorted sequence
1974
xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1975
const xmlParserNodeInfoPtr info)
1979
if ((ctxt == NULL) || (info == NULL)) return;
1981
/* Find pos and check to see if node is already in the sequence */
1982
pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
1985
if ((pos < ctxt->node_seq.length) &&
1986
(ctxt->node_seq.buffer != NULL) &&
1987
(ctxt->node_seq.buffer[pos].node == info->node)) {
1988
ctxt->node_seq.buffer[pos] = *info;
1991
/* Otherwise, we need to add new node to buffer */
1993
if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) {
1994
xmlParserNodeInfo *tmp_buffer;
1995
unsigned int byte_size;
1997
if (ctxt->node_seq.maximum == 0)
1998
ctxt->node_seq.maximum = 2;
1999
byte_size = (sizeof(*ctxt->node_seq.buffer) *
2000
(2 * ctxt->node_seq.maximum));
2002
if (ctxt->node_seq.buffer == NULL)
2003
tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
2006
(xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
2009
if (tmp_buffer == NULL) {
2010
xmlErrMemory(ctxt, "failed to allocate buffer\n");
2013
ctxt->node_seq.buffer = tmp_buffer;
2014
ctxt->node_seq.maximum *= 2;
2017
/* If position is not at end, move elements out of the way */
2018
if (pos != ctxt->node_seq.length) {
2021
for (i = ctxt->node_seq.length; i > pos; i--)
2022
ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2025
/* Copy element and increase length */
2026
ctxt->node_seq.buffer[pos] = *info;
2027
ctxt->node_seq.length++;
2031
/************************************************************************
2033
* Default settings *
2035
************************************************************************/
2037
* xmlPedanticParserDefault:
2040
* Set and return the previous value for enabling pedantic warnings.
2042
* Returns the last value for 0 for no substitution, 1 for substitution.
2046
xmlPedanticParserDefault(int val) {
2047
int old = xmlPedanticParserDefaultValue;
2049
xmlPedanticParserDefaultValue = val;
2054
* xmlLineNumbersDefault:
2057
* Set and return the previous value for enabling line numbers in elements
2058
* contents. This may break on old application and is turned off by default.
2060
* Returns the last value for 0 for no substitution, 1 for substitution.
2064
xmlLineNumbersDefault(int val) {
2065
int old = xmlLineNumbersDefaultValue;
2067
xmlLineNumbersDefaultValue = val;
2072
* xmlSubstituteEntitiesDefault:
2075
* Set and return the previous value for default entity support.
2076
* Initially the parser always keep entity references instead of substituting
2077
* entity values in the output. This function has to be used to change the
2078
* default parser behavior
2079
* SAX::substituteEntities() has to be used for changing that on a file by
2082
* Returns the last value for 0 for no substitution, 1 for substitution.
2086
xmlSubstituteEntitiesDefault(int val) {
2087
int old = xmlSubstituteEntitiesDefaultValue;
2089
xmlSubstituteEntitiesDefaultValue = val;
2094
* xmlKeepBlanksDefault:
2097
* Set and return the previous value for default blanks text nodes support.
2098
* The 1.x version of the parser used an heuristic to try to detect
2099
* ignorable white spaces. As a result the SAX callback was generating
2100
* xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
2101
* using the DOM output text nodes containing those blanks were not generated.
2102
* The 2.x and later version will switch to the XML standard way and
2103
* ignorableWhitespace() are only generated when running the parser in
2104
* validating mode and when the current element doesn't allow CDATA or
2106
* This function is provided as a way to force the standard behavior
2107
* on 1.X libs and to switch back to the old mode for compatibility when
2108
* running 1.X client code on 2.X . Upgrade of 1.X code should be done
2109
* by using xmlIsBlankNode() commodity function to detect the "empty"
2111
* This value also affect autogeneration of indentation when saving code
2112
* if blanks sections are kept, indentation is not generated.
2114
* Returns the last value for 0 for no substitution, 1 for substitution.
2118
xmlKeepBlanksDefault(int val) {
2119
int old = xmlKeepBlanksDefaultValue;
2121
xmlKeepBlanksDefaultValue = val;
2122
if (!val) xmlIndentTreeOutput = 1;
2126
#define bottom_parserInternals
2127
#include "elfgcchack.h"