2
* parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
* implemented on top of the SAX interfaces
6
* The XML specification:
7
* http://www.w3.org/TR/REC-xml
8
* Original 1.0 version:
9
* http://www.w3.org/TR/1998/REC-xml-19980210
10
* XML second edition working draft
11
* http://www.w3.org/TR/2000/WD-xml-2e-20000814
13
* Okay this is a big file, the parser core is around 7000 lines, then it
14
* is followed by the progressive parser top routines, then the various
15
* high level APIs to call the parser and a few miscellaneous functions.
16
* A number of helper functions and deprecated ones have been moved to
17
* parserInternals.c to reduce this file size.
18
* As much as possible the functions are associated with their relative
19
* production in the XML specification. A few productions defining the
20
* different ranges of characters are actually implemented either in
21
* parserInternals.h or parserInternals.c
22
* The DOM tree build is realized from the default SAX callbacks in
24
* The routines doing the validation checks are in valid.c and called either
25
* from the SAX callbacks or as standalone functions using a preparsed
28
* See Copyright for the status of this software.
36
#if defined(WIN32) && !defined (__CYGWIN__)
37
#define XML_DIR_SEP '\\'
39
#define XML_DIR_SEP '/'
45
#include <libxml/xmlmemory.h>
46
#include <libxml/threads.h>
47
#include <libxml/globals.h>
48
#include <libxml/tree.h>
49
#include <libxml/parser.h>
50
#include <libxml/parserInternals.h>
51
#include <libxml/valid.h>
52
#include <libxml/entities.h>
53
#include <libxml/xmlerror.h>
54
#include <libxml/encoding.h>
55
#include <libxml/xmlIO.h>
56
#include <libxml/uri.h>
57
#ifdef LIBXML_CATALOG_ENABLED
58
#include <libxml/catalog.h>
60
#ifdef LIBXML_SCHEMAS_ENABLED
61
#include <libxml/xmlschemastypes.h>
62
#include <libxml/relaxng.h>
70
#ifdef HAVE_SYS_STAT_H
86
* arbitrary depth limit for the XML documents that we allow to
87
* process. This is not a limitation of the parser but a safety
90
unsigned int xmlParserMaxDepth = 1024;
94
#define XML_PARSER_BIG_BUFFER_SIZE 300
95
#define XML_PARSER_BUFFER_SIZE 100
97
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
100
* List of XML prefixed PI allowed by W3C specs
103
static const char *xmlW3CPIs[] = {
109
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
110
xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111
const xmlChar **str);
113
static xmlParserErrors
114
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115
xmlSAXHandlerPtr sax,
116
void *user_data, int depth, const xmlChar *URL,
117
const xmlChar *ID, xmlNodePtr *list);
119
#ifdef LIBXML_LEGACY_ENABLED
121
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
122
xmlNodePtr lastNode);
123
#endif /* LIBXML_LEGACY_ENABLED */
125
static xmlParserErrors
126
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
127
const xmlChar *string, void *user_data, xmlNodePtr *lst);
130
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
132
/************************************************************************
134
* Some factorized error routines *
136
************************************************************************/
139
* xmlErrAttributeDup:
140
* @ctxt: an XML parser context
141
* @prefix: the attribute prefix
142
* @localname: the attribute localname
144
* Handle a redefinition of attribute error
147
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
148
const xmlChar * localname)
150
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
151
(ctxt->instate == XML_PARSER_EOF))
154
ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
156
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
157
ctxt->errNo, XML_ERR_FATAL, NULL, 0,
158
(const char *) localname, NULL, NULL, 0, 0,
159
"Attribute %s redefined\n", localname);
161
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
162
ctxt->errNo, XML_ERR_FATAL, NULL, 0,
163
(const char *) prefix, (const char *) localname,
164
NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
167
ctxt->wellFormed = 0;
168
if (ctxt->recovery == 0)
169
ctxt->disableSAX = 1;
175
* @ctxt: an XML parser context
176
* @error: the error number
177
* @info: extra information string
179
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
182
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
186
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
187
(ctxt->instate == XML_PARSER_EOF))
190
case XML_ERR_INVALID_HEX_CHARREF:
191
errmsg = "CharRef: invalid hexadecimal value\n";
193
case XML_ERR_INVALID_DEC_CHARREF:
194
errmsg = "CharRef: invalid decimal value\n";
196
case XML_ERR_INVALID_CHARREF:
197
errmsg = "CharRef: invalid value\n";
199
case XML_ERR_INTERNAL_ERROR:
200
errmsg = "internal error";
202
case XML_ERR_PEREF_AT_EOF:
203
errmsg = "PEReference at end of document\n";
205
case XML_ERR_PEREF_IN_PROLOG:
206
errmsg = "PEReference in prolog\n";
208
case XML_ERR_PEREF_IN_EPILOG:
209
errmsg = "PEReference in epilog\n";
211
case XML_ERR_PEREF_NO_NAME:
212
errmsg = "PEReference: no name\n";
214
case XML_ERR_PEREF_SEMICOL_MISSING:
215
errmsg = "PEReference: expecting ';'\n";
217
case XML_ERR_ENTITY_LOOP:
218
errmsg = "Detected an entity reference loop\n";
220
case XML_ERR_ENTITY_NOT_STARTED:
221
errmsg = "EntityValue: \" or ' expected\n";
223
case XML_ERR_ENTITY_PE_INTERNAL:
224
errmsg = "PEReferences forbidden in internal subset\n";
226
case XML_ERR_ENTITY_NOT_FINISHED:
227
errmsg = "EntityValue: \" or ' expected\n";
229
case XML_ERR_ATTRIBUTE_NOT_STARTED:
230
errmsg = "AttValue: \" or ' expected\n";
232
case XML_ERR_LT_IN_ATTRIBUTE:
233
errmsg = "Unescaped '<' not allowed in attributes values\n";
235
case XML_ERR_LITERAL_NOT_STARTED:
236
errmsg = "SystemLiteral \" or ' expected\n";
238
case XML_ERR_LITERAL_NOT_FINISHED:
239
errmsg = "Unfinished System or Public ID \" or ' expected\n";
241
case XML_ERR_MISPLACED_CDATA_END:
242
errmsg = "Sequence ']]>' not allowed in content\n";
244
case XML_ERR_URI_REQUIRED:
245
errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
247
case XML_ERR_PUBID_REQUIRED:
248
errmsg = "PUBLIC, the Public Identifier is missing\n";
250
case XML_ERR_HYPHEN_IN_COMMENT:
251
errmsg = "Comment must not contain '--' (double-hyphen)\n";
253
case XML_ERR_PI_NOT_STARTED:
254
errmsg = "xmlParsePI : no target name\n";
256
case XML_ERR_RESERVED_XML_NAME:
257
errmsg = "Invalid PI name\n";
259
case XML_ERR_NOTATION_NOT_STARTED:
260
errmsg = "NOTATION: Name expected here\n";
262
case XML_ERR_NOTATION_NOT_FINISHED:
263
errmsg = "'>' required to close NOTATION declaration\n";
265
case XML_ERR_VALUE_REQUIRED:
266
errmsg = "Entity value required\n";
268
case XML_ERR_URI_FRAGMENT:
269
errmsg = "Fragment not allowed";
271
case XML_ERR_ATTLIST_NOT_STARTED:
272
errmsg = "'(' required to start ATTLIST enumeration\n";
274
case XML_ERR_NMTOKEN_REQUIRED:
275
errmsg = "NmToken expected in ATTLIST enumeration\n";
277
case XML_ERR_ATTLIST_NOT_FINISHED:
278
errmsg = "')' required to finish ATTLIST enumeration\n";
280
case XML_ERR_MIXED_NOT_STARTED:
281
errmsg = "MixedContentDecl : '|' or ')*' expected\n";
283
case XML_ERR_PCDATA_REQUIRED:
284
errmsg = "MixedContentDecl : '#PCDATA' expected\n";
286
case XML_ERR_ELEMCONTENT_NOT_STARTED:
287
errmsg = "ContentDecl : Name or '(' expected\n";
289
case XML_ERR_ELEMCONTENT_NOT_FINISHED:
290
errmsg = "ContentDecl : ',' '|' or ')' expected\n";
292
case XML_ERR_PEREF_IN_INT_SUBSET:
294
"PEReference: forbidden within markup decl in internal subset\n";
296
case XML_ERR_GT_REQUIRED:
297
errmsg = "expected '>'\n";
299
case XML_ERR_CONDSEC_INVALID:
300
errmsg = "XML conditional section '[' expected\n";
302
case XML_ERR_EXT_SUBSET_NOT_FINISHED:
303
errmsg = "Content error in the external subset\n";
305
case XML_ERR_CONDSEC_INVALID_KEYWORD:
307
"conditional section INCLUDE or IGNORE keyword expected\n";
309
case XML_ERR_CONDSEC_NOT_FINISHED:
310
errmsg = "XML conditional section not closed\n";
312
case XML_ERR_XMLDECL_NOT_STARTED:
313
errmsg = "Text declaration '<?xml' required\n";
315
case XML_ERR_XMLDECL_NOT_FINISHED:
316
errmsg = "parsing XML declaration: '?>' expected\n";
318
case XML_ERR_EXT_ENTITY_STANDALONE:
319
errmsg = "external parsed entities cannot be standalone\n";
321
case XML_ERR_ENTITYREF_SEMICOL_MISSING:
322
errmsg = "EntityRef: expecting ';'\n";
324
case XML_ERR_DOCTYPE_NOT_FINISHED:
325
errmsg = "DOCTYPE improperly terminated\n";
327
case XML_ERR_LTSLASH_REQUIRED:
328
errmsg = "EndTag: '</' not found\n";
330
case XML_ERR_EQUAL_REQUIRED:
331
errmsg = "expected '='\n";
333
case XML_ERR_STRING_NOT_CLOSED:
334
errmsg = "String not closed expecting \" or '\n";
336
case XML_ERR_STRING_NOT_STARTED:
337
errmsg = "String not started expecting ' or \"\n";
339
case XML_ERR_ENCODING_NAME:
340
errmsg = "Invalid XML encoding name\n";
342
case XML_ERR_STANDALONE_VALUE:
343
errmsg = "standalone accepts only 'yes' or 'no'\n";
345
case XML_ERR_DOCUMENT_EMPTY:
346
errmsg = "Document is empty\n";
348
case XML_ERR_DOCUMENT_END:
349
errmsg = "Extra content at the end of the document\n";
351
case XML_ERR_NOT_WELL_BALANCED:
352
errmsg = "chunk is not well balanced\n";
354
case XML_ERR_EXTRA_CONTENT:
355
errmsg = "extra content at the end of well balanced chunk\n";
357
case XML_ERR_VERSION_MISSING:
358
errmsg = "Malformed declaration expecting version\n";
366
errmsg = "Unregistered error message\n";
370
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
371
XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
374
ctxt->wellFormed = 0;
375
if (ctxt->recovery == 0)
376
ctxt->disableSAX = 1;
382
* @ctxt: an XML parser context
383
* @error: the error number
384
* @msg: the error message
386
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
389
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
392
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
393
(ctxt->instate == XML_PARSER_EOF))
397
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
398
XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
400
ctxt->wellFormed = 0;
401
if (ctxt->recovery == 0)
402
ctxt->disableSAX = 1;
408
* @ctxt: an XML parser context
409
* @error: the error number
410
* @msg: the error message
417
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
418
const char *msg, const xmlChar *str1, const xmlChar *str2)
420
xmlStructuredErrorFunc schannel = NULL;
422
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
423
(ctxt->instate == XML_PARSER_EOF))
425
if ((ctxt != NULL) && (ctxt->sax != NULL) &&
426
(ctxt->sax->initialized == XML_SAX2_MAGIC))
427
schannel = ctxt->sax->serror;
428
__xmlRaiseError(schannel,
429
(ctxt->sax) ? ctxt->sax->warning : NULL,
431
ctxt, NULL, XML_FROM_PARSER, error,
432
XML_ERR_WARNING, NULL, 0,
433
(const char *) str1, (const char *) str2, NULL, 0, 0,
434
msg, (const char *) str1, (const char *) str2);
439
* @ctxt: an XML parser context
440
* @error: the error number
441
* @msg: the error message
444
* Handle a validity error.
447
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
448
const char *msg, const xmlChar *str1)
450
xmlStructuredErrorFunc schannel = NULL;
452
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
453
(ctxt->instate == XML_PARSER_EOF))
457
if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
458
schannel = ctxt->sax->serror;
460
__xmlRaiseError(schannel,
461
ctxt->vctxt.error, ctxt->vctxt.userData,
462
ctxt, NULL, XML_FROM_DTD, error,
463
XML_ERR_ERROR, NULL, 0, (const char *) str1,
465
msg, (const char *) str1);
473
* @ctxt: an XML parser context
474
* @error: the error number
475
* @msg: the error message
476
* @val: an integer value
478
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
481
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
482
const char *msg, int val)
484
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
485
(ctxt->instate == XML_PARSER_EOF))
489
__xmlRaiseError(NULL, NULL, NULL,
490
ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
491
NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
493
ctxt->wellFormed = 0;
494
if (ctxt->recovery == 0)
495
ctxt->disableSAX = 1;
500
* xmlFatalErrMsgStrIntStr:
501
* @ctxt: an XML parser context
502
* @error: the error number
503
* @msg: the error message
504
* @str1: a string info
505
* @val: an integer value
506
* @str2: a string info
508
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
511
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
512
const char *msg, const xmlChar *str1, int val,
515
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
516
(ctxt->instate == XML_PARSER_EOF))
520
__xmlRaiseError(NULL, NULL, NULL,
521
ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
522
NULL, 0, (const char *) str1, (const char *) str2,
523
NULL, val, 0, msg, str1, val, str2);
525
ctxt->wellFormed = 0;
526
if (ctxt->recovery == 0)
527
ctxt->disableSAX = 1;
533
* @ctxt: an XML parser context
534
* @error: the error number
535
* @msg: the error message
536
* @val: a string value
538
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
541
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
542
const char *msg, const xmlChar * val)
544
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
545
(ctxt->instate == XML_PARSER_EOF))
549
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
550
XML_FROM_PARSER, error, XML_ERR_FATAL,
551
NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
554
ctxt->wellFormed = 0;
555
if (ctxt->recovery == 0)
556
ctxt->disableSAX = 1;
562
* @ctxt: an XML parser context
563
* @error: the error number
564
* @msg: the error message
565
* @val: a string value
567
* Handle a non fatal parser error
570
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
571
const char *msg, const xmlChar * val)
573
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574
(ctxt->instate == XML_PARSER_EOF))
578
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
579
XML_FROM_PARSER, error, XML_ERR_ERROR,
580
NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
586
* @ctxt: an XML parser context
587
* @error: the error number
589
* @info1: extra information string
590
* @info2: extra information string
592
* Handle a namespace error: reported at XML_ERR_ERROR level, clears ctxt->nsWellFormed
595
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
597
const xmlChar * info1, const xmlChar * info2,
598
const xmlChar * info3)
600
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
601
(ctxt->instate == XML_PARSER_EOF))
605
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
606
XML_ERR_ERROR, NULL, 0, (const char *) info1,
607
(const char *) info2, (const char *) info3, 0, 0, msg,
608
info1, info2, info3);
610
ctxt->nsWellFormed = 0;
613
/************************************************************************
615
* Library wide options *
617
************************************************************************/
621
* @feature: the feature to be examined
623
* Examines if the library has been compiled with a given feature.
625
* Returns a non-zero value if the feature exists, otherwise zero.
626
* Returns zero (0) if the feature does not exist or an unknown
627
* feature is requested, non-zero otherwise.
630
xmlHasFeature(xmlFeature feature)
633
case XML_WITH_THREAD:
634
#ifdef LIBXML_THREAD_ENABLED
640
#ifdef LIBXML_TREE_ENABLED
645
case XML_WITH_OUTPUT:
646
#ifdef LIBXML_OUTPUT_ENABLED
652
#ifdef LIBXML_PUSH_ENABLED
657
case XML_WITH_READER:
658
#ifdef LIBXML_READER_ENABLED
663
case XML_WITH_PATTERN:
664
#ifdef LIBXML_PATTERN_ENABLED
669
case XML_WITH_WRITER:
670
#ifdef LIBXML_WRITER_ENABLED
676
#ifdef LIBXML_SAX1_ENABLED
682
#ifdef LIBXML_FTP_ENABLED
688
#ifdef LIBXML_HTTP_ENABLED
694
#ifdef LIBXML_VALID_ENABLED
700
#ifdef LIBXML_HTML_ENABLED
705
case XML_WITH_LEGACY:
706
#ifdef LIBXML_LEGACY_ENABLED
712
#ifdef LIBXML_C14N_ENABLED
717
case XML_WITH_CATALOG:
718
#ifdef LIBXML_CATALOG_ENABLED
724
#ifdef LIBXML_XPATH_ENABLED
730
#ifdef LIBXML_XPTR_ENABLED
735
case XML_WITH_XINCLUDE:
736
#ifdef LIBXML_XINCLUDE_ENABLED
742
#ifdef LIBXML_ICONV_ENABLED
747
case XML_WITH_ISO8859X:
748
#ifdef LIBXML_ISO8859X_ENABLED
753
case XML_WITH_UNICODE:
754
#ifdef LIBXML_UNICODE_ENABLED
759
case XML_WITH_REGEXP:
760
#ifdef LIBXML_REGEXP_ENABLED
765
case XML_WITH_AUTOMATA:
766
#ifdef LIBXML_AUTOMATA_ENABLED
772
#ifdef LIBXML_EXPR_ENABLED
777
case XML_WITH_SCHEMAS:
778
#ifdef LIBXML_SCHEMAS_ENABLED
783
case XML_WITH_SCHEMATRON:
784
#ifdef LIBXML_SCHEMATRON_ENABLED
789
case XML_WITH_MODULES:
790
#ifdef LIBXML_MODULES_ENABLED
796
#ifdef LIBXML_DEBUG_ENABLED
801
case XML_WITH_DEBUG_MEM:
802
#ifdef DEBUG_MEMORY_LOCATION
807
case XML_WITH_DEBUG_RUN:
808
#ifdef LIBXML_DEBUG_RUNTIME
814
#ifdef LIBXML_ZLIB_ENABLED
825
/************************************************************************
827
* SAX2 defaulted attributes handling *
829
************************************************************************/
833
* @ctxt: an XML parser context
835
* Do the SAX2 detection and specific initialization
838
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
839
if (ctxt == NULL) return;
840
#ifdef LIBXML_SAX1_ENABLED
841
if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
842
((ctxt->sax->startElementNs != NULL) ||
843
(ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
846
#endif /* LIBXML_SAX1_ENABLED */
848
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
849
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
850
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
851
if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
852
(ctxt->str_xml_ns == NULL)) {
853
xmlErrMemory(ctxt, NULL);
857
typedef struct _xmlDefAttrs xmlDefAttrs;
858
typedef xmlDefAttrs *xmlDefAttrsPtr;
859
struct _xmlDefAttrs {
860
int nbAttrs; /* number of defaulted attributes on that element */
861
int maxAttrs; /* the size of the array */
862
const xmlChar *values[4]; /* array of localname/prefix/values */
866
* xmlAttrNormalizeSpace:
867
* @src: the source string
868
* @dst: the target string
870
* Normalize the space in non CDATA attribute values:
871
* If the attribute type is not CDATA, then the XML processor MUST further
872
* process the normalized attribute value by discarding any leading and
873
* trailing space (#x20) characters, and by replacing sequences of space
874
* (#x20) characters by a single space (#x20) character.
875
* Note that the size of dst needs to be at least that of src, and if one doesn't need
876
* to preserve dst (and it doesn't come from a dictionary or read-only) then
877
* passing src as dst is just fine.
879
* Returns a pointer to the normalized value (dst) or NULL if no conversion
883
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
885
if ((src == NULL) || (dst == NULL))
888
while (*src == 0x20) src++;
891
while (*src == 0x20) src++;
905
* xmlAttrNormalizeSpace2:
906
* @src: the source string
908
* Normalize the space in non CDATA attribute values, a slightly more complex
909
* front end to avoid allocation problems when running on attribute values
910
* coming from the input.
912
* Returns a pointer to the normalized value (dst) or NULL if no conversion
915
static const xmlChar *
916
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, const xmlChar *src, int *len)
920
int need_realloc = 0;
923
if ((ctxt == NULL) || (src == NULL) || (len == NULL))
930
while (*cur == 0x20) {
937
if ((*cur == 0x20) || (*cur == 0)) {
947
ret = xmlStrndup(src + remove_head, i - remove_head + 1);
949
xmlErrMemory(ctxt, NULL);
952
xmlAttrNormalizeSpace(ret, ret);
953
*len = (int) strlen((const char *)ret);
955
} else if (remove_head) {
957
return(src + remove_head);
964
* @ctxt: an XML parser context
965
* @fullname: the element fullname
966
* @fullattr: the attribute fullname
967
* @value: the attribute value
969
* Add a defaulted attribute for an element
972
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
973
const xmlChar *fullname,
974
const xmlChar *fullattr,
975
const xmlChar *value) {
976
xmlDefAttrsPtr defaults;
979
const xmlChar *prefix;
982
* Allows to detect attribute redefinitions
984
if (ctxt->attsSpecial != NULL) {
985
if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
989
if (ctxt->attsDefault == NULL) {
990
ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
991
if (ctxt->attsDefault == NULL)
996
* split the element name into prefix:localname , the string found
997
* are within the DTD and then not associated to namespace names.
999
name = xmlSplitQName3(fullname, &len);
1001
name = xmlDictLookup(ctxt->dict, fullname, -1);
1004
name = xmlDictLookup(ctxt->dict, name, -1);
1005
prefix = xmlDictLookup(ctxt->dict, fullname, len);
1009
* make sure there is some storage
1011
defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1012
if (defaults == NULL) {
1013
defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1014
(4 * 4) * sizeof(const xmlChar *));
1015
if (defaults == NULL)
1017
defaults->nbAttrs = 0;
1018
defaults->maxAttrs = 4;
1019
if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1020
defaults, NULL) < 0) {
1024
} else if (defaults->nbAttrs >= defaults->maxAttrs) {
1025
xmlDefAttrsPtr temp;
1027
temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1028
(2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
1032
defaults->maxAttrs *= 2;
1033
if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1034
defaults, NULL) < 0) {
1041
* Split the attribute name into prefix:localname , the string found
1042
* are within the DTD and then not associated to namespace names.
1044
name = xmlSplitQName3(fullattr, &len);
1046
name = xmlDictLookup(ctxt->dict, fullattr, -1);
1049
name = xmlDictLookup(ctxt->dict, name, -1);
1050
prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1053
defaults->values[4 * defaults->nbAttrs] = name;
1054
defaults->values[4 * defaults->nbAttrs + 1] = prefix;
1055
/* intern the string and precompute the end */
1056
len = xmlStrlen(value);
1057
value = xmlDictLookup(ctxt->dict, value, len);
1058
defaults->values[4 * defaults->nbAttrs + 2] = value;
1059
defaults->values[4 * defaults->nbAttrs + 3] = value + len;
1060
defaults->nbAttrs++;
1065
xmlErrMemory(ctxt, NULL);
1070
* xmlAddSpecialAttr:
1071
* @ctxt: an XML parser context
1072
* @fullname: the element fullname
1073
* @fullattr: the attribute fullname
1074
* @type: the attribute type
1076
* Register this attribute type
1079
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1080
const xmlChar *fullname,
1081
const xmlChar *fullattr,
1084
if (ctxt->attsSpecial == NULL) {
1085
ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1086
if (ctxt->attsSpecial == NULL)
1090
if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1093
xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1094
(void *) (long) type);
1098
xmlErrMemory(ctxt, NULL);
1103
* xmlCleanSpecialAttrCallback:
1105
* Removes CDATA attributes from the special attribute table
1108
xmlCleanSpecialAttrCallback(void *payload, void *data,
1109
const xmlChar *fullname, const xmlChar *fullattr,
1110
const xmlChar *unused ATTRIBUTE_UNUSED) {
1111
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1113
if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1114
xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1119
* xmlCleanSpecialAttr:
1120
* @ctxt: an XML parser context
1122
* Trim the list of attributes defined to remove all those of type
1123
* CDATA as they are not special. This call should be done when finishing
1124
* to parse the DTD and before starting to parse the document root.
1127
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1129
if (ctxt->attsSpecial == NULL)
1132
xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1134
if (xmlHashSize(ctxt->attsSpecial) == 0) {
1135
xmlHashFree(ctxt->attsSpecial, NULL);
1136
ctxt->attsSpecial = NULL;
1142
* xmlCheckLanguageID:
1143
* @lang: pointer to the string value
1145
* Checks that the value conforms to the LanguageID production:
1147
* NOTE: this is somewhat deprecated, those productions were removed from
1148
* the XML Second edition.
1150
* [33] LanguageID ::= Langcode ('-' Subcode)*
1151
* [34] Langcode ::= ISO639Code | IanaCode | UserCode
1152
* [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1153
* [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1154
* [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1155
* [38] Subcode ::= ([a-z] | [A-Z])+
1157
* Returns 1 if correct 0 otherwise
1160
xmlCheckLanguageID(const xmlChar * lang)
1162
const xmlChar *cur = lang;
1166
if (((cur[0] == 'i') && (cur[1] == '-')) ||
1167
((cur[0] == 'I') && (cur[1] == '-'))) {
1172
while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1173
((cur[0] >= 'a') && (cur[0] <= 'z')))
1175
} else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1176
((cur[0] == 'X') && (cur[1] == '-'))) {
1181
while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1182
((cur[0] >= 'a') && (cur[0] <= 'z')))
1184
} else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1185
((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1190
if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1191
((cur[0] >= 'a') && (cur[0] <= 'z')))
1197
while (cur[0] != 0) { /* non input consuming */
1201
if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1202
((cur[0] >= 'a') && (cur[0] <= 'z')))
1206
while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1207
((cur[0] >= 'a') && (cur[0] <= 'z')))
1213
/************************************************************************
1215
* Parser stacks related functions and macros *
1217
************************************************************************/
1219
xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1220
const xmlChar ** str);
1225
* @ctxt: an XML parser context
1226
* @prefix: the namespace prefix or NULL
1227
* @URL: the namespace name
1229
* Pushes a new parser namespace on top of the ns stack
1231
* Returns -1 in case of error, -2 if the namespace should be discarded
1232
* and the new number of entries in the ns stack (ctxt->nsNr) otherwise.
1235
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1237
if (ctxt->options & XML_PARSE_NSCLEAN) {
1239
for (i = 0;i < ctxt->nsNr;i += 2) {
1240
if (ctxt->nsTab[i] == prefix) {
1242
if (ctxt->nsTab[i + 1] == URL)
1244
/* out of scope keep it */
1249
if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1252
ctxt->nsTab = (const xmlChar **)
1253
xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1254
if (ctxt->nsTab == NULL) {
1255
xmlErrMemory(ctxt, NULL);
1259
} else if (ctxt->nsNr >= ctxt->nsMax) {
1260
const xmlChar ** tmp;
1262
tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1263
ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1265
xmlErrMemory(ctxt, NULL);
1271
ctxt->nsTab[ctxt->nsNr++] = prefix;
1272
ctxt->nsTab[ctxt->nsNr++] = URL;
1273
return (ctxt->nsNr);
1277
* @ctxt: an XML parser context
1278
* @nr: the number to pop
1280
* Pops the top @nr parser prefix/namespace from the ns stack
1282
* Returns the number of namespaces removed
1285
nsPop(xmlParserCtxtPtr ctxt, int nr)
1289
if (ctxt->nsTab == NULL) return(0);
1290
if (ctxt->nsNr < nr) {
1291
xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1294
if (ctxt->nsNr <= 0)
1297
for (i = 0;i < nr;i++) {
1299
ctxt->nsTab[ctxt->nsNr] = NULL;
1306
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1307
const xmlChar **atts;
1311
if (ctxt->atts == NULL) {
1312
maxatts = 55; /* allow for 10 attrs by default */
1313
atts = (const xmlChar **)
1314
xmlMalloc(maxatts * sizeof(xmlChar *));
1315
if (atts == NULL) goto mem_error;
1317
attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1318
if (attallocs == NULL) goto mem_error;
1319
ctxt->attallocs = attallocs;
1320
ctxt->maxatts = maxatts;
1321
} else if (nr + 5 > ctxt->maxatts) {
1322
maxatts = (nr + 5) * 2;
1323
atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1324
maxatts * sizeof(const xmlChar *));
1325
if (atts == NULL) goto mem_error;
1327
attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1328
(maxatts / 5) * sizeof(int));
1329
if (attallocs == NULL) goto mem_error;
1330
ctxt->attallocs = attallocs;
1331
ctxt->maxatts = maxatts;
1333
return(ctxt->maxatts);
1335
xmlErrMemory(ctxt, NULL);
1341
* @ctxt: an XML parser context
1342
* @value: the parser input
1344
* Pushes a new parser input on top of the input stack
1346
* Returns 0 in case of error, the index in the stack otherwise
1349
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1351
if ((ctxt == NULL) || (value == NULL))
1353
if (ctxt->inputNr >= ctxt->inputMax) {
1354
ctxt->inputMax *= 2;
1356
(xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1358
sizeof(ctxt->inputTab[0]));
1359
if (ctxt->inputTab == NULL) {
1360
xmlErrMemory(ctxt, NULL);
1364
ctxt->inputTab[ctxt->inputNr] = value;
1365
ctxt->input = value;
1366
return (ctxt->inputNr++);
1370
* @ctxt: an XML parser context
1372
* Pops the top parser input from the input stack
1374
* Returns the input just removed
1377
inputPop(xmlParserCtxtPtr ctxt)
1379
xmlParserInputPtr ret;
1383
if (ctxt->inputNr <= 0)
1386
if (ctxt->inputNr > 0)
1387
ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1390
ret = ctxt->inputTab[ctxt->inputNr];
1391
ctxt->inputTab[ctxt->inputNr] = NULL;
1396
* @ctxt: an XML parser context
1397
* @value: the element node
1399
* Pushes a new element node on top of the node stack
1401
* Returns 0 in case of error, the index in the stack otherwise
1404
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1406
if (ctxt == NULL) return(0);
1407
if (ctxt->nodeNr >= ctxt->nodeMax) {
1410
tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1412
sizeof(ctxt->nodeTab[0]));
1414
xmlErrMemory(ctxt, NULL);
1417
ctxt->nodeTab = tmp;
1420
if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
1421
xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1422
"Excessive depth in document: change xmlParserMaxDepth = %d\n",
1424
ctxt->instate = XML_PARSER_EOF;
1427
ctxt->nodeTab[ctxt->nodeNr] = value;
1429
return (ctxt->nodeNr++);
1433
* @ctxt: an XML parser context
1435
* Pops the top element node from the node stack
1437
* Returns the node just removed
1440
nodePop(xmlParserCtxtPtr ctxt)
1444
if (ctxt == NULL) return(NULL);
1445
if (ctxt->nodeNr <= 0)
1448
if (ctxt->nodeNr > 0)
1449
ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1452
ret = ctxt->nodeTab[ctxt->nodeNr];
1453
ctxt->nodeTab[ctxt->nodeNr] = NULL;
1457
#ifdef LIBXML_PUSH_ENABLED
1460
* @ctxt: an XML parser context
1461
* @value: the element name
1462
* @prefix: the element prefix
1463
* @URI: the element namespace name
1465
* Pushes a new element name/prefix/URI on top of the name stack
1467
* Returns -1 in case of error, the index in the stack otherwise
1470
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1471
const xmlChar *prefix, const xmlChar *URI, int nsNr)
1473
if (ctxt->nameNr >= ctxt->nameMax) {
1474
const xmlChar * *tmp;
1477
tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1479
sizeof(ctxt->nameTab[0]));
1484
ctxt->nameTab = tmp;
1485
tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1487
sizeof(ctxt->pushTab[0]));
1492
ctxt->pushTab = tmp2;
1494
ctxt->nameTab[ctxt->nameNr] = value;
1496
ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1497
ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1498
ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1499
return (ctxt->nameNr++);
1501
xmlErrMemory(ctxt, NULL);
1506
* @ctxt: an XML parser context
1508
* Pops the top element/prefix/URI name from the name stack
1510
* Returns the name just removed
1512
static const xmlChar *
1513
nameNsPop(xmlParserCtxtPtr ctxt)
1517
if (ctxt->nameNr <= 0)
1520
if (ctxt->nameNr > 0)
1521
ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1524
ret = ctxt->nameTab[ctxt->nameNr];
1525
ctxt->nameTab[ctxt->nameNr] = NULL;
1528
#endif /* LIBXML_PUSH_ENABLED */
1532
* @ctxt: an XML parser context
1533
* @value: the element name
1535
* Pushes a new element name on top of the name stack
1537
* Returns -1 in case of error, the index in the stack otherwise
1540
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1542
if (ctxt == NULL) return (-1);
1544
if (ctxt->nameNr >= ctxt->nameMax) {
1545
const xmlChar * *tmp;
1547
tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1549
sizeof(ctxt->nameTab[0]));
1554
ctxt->nameTab = tmp;
1556
ctxt->nameTab[ctxt->nameNr] = value;
1558
return (ctxt->nameNr++);
1560
xmlErrMemory(ctxt, NULL);
1565
* @ctxt: an XML parser context
1567
* Pops the top element name from the name stack
1569
* Returns the name just removed
1572
namePop(xmlParserCtxtPtr ctxt)
1576
if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1579
if (ctxt->nameNr > 0)
1580
ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1583
ret = ctxt->nameTab[ctxt->nameNr];
1584
ctxt->nameTab[ctxt->nameNr] = NULL;
1588
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1589
if (ctxt->spaceNr >= ctxt->spaceMax) {
1592
ctxt->spaceMax *= 2;
1593
tmp = (int *) xmlRealloc(ctxt->spaceTab,
1594
ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1596
xmlErrMemory(ctxt, NULL);
1599
ctxt->spaceTab = tmp;
1601
ctxt->spaceTab[ctxt->spaceNr] = val;
1602
ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1603
return(ctxt->spaceNr++);
1606
static int spacePop(xmlParserCtxtPtr ctxt) {
1608
if (ctxt->spaceNr <= 0) return(0);
1610
if (ctxt->spaceNr > 0)
1611
ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1613
ctxt->space = &ctxt->spaceTab[0];
1614
ret = ctxt->spaceTab[ctxt->spaceNr];
1615
ctxt->spaceTab[ctxt->spaceNr] = -1;
1620
* Macros for accessing the content. Those should be used only by the parser,
1623
* Dirty macros, i.e. one often needs to make assumptions about the context to
1626
* CUR_PTR return the current pointer to the xmlChar to be parsed.
1627
* To be used with extreme caution since operations consuming
1628
* characters may move the input buffer to a different location !
1629
* CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1630
* This should be used internally by the parser
1631
* only to compare to ASCII values otherwise it would break when
1632
* running with UTF-8 encoding.
1633
* RAW same as CUR but in the input buffer, bypass any token
1634
* extraction that may have been done
1635
* NXT(n) returns the n'th next xmlChar. Same as CUR, it should be used only
1636
* to compare on ASCII based substring.
1637
* SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1638
* strings without newlines within the parser.
1639
* NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1640
* defined char within the parser.
1641
* Clean macros, not dependent on an ASCII context; they expect UTF-8 encoding
1643
* NEXT Skip to the next character, this does the proper decoding
1644
* in UTF-8 mode. It also pop-up unfinished entities on the fly.
1645
* NEXTL(l) Skip the current unicode character of l xmlChars long.
1646
* CUR_CHAR(l) returns the current unicode character (int), set l
1647
* to the number of xmlChars used for the encoding [0-5].
1648
* CUR_SCHAR same but operate on a string instead of the context
1649
* COPY_BUF copy the current unicode char to the target buffer, increment
1651
* GROW, SHRINK handling of input buffers
1654
/* RAW: the value stored at the current input position; needs a variable named ctxt in scope. */
#define RAW (*ctxt->input->cur)
1655
/* CUR: expands to exactly the same expression as RAW. */
#define CUR (*ctxt->input->cur)
1656
/* NXT(val): the value stored val positions after the current input position (no bounds check). */
#define NXT(val) ctxt->input->cur[(val)]
1657
/* CUR_PTR: the current input pointer itself, not dereferenced. */
#define CUR_PTR ctxt->input->cur
1659
/*
 * CMPn(s, c1, ..., cn): evaluates to non-zero when the first n bytes at s,
 * read as unsigned char, are equal to c1..cn in that order.
 *
 * The tests are chained with &&, so evaluation stops at the first byte
 * that differs; bytes after a mismatch are never read.
 *
 * s is parenthesized before the cast so that an argument such as "p + 1"
 * is offset in units of p's own type and only then viewed as bytes.  The
 * cast keeps the const qualifier so const buffers can be passed without
 * discarding it.
 *
 * s is expanded once per compared byte: do not pass an expression with
 * side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1677
#define SKIP(val) do { \
1678
ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
1679
if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
1680
if ((*ctxt->input->cur == 0) && \
1681
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
1682
xmlPopInput(ctxt); \
1685
#define SKIPL(val) do { \
1687
for(skipl=0; skipl<val; skipl++) { \
1688
if (*(ctxt->input->cur) == '\n') { \
1689
ctxt->input->line++; ctxt->input->col = 1; \
1690
} else ctxt->input->col++; \
1692
ctxt->input->cur++; \
1694
if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
1695
if ((*ctxt->input->cur == 0) && \
1696
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
1697
xmlPopInput(ctxt); \
1700
#define SHRINK if ((ctxt->progressive == 0) && \
1701
(ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1702
(ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
1705
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1706
xmlParserInputShrink(ctxt->input);
1707
if ((*ctxt->input->cur == 0) &&
1708
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1712
#define GROW if ((ctxt->progressive == 0) && \
1713
(ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
1716
static void xmlGROW (xmlParserCtxtPtr ctxt) {
1717
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1718
if ((*ctxt->input->cur == 0) &&
1719
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1723
/* SKIP_BLANKS: shorthand for xmlSkipBlankChars() on the ctxt variable in scope. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
1725
/* NEXT: shorthand for xmlNextChar() on the ctxt variable in scope. */
#define NEXT xmlNextChar(ctxt)
1728
ctxt->input->col++; \
1729
ctxt->input->cur++; \
1731
if (*ctxt->input->cur == 0) \
1732
xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
1735
#define NEXTL(l) do { \
1736
if (*(ctxt->input->cur) == '\n') { \
1737
ctxt->input->line++; ctxt->input->col = 1; \
1738
} else ctxt->input->col++; \
1739
ctxt->input->cur += l; \
1740
if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
1743
/* CUR_CHAR(l): calls xmlCurrentChar() on ctxt; l must be an lvalue because its address is passed. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
1744
/* CUR_SCHAR(s, l): same idea for the string s, via xmlStringCurrentChar(); l's address is passed. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
1746
/*
 * COPY_BUF(l, b, i, v): append the character v to buffer b at index i and
 * advance i.
 *
 *   l == 1   v is stored as one xmlChar and i is incremented by one.
 *   else     xmlCopyCharMultiByte() writes v starting at &b[i] and i is
 *            advanced by that call's return value.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement: it can be used as the unbraced body of an if that has its own
 * else without the inner if/else capturing that else.  Invoke it with a
 * trailing semicolon.
 *
 * i must be a plain lvalue; it is expanded more than once.
 * No bounds checking is done here: the caller must ensure b has room.
 */
#define COPY_BUF(l,b,i,v) \
    do { \
        if ((l) == 1) (b)[(i)++] = (xmlChar) (v); \
        else (i) += xmlCopyCharMultiByte(&(b)[(i)], (v)); \
    } while (0)
1751
* xmlSkipBlankChars:
1752
* @ctxt: the XML parser context
1754
* Skips all blank characters found at that point in the input stream.
1755
* It pops up finished entities in the process if allowable at that point.
1757
* Returns the number of space chars skipped
1761
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
1765
* It's Okay to use CUR/NEXT here since all the blanks are on
1768
if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1771
* if we are in the document content, go really fast
1773
cur = ctxt->input->cur;
1774
while (IS_BLANK_CH(*cur)) {
1776
ctxt->input->line++; ctxt->input->col = 1;
1781
ctxt->input->cur = cur;
1782
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1783
cur = ctxt->input->cur;
1786
ctxt->input->cur = cur;
1791
while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
1796
while ((cur == 0) && (ctxt->inputNr > 1) &&
1797
(ctxt->instate != XML_PARSER_COMMENT)) {
1802
* Need to handle support of entities branching here
1804
if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1805
} while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1810
/************************************************************************
1812
* Commodity functions to handle entities *
1814
************************************************************************/
1818
* @ctxt: an XML parser context
1820
* xmlPopInput: the current input pointed by ctxt->input came to an end
1821
* pop it and return the next char.
1823
* Returns the current xmlChar in the parser context
1826
xmlPopInput(xmlParserCtxtPtr ctxt) {
1827
if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
1828
if (xmlParserDebugEntities)
1829
xmlGenericError(xmlGenericErrorContext,
1830
"Popping input %d\n", ctxt->inputNr);
1831
xmlFreeInputStream(inputPop(ctxt));
1832
if ((*ctxt->input->cur == 0) &&
1833
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1834
return(xmlPopInput(ctxt));
1840
* @ctxt: an XML parser context
1841
* @input: an XML parser input fragment (entity, XML fragment ...).
1843
* xmlPushInput: switch to a new input stream which is stacked on top
1844
* of the previous one(s).
1847
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1848
if (input == NULL) return;
1850
if (xmlParserDebugEntities) {
1851
if ((ctxt->input != NULL) && (ctxt->input->filename))
1852
xmlGenericError(xmlGenericErrorContext,
1853
"%s(%d): ", ctxt->input->filename,
1855
xmlGenericError(xmlGenericErrorContext,
1856
"Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1858
inputPush(ctxt, input);
1864
* @ctxt: an XML parser context
1866
* parse Reference declarations
1868
* [66] CharRef ::= '&#' [0-9]+ ';' |
1869
* '&#x' [0-9a-fA-F]+ ';'
1871
* [ WFC: Legal Character ]
1872
* Characters referred to using character references must match the
1873
* production for Char.
1875
* Returns the value parsed (as an int), 0 in case of error
1878
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1879
unsigned int val = 0;
1881
unsigned int outofrange = 0;
1884
* Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1886
if ((RAW == '&') && (NXT(1) == '#') &&
1890
while (RAW != ';') { /* loop blocked by count */
1895
if ((RAW >= '0') && (RAW <= '9'))
1896
val = val * 16 + (CUR - '0');
1897
else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1898
val = val * 16 + (CUR - 'a') + 10;
1899
else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1900
val = val * 16 + (CUR - 'A') + 10;
1902
xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
1913
/* on purpose to avoid reentrancy problems with NEXT and SKIP */
1918
} else if ((RAW == '&') && (NXT(1) == '#')) {
1921
while (RAW != ';') { /* loop blocked by count */
1926
if ((RAW >= '0') && (RAW <= '9'))
1927
val = val * 10 + (CUR - '0');
1929
xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
1940
/* on purpose to avoid reentrancy problems with NEXT and SKIP */
1946
xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
1950
* [ WFC: Legal Character ]
1951
* Characters referred to using character references must match the
1952
* production for Char.
1954
if ((IS_CHAR(val) && (outofrange == 0))) {
1957
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1958
"xmlParseCharRef: invalid xmlChar value %d\n",
1965
* xmlParseStringCharRef:
1966
* @ctxt: an XML parser context
1967
* @str: a pointer to an index in the string
1969
* parse Reference declarations, variant parsing from a string rather
1970
* than an input flow.
1972
* [66] CharRef ::= '&#' [0-9]+ ';' |
1973
* '&#x' [0-9a-fA-F]+ ';'
1975
* [ WFC: Legal Character ]
1976
* Characters referred to using character references must match the
1977
* production for Char.
1979
* Returns the value parsed (as an int), 0 in case of error, str will be
1980
* updated to the current value of the index
1983
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1986
unsigned int val = 0;
1987
unsigned int outofrange = 0;
1989
if ((str == NULL) || (*str == NULL)) return(0);
1992
if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1995
while (cur != ';') { /* Non input consuming loop */
1996
if ((cur >= '0') && (cur <= '9'))
1997
val = val * 16 + (cur - '0');
1998
else if ((cur >= 'a') && (cur <= 'f'))
1999
val = val * 16 + (cur - 'a') + 10;
2000
else if ((cur >= 'A') && (cur <= 'F'))
2001
val = val * 16 + (cur - 'A') + 10;
2003
xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2015
} else if ((cur == '&') && (ptr[1] == '#')){
2018
while (cur != ';') { /* Non input consuming loops */
2019
if ((cur >= '0') && (cur <= '9'))
2020
val = val * 10 + (cur - '0');
2022
xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2035
xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2041
* [ WFC: Legal Character ]
2042
* Characters referred to using character references must match the
2043
* production for Char.
2045
if ((IS_CHAR(val) && (outofrange == 0))) {
2048
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2049
"xmlParseStringCharRef: invalid xmlChar value %d\n",
2056
* xmlNewBlanksWrapperInputStream:
2057
* @ctxt: an XML parser context
2058
* @entity: an Entity pointer
2060
* Create a new input stream for wrapping
2061
* blanks around a PEReference
2063
* Returns the new input stream or NULL
2066
/*
 * Release routine installed as input->free on blanks-wrapper input streams:
 * it simply passes the buffer it is given to xmlFree().
 */
static void
deallocblankswrapper(xmlChar *str)
{
    xmlFree(str);
}
2068
static xmlParserInputPtr
2069
xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2070
xmlParserInputPtr input;
2073
if (entity == NULL) {
2074
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2075
"xmlNewBlanksWrapperInputStream entity\n");
2078
if (xmlParserDebugEntities)
2079
xmlGenericError(xmlGenericErrorContext,
2080
"new blanks wrapper for entity: %s\n", entity->name);
2081
input = xmlNewInputStream(ctxt);
2082
if (input == NULL) {
2085
length = xmlStrlen(entity->name) + 5;
2086
buffer = xmlMallocAtomic(length);
2087
if (buffer == NULL) {
2088
xmlErrMemory(ctxt, NULL);
2094
buffer [length-3] = ';';
2095
buffer [length-2] = ' ';
2096
buffer [length-1] = 0;
2097
memcpy(buffer + 2, entity->name, length - 5);
2098
input->free = deallocblankswrapper;
2099
input->base = buffer;
2100
input->cur = buffer;
2101
input->length = length;
2102
input->end = &buffer[length];
2107
* xmlParserHandlePEReference:
2108
* @ctxt: the parser context
2110
* [69] PEReference ::= '%' Name ';'
2112
* [ WFC: No Recursion ]
2113
* A parsed entity must not contain a recursive
2114
* reference to itself, either directly or indirectly.
2116
* [ WFC: Entity Declared ]
2117
* In a document without any DTD, a document with only an internal DTD
2118
* subset which contains no parameter entity references, or a document
2119
* with "standalone='yes'", ... ... The declaration of a parameter
2120
* entity must precede any reference to it...
2122
* [ VC: Entity Declared ]
2123
* In a document with an external subset or external parameter entities
2124
* with "standalone='no'", ... ... The declaration of a parameter entity
2125
* must precede any reference to it...
2128
* Parameter-entity references may only appear in the DTD.
2129
* NOTE: misleading but this is handled.
2131
* A PEReference may have been detected in the current input stream
2132
* the handling is done according to
2133
* http://www.w3.org/TR/REC-xml#entproc
2135
* - Included in literal in entity values
2136
* - Included as Parameter Entity reference within DTDs
2139
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2140
const xmlChar *name;
2141
xmlEntityPtr entity = NULL;
2142
xmlParserInputPtr input;
2144
if (RAW != '%') return;
2145
switch(ctxt->instate) {
2146
case XML_PARSER_CDATA_SECTION:
2148
case XML_PARSER_COMMENT:
2150
case XML_PARSER_START_TAG:
2152
case XML_PARSER_END_TAG:
2154
case XML_PARSER_EOF:
2155
xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2157
case XML_PARSER_PROLOG:
2158
case XML_PARSER_START:
2159
case XML_PARSER_MISC:
2160
xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2162
case XML_PARSER_ENTITY_DECL:
2163
case XML_PARSER_CONTENT:
2164
case XML_PARSER_ATTRIBUTE_VALUE:
2166
case XML_PARSER_SYSTEM_LITERAL:
2167
case XML_PARSER_PUBLIC_LITERAL:
2168
/* we just ignore it there */
2170
case XML_PARSER_EPILOG:
2171
xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2173
case XML_PARSER_ENTITY_VALUE:
2175
* NOTE: in the case of entity values, we don't do the
2176
* substitution here since we need the literal
2177
* entity value to be able to save the internal
2178
* subset of the document.
2179
* This will be handled by xmlStringDecodeEntities
2182
case XML_PARSER_DTD:
2184
* [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2185
* In the internal DTD subset, parameter-entity references
2186
* can occur only where markup declarations can occur, not
2187
* within markup declarations.
2188
* In that case this is handled in xmlParseMarkupDecl
2190
if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2192
if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2195
case XML_PARSER_IGNORE:
2200
name = xmlParseName(ctxt);
2201
if (xmlParserDebugEntities)
2202
xmlGenericError(xmlGenericErrorContext,
2203
"PEReference: %s\n", name);
2205
xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2209
if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2210
entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2211
if (entity == NULL) {
2214
* [ WFC: Entity Declared ]
2215
* In a document without any DTD, a document with only an
2216
* internal DTD subset which contains no parameter entity
2217
* references, or a document with "standalone='yes'", ...
2218
* ... The declaration of a parameter entity must precede
2219
* any reference to it...
2221
if ((ctxt->standalone == 1) ||
2222
((ctxt->hasExternalSubset == 0) &&
2223
(ctxt->hasPErefs == 0))) {
2224
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2225
"PEReference: %%%s; not found\n", name);
2228
* [ VC: Entity Declared ]
2229
* In a document with an external subset or external
2230
* parameter entities with "standalone='no'", ...
2231
* ... The declaration of a parameter entity must precede
2232
* any reference to it...
2234
if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2235
xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2236
"PEReference: %%%s; not found\n",
2239
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2240
"PEReference: %%%s; not found\n",
2244
} else if (ctxt->input->free != deallocblankswrapper) {
2245
input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2246
xmlPushInput(ctxt, input);
2248
if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2249
(entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2251
xmlCharEncoding enc;
2254
* handle the extra spaces added before and after
2255
* c.f. http://www.w3.org/TR/REC-xml#as-PE
2256
* this is done independently.
2258
input = xmlNewEntityInputStream(ctxt, entity);
2259
xmlPushInput(ctxt, input);
2262
* Get the 4 first bytes and decode the charset
2263
* if enc != XML_CHAR_ENCODING_NONE
2264
* plug some encoding conversion routines.
2265
* Note that, since we may have some non-UTF8
2266
* encoding (like UTF16, bug 135229), the 'length'
2267
* is not known, but we can calculate based upon
2268
* the amount of data in the buffer.
2271
if ((ctxt->input->end - ctxt->input->cur)>=4) {
2276
enc = xmlDetectCharEncoding(start, 4);
2277
if (enc != XML_CHAR_ENCODING_NONE) {
2278
xmlSwitchEncoding(ctxt, enc);
2282
if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2283
(CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2284
(IS_BLANK_CH(NXT(5)))) {
2285
xmlParseTextDecl(ctxt);
2288
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2289
"PEReference: %s is not a parameter entity\n",
2294
xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2300
* Macro used to grow the current buffer.
2302
#define growBuffer(buffer) { \
2304
buffer##_size *= 2; \
2306
xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \
2307
if (tmp == NULL) goto mem_error; \
2312
* xmlStringLenDecodeEntities:
2313
* @ctxt: the parser context
2314
* @str: the input string
2315
* @len: the string length
2316
* @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2317
* @end: an end marker xmlChar, 0 if none
2318
* @end2: an end marker xmlChar, 0 if none
2319
* @end3: an end marker xmlChar, 0 if none
2321
* Takes an entity string content and processes it to do the adequate substitutions.
2323
* [67] Reference ::= EntityRef | CharRef
2325
* [69] PEReference ::= '%' Name ';'
2327
* Returns A newly allocated string with the substitution done. The caller
2328
* must deallocate it !
2331
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2332
int what, xmlChar end, xmlChar end2, xmlChar end3) {
2333
xmlChar *buffer = NULL;
2334
int buffer_size = 0;
2336
xmlChar *current = NULL;
2337
xmlChar *rep = NULL;
2338
const xmlChar *last;
2343
if ((ctxt == NULL) || (str == NULL) || (len < 0))
2347
if (ctxt->depth > 40) {
2348
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2353
* allocate a translation buffer.
2355
buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2356
buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
2357
if (buffer == NULL) goto mem_error;
2360
* OK loop until we reach one of the ending char or a size limit.
2361
* we are operating on already parsed values.
2364
c = CUR_SCHAR(str, l);
2367
while ((c != 0) && (c != end) && /* non input consuming loop */
2368
(c != end2) && (c != end3)) {
2371
if ((c == '&') && (str[1] == '#')) {
2372
int val = xmlParseStringCharRef(ctxt, &str);
2374
COPY_BUF(0,buffer,nbchars,val);
2376
if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2379
} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2380
if (xmlParserDebugEntities)
2381
xmlGenericError(xmlGenericErrorContext,
2382
"String decoding Entity Reference: %.30s\n",
2384
ent = xmlParseStringEntityRef(ctxt, &str);
2385
if ((ent != NULL) &&
2386
(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2387
if (ent->content != NULL) {
2388
COPY_BUF(0,buffer,nbchars,ent->content[0]);
2389
if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2393
xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2394
"predefined entity has no content\n");
2396
} else if ((ent != NULL) && (ent->content != NULL)) {
2398
rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2403
while (*current != 0) { /* non input consuming loop */
2404
buffer[nbchars++] = *current++;
2406
buffer_size - XML_PARSER_BUFFER_SIZE) {
2413
} else if (ent != NULL) {
2414
int i = xmlStrlen(ent->name);
2415
const xmlChar *cur = ent->name;
2417
buffer[nbchars++] = '&';
2418
if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2422
buffer[nbchars++] = *cur++;
2423
buffer[nbchars++] = ';';
2425
} else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2426
if (xmlParserDebugEntities)
2427
xmlGenericError(xmlGenericErrorContext,
2428
"String decoding PE Reference: %.30s\n", str);
2429
ent = xmlParseStringPEReference(ctxt, &str);
2431
if (ent->content == NULL) {
2432
if (xmlLoadEntityContent(ctxt, ent) < 0) {
2436
rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2441
while (*current != 0) { /* non input consuming loop */
2442
buffer[nbchars++] = *current++;
2444
buffer_size - XML_PARSER_BUFFER_SIZE) {
2453
COPY_BUF(l,buffer,nbchars,c);
2455
if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2460
c = CUR_SCHAR(str, l);
2464
buffer[nbchars++] = 0;
2468
xmlErrMemory(ctxt, NULL);
2477
* xmlStringDecodeEntities:
2478
* @ctxt: the parser context
2479
* @str: the input string
2480
* @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2481
* @end: an end marker xmlChar, 0 if none
2482
* @end2: an end marker xmlChar, 0 if none
2483
* @end3: an end marker xmlChar, 0 if none
2485
* Takes an entity string content and processes it to do the adequate substitutions.
2487
* [67] Reference ::= EntityRef | CharRef
2489
* [69] PEReference ::= '%' Name ';'
2491
* Returns A newly allocated string with the substitution done. The caller
2492
* must deallocate it !
2495
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2496
xmlChar end, xmlChar end2, xmlChar end3) {
2497
if ((ctxt == NULL) || (str == NULL)) return(NULL);
2498
return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2502
/************************************************************************
2504
* Commodity functions, cleanup needed ? *
2506
************************************************************************/
2510
* @ctxt: an XML parser context
2512
* @len: the size of @str
2513
* @blank_chars: we know the chars are blanks
2515
* Is this a sequence of blank chars that one can ignore ?
2517
* Returns 1 if ignorable 0 otherwise.
2520
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2523
xmlNodePtr lastChild;
2526
* Don't spend time trying to differentiate them, the same callback is
2529
if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2533
* Check for xml:space value.
2535
if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2536
(*(ctxt->space) == -2))
2540
* Check that the string is made of blanks
2542
if (blank_chars == 0) {
2543
for (i = 0;i < len;i++)
2544
if (!(IS_BLANK_CH(str[i]))) return(0);
2548
* Look if the element is mixed content in the DTD if available
2550
if (ctxt->node == NULL) return(0);
2551
if (ctxt->myDoc != NULL) {
2552
ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2553
if (ret == 0) return(1);
2554
if (ret == 1) return(0);
2558
* Otherwise, heuristic :-\
2560
if ((RAW != '<') && (RAW != 0xD)) return(0);
2561
if ((ctxt->node->children == NULL) &&
2562
(RAW == '<') && (NXT(1) == '/')) return(0);
2564
lastChild = xmlGetLastChild(ctxt->node);
2565
if (lastChild == NULL) {
2566
if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2567
(ctxt->node->content != NULL)) return(0);
2568
} else if (xmlNodeIsText(lastChild))
2570
else if ((ctxt->node->children != NULL) &&
2571
(xmlNodeIsText(ctxt->node->children)))
2576
/************************************************************************
2578
* Extra stuff for namespace support *
2579
* Relates to http://www.w3.org/TR/WD-xml-names *
2581
************************************************************************/
2585
* @ctxt: an XML parser context
2586
* @name: the qualified name string to split
2587
* @prefix: a xmlChar **
2589
* parse an UTF8 encoded XML qualified name string
2591
* [NS 5] QName ::= (Prefix ':')? LocalPart
2593
* [NS 6] Prefix ::= NCName
2595
* [NS 7] LocalPart ::= NCName
2597
* Returns the local part, and prefix is updated
2598
* to get the Prefix if any.
2602
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2603
xmlChar buf[XML_MAX_NAMELEN + 5];
2604
xmlChar *buffer = NULL;
2606
int max = XML_MAX_NAMELEN;
2607
xmlChar *ret = NULL;
2608
const xmlChar *cur = name;
2611
if (prefix == NULL) return(NULL);
2614
if (cur == NULL) return(NULL);
2616
#ifndef XML_XML_NAMESPACE
2617
/* xml: prefix is not really a namespace */
2618
if ((cur[0] == 'x') && (cur[1] == 'm') &&
2619
(cur[2] == 'l') && (cur[3] == ':'))
2620
return(xmlStrdup(name));
2623
/* nasty but well-formed */
2625
return(xmlStrdup(name));
2628
while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2634
* Okay someone managed to make a huge name, so he's ready to pay
2635
* for the processing speed.
2639
buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2640
if (buffer == NULL) {
2641
xmlErrMemory(ctxt, NULL);
2644
memcpy(buffer, buf, len);
2645
while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2646
if (len + 10 > max) {
2650
tmp = (xmlChar *) xmlRealloc(buffer,
2651
max * sizeof(xmlChar));
2654
xmlErrMemory(ctxt, NULL);
2665
if ((c == ':') && (*cur == 0)) {
2669
return(xmlStrdup(name));
2673
ret = xmlStrndup(buf, len);
2677
max = XML_MAX_NAMELEN;
2685
return(xmlStrndup(BAD_CAST "", 0));
2690
* Check that the first character is proper to start
2693
if (!(((c >= 0x61) && (c <= 0x7A)) ||
2694
((c >= 0x41) && (c <= 0x5A)) ||
2695
(c == '_') || (c == ':'))) {
2697
int first = CUR_SCHAR(cur, l);
2699
if (!IS_LETTER(first) && (first != '_')) {
2700
xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
2701
"Name %s is not XML Namespace compliant\n",
2707
while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2713
* Okay someone managed to make a huge name, so he's ready to pay
2714
* for the processing speed.
2718
buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2719
if (buffer == NULL) {
2720
xmlErrMemory(ctxt, NULL);
2723
memcpy(buffer, buf, len);
2724
while (c != 0) { /* tested bigname2.xml */
2725
if (len + 10 > max) {
2729
tmp = (xmlChar *) xmlRealloc(buffer,
2730
max * sizeof(xmlChar));
2732
xmlErrMemory(ctxt, NULL);
2745
ret = xmlStrndup(buf, len);
2754
/************************************************************************
2756
* The parser itself *
2757
* Relates to http://www.w3.org/TR/REC-xml *
2759
************************************************************************/
2761
static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
2762
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
2763
int *len, int *alloc, int normalize);
2767
* @ctxt: an XML parser context
2769
* parse an XML name.
2771
* [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2772
* CombiningChar | Extender
2774
* [5] Name ::= (Letter | '_' | ':') (NameChar)*
2776
* [6] Names ::= Name (#x20 Name)*
2778
* Returns the Name parsed or NULL
2782
xmlParseName(xmlParserCtxtPtr ctxt) {
2790
* Accelerator for simple ASCII names
2792
in = ctxt->input->cur;
2793
if (((*in >= 0x61) && (*in <= 0x7A)) ||
2794
((*in >= 0x41) && (*in <= 0x5A)) ||
2795
(*in == '_') || (*in == ':')) {
2797
while (((*in >= 0x61) && (*in <= 0x7A)) ||
2798
((*in >= 0x41) && (*in <= 0x5A)) ||
2799
((*in >= 0x30) && (*in <= 0x39)) ||
2800
(*in == '_') || (*in == '-') ||
2801
(*in == ':') || (*in == '.'))
2803
if ((*in > 0) && (*in < 0x80)) {
2804
count = in - ctxt->input->cur;
2805
ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
2806
ctxt->input->cur = in;
2807
ctxt->nbChars += count;
2808
ctxt->input->col += count;
2810
xmlErrMemory(ctxt, NULL);
2814
return(xmlParseNameComplex(ctxt));
2818
* xmlParseNameAndCompare:
2819
* @ctxt: an XML parser context
2821
* parse an XML name and compares for match
2822
* (specialized for endtag parsing)
2824
* Returns NULL for an illegal name, (xmlChar*) 1 for success
2825
* and the name for mismatch
2828
static const xmlChar *
2829
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
2830
register const xmlChar *cmp = other;
2831
register const xmlChar *in;
2836
in = ctxt->input->cur;
2837
while (*in != 0 && *in == *cmp) {
2842
if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
2844
ctxt->input->cur = in;
2845
return (const xmlChar*) 1;
2847
/* failure (or end of input buffer), check with full function */
2848
ret = xmlParseName (ctxt);
2849
/* strings coming from the dictionnary direct compare possible */
2851
return (const xmlChar*) 1;
2856
static const xmlChar *
2857
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
2863
* Handler for more complex cases
2867
if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2868
(!IS_LETTER(c) && (c != '_') &&
2873
while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
2874
((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2875
(c == '.') || (c == '-') ||
2876
(c == '_') || (c == ':') ||
2877
(IS_COMBINING(c)) ||
2878
(IS_EXTENDER(c)))) {
2879
if (count++ > 100) {
2887
if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2888
return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
2889
return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
2893
* xmlParseStringName:
2894
* @ctxt: an XML parser context
2895
* @str: a pointer to the string pointer (IN/OUT)
2897
* parse an XML name.
2899
* [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2900
* CombiningChar | Extender
2902
* [5] Name ::= (Letter | '_' | ':') (NameChar)*
2904
* [6] Names ::= Name (#x20 Name)*
2906
* Returns the Name parsed or NULL. The @str pointer
2907
* is updated to the current location in the string.
2911
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2912
xmlChar buf[XML_MAX_NAMELEN + 5];
2913
const xmlChar *cur = *str;
2917
c = CUR_SCHAR(cur, l);
2918
if (!IS_LETTER(c) && (c != '_') &&
2923
while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2924
(c == '.') || (c == '-') ||
2925
(c == '_') || (c == ':') ||
2926
(IS_COMBINING(c)) ||
2928
COPY_BUF(l,buf,len,c);
2930
c = CUR_SCHAR(cur, l);
2931
if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2933
* Okay someone managed to make a huge name, so he's ready to pay
2934
* for the processing speed.
2939
buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2940
if (buffer == NULL) {
2941
xmlErrMemory(ctxt, NULL);
2944
memcpy(buffer, buf, len);
2945
while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2946
/* test bigentname.xml */
2947
(c == '.') || (c == '-') ||
2948
(c == '_') || (c == ':') ||
2949
(IS_COMBINING(c)) ||
2951
if (len + 10 > max) {
2954
tmp = (xmlChar *) xmlRealloc(buffer,
2955
max * sizeof(xmlChar));
2957
xmlErrMemory(ctxt, NULL);
2963
COPY_BUF(l,buffer,len,c);
2965
c = CUR_SCHAR(cur, l);
2973
return(xmlStrndup(buf, len));
2978
* @ctxt: an XML parser context
2980
* parse an XML Nmtoken.
2982
* [7] Nmtoken ::= (NameChar)+
2984
* [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
2986
* Returns the Nmtoken parsed or NULL
2990
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2991
xmlChar buf[XML_MAX_NAMELEN + 5];
2999
while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
3000
(c == '.') || (c == '-') ||
3001
(c == '_') || (c == ':') ||
3002
(IS_COMBINING(c)) ||
3004
if (count++ > 100) {
3008
COPY_BUF(l,buf,len,c);
3011
if (len >= XML_MAX_NAMELEN) {
3013
* Okay someone managed to make a huge token, so he's ready to pay
3014
* for the processing speed.
3019
buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3020
if (buffer == NULL) {
3021
xmlErrMemory(ctxt, NULL);
3024
memcpy(buffer, buf, len);
3025
while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
3026
(c == '.') || (c == '-') ||
3027
(c == '_') || (c == ':') ||
3028
(IS_COMBINING(c)) ||
3030
if (count++ > 100) {
3034
if (len + 10 > max) {
3038
tmp = (xmlChar *) xmlRealloc(buffer,
3039
max * sizeof(xmlChar));
3041
xmlErrMemory(ctxt, NULL);
3047
COPY_BUF(l,buffer,len,c);
3057
return(xmlStrndup(buf, len));
3061
* xmlParseEntityValue:
3062
* @ctxt: an XML parser context
3063
* @orig: if non-NULL store a copy of the original entity value
3065
* parse a value for ENTITY declarations
3067
* [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3068
* "'" ([^%&'] | PEReference | Reference)* "'"
3070
* Returns the EntityValue parsed with reference substituted or NULL
3074
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3075
xmlChar *buf = NULL;
3077
int size = XML_PARSER_BUFFER_SIZE;
3080
xmlChar *ret = NULL;
3081
const xmlChar *cur = NULL;
3082
xmlParserInputPtr input;
3084
if (RAW == '"') stop = '"';
3085
else if (RAW == '\'') stop = '\'';
3087
xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3090
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3092
xmlErrMemory(ctxt, NULL);
3097
* The content of the entity definition is copied in a buffer.
3100
ctxt->instate = XML_PARSER_ENTITY_VALUE;
3101
input = ctxt->input;
3106
* NOTE: 4.4.5 Included in Literal
3107
* When a parameter entity reference appears in a literal entity
3108
* value, ... a single or double quote character in the replacement
3109
* text is always treated as a normal data character and will not
3110
* terminate the literal.
3111
* In practice it means we stop the loop only when back at parsing
3112
* the initial entity and the quote is found
3114
while ((IS_CHAR(c)) && ((c != stop) || /* checked */
3115
(ctxt->input != input))) {
3116
if (len + 5 >= size) {
3120
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3122
xmlErrMemory(ctxt, NULL);
3128
COPY_BUF(l,buf,len,c);
3131
* Pop-up of finished entities.
3133
while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3146
* Raise problem w.r.t. '&' and '%' being used in non-entities
3147
* reference constructs. Note Charref will be handled in
3148
* xmlStringDecodeEntities()
3151
while (*cur != 0) { /* non input consuming */
3152
if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3157
name = xmlParseStringName(ctxt, &cur);
3158
if ((name == NULL) || (*cur != ';')) {
3159
xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3160
"EntityValue: '%c' forbidden except for entities references\n",
3163
if ((tmp == '%') && (ctxt->inSubset == 1) &&
3164
(ctxt->inputNr == 1)) {
3165
xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3176
* Then PEReference entities are substituted.
3179
xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3184
* NOTE: 4.4.7 Bypassed
3185
* When a general entity reference appears in the EntityValue in
3186
* an entity declaration, it is bypassed and left as is.
3187
* so XML_SUBSTITUTE_REF is not set here.
3189
ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3201
* xmlParseAttValueComplex:
3202
* @ctxt: an XML parser context
3203
* @len: the resulting attribute len
3204
* @normalize: whether to apply the inner normalization
3206
* parse a value for an attribute, this is the fallback function
3207
* of xmlParseAttValue() when the attribute parsing requires handling
3208
* of non-ASCII characters, or normalization compaction.
3210
* Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3213
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3215
xmlChar *buf = NULL;
3216
xmlChar *rep = NULL;
3219
int c, l, in_space = 0;
3220
xmlChar *current = NULL;
3223
if (NXT(0) == '"') {
3224
ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3227
} else if (NXT(0) == '\'') {
3229
ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3232
xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3237
* allocate a translation buffer.
3239
buf_size = XML_PARSER_BUFFER_SIZE;
3240
buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
3241
if (buf == NULL) goto mem_error;
3244
* OK loop until we reach one of the ending char or a size limit.
3247
while ((NXT(0) != limit) && /* checked */
3248
(IS_CHAR(c)) && (c != '<')) {
3252
if (NXT(1) == '#') {
3253
int val = xmlParseCharRef(ctxt);
3256
if (ctxt->replaceEntities) {
3257
if (len > buf_size - 10) {
3263
* The reparsing will be done in xmlStringGetNodeList()
3264
* called by the attribute() function in SAX.c
3266
if (len > buf_size - 10) {
3275
} else if (val != 0) {
3276
if (len > buf_size - 10) {
3279
len += xmlCopyChar(0, &buf[len], val);
3282
ent = xmlParseEntityRef(ctxt);
3283
if ((ent != NULL) &&
3284
(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3285
if (len > buf_size - 10) {
3288
if ((ctxt->replaceEntities == 0) &&
3289
(ent->content[0] == '&')) {
3296
buf[len++] = ent->content[0];
3298
} else if ((ent != NULL) &&
3299
(ctxt->replaceEntities != 0)) {
3300
if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3301
rep = xmlStringDecodeEntities(ctxt, ent->content,
3306
while (*current != 0) { /* non input consuming */
3307
buf[len++] = *current++;
3308
if (len > buf_size - 10) {
3316
if (len > buf_size - 10) {
3319
if (ent->content != NULL)
3320
buf[len++] = ent->content[0];
3322
} else if (ent != NULL) {
3323
int i = xmlStrlen(ent->name);
3324
const xmlChar *cur = ent->name;
3327
* This may look absurd but is needed to detect
3330
if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3331
(ent->content != NULL)) {
3332
rep = xmlStringDecodeEntities(ctxt, ent->content,
3333
XML_SUBSTITUTE_REF, 0, 0, 0);
3341
* Just output the reference
3344
if (len > buf_size - i - 10) {
3348
buf[len++] = *cur++;
3353
if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
3354
if ((len != 0) || (!normalize)) {
3355
if ((!normalize) || (!in_space)) {
3356
COPY_BUF(l,buf,len,0x20);
3357
if (len > buf_size - 10) {
3365
COPY_BUF(l,buf,len,c);
3366
if (len > buf_size - 10) {
3375
if ((in_space) && (normalize)) {
3376
while (buf[len - 1] == 0x20) len--;
3380
xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
3381
} else if (RAW != limit) {
3382
if ((c != 0) && (!IS_CHAR(c))) {
3383
xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3384
"invalid character in attribute value\n");
3386
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3387
"AttValue: ' expected\n");
3391
if (attlen != NULL) *attlen = len;
3395
xmlErrMemory(ctxt, NULL);
3405
* @ctxt: an XML parser context
3407
* parse a value for an attribute
3408
* Note: the parser won't do substitution of entities here, this
3409
* will be handled later in xmlStringGetNodeList
3411
* [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3412
* "'" ([^<&'] | Reference)* "'"
3414
* 3.3.3 Attribute-Value Normalization:
3415
* Before the value of an attribute is passed to the application or
3416
* checked for validity, the XML processor must normalize it as follows:
3417
* - a character reference is processed by appending the referenced
3418
* character to the attribute value
3419
* - an entity reference is processed by recursively processing the
3420
* replacement text of the entity
3421
* - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3422
* appending #x20 to the normalized value, except that only a single
3423
* #x20 is appended for a "#xD#xA" sequence that is part of an external
3424
* parsed entity or the literal entity value of an internal parsed entity
3425
* - other characters are processed by appending them to the normalized value
3426
* If the declared value is not CDATA, then the XML processor must further
3427
* process the normalized attribute value by discarding any leading and
3428
* trailing space (#x20) characters, and by replacing sequences of space
3429
* (#x20) characters by a single space (#x20) character.
3430
* All attributes for which no declaration has been read should be treated
3431
* by a non-validating parser as if declared CDATA.
3433
* Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3438
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
3439
if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
3440
return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
3444
* xmlParseSystemLiteral:
3445
* @ctxt: an XML parser context
3447
* parse an XML Literal
3449
* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3451
* Returns the SystemLiteral parsed or NULL
3455
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3456
xmlChar *buf = NULL;
3458
int size = XML_PARSER_BUFFER_SIZE;
3461
int state = ctxt->instate;
3468
} else if (RAW == '\'') {
3472
xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
3476
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3478
xmlErrMemory(ctxt, NULL);
3481
ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3483
while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
3484
if (len + 5 >= size) {
3488
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3491
xmlErrMemory(ctxt, NULL);
3492
ctxt->instate = (xmlParserInputState) state;
3502
COPY_BUF(l,buf,len,cur);
3512
ctxt->instate = (xmlParserInputState) state;
3513
if (!IS_CHAR(cur)) {
3514
xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
3522
* xmlParsePubidLiteral:
3523
* @ctxt: an XML parser context
3525
* parse an XML public literal
3527
* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3529
* Returns the PubidLiteral parsed or NULL.
3533
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3534
xmlChar *buf = NULL;
3536
int size = XML_PARSER_BUFFER_SIZE;
3540
xmlParserInputState oldstate = ctxt->instate;
3546
} else if (RAW == '\'') {
3550
xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
3553
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3555
xmlErrMemory(ctxt, NULL);
3558
ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
3560
while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
3561
if (len + 1 >= size) {
3565
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3567
xmlErrMemory(ctxt, NULL);
3589
xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
3593
ctxt->instate = oldstate;
3597
void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
3600
* used for the test in the inner loop of the char data testing
3602
static const unsigned char test_char_data[256] = {
3603
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3604
0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
3605
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3606
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3607
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
3608
0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
3609
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
3610
0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
3611
0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
3612
0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
3613
0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
3614
0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
3615
0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
3616
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
3617
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
3618
0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
3619
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
3620
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3621
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3622
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3623
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3624
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3625
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3626
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3627
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3628
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3629
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3630
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3631
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3632
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3633
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3634
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
3639
* @ctxt: an XML parser context
3640
* @cdata: int indicating whether we are within a CDATA section
3642
* parse a CharData section.
3643
* if we are within a CDATA section ']]>' marks an end of section.
3645
* The right angle bracket (>) may be represented using the string "&gt;",
3646
* and must, for compatibility, be escaped using "&gt;" or a character
3647
* reference when it appears in the string "]]>" in content, when that
3648
* string is not marking the end of a CDATA section.
3650
* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3654
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
3657
int line = ctxt->input->line;
3658
int col = ctxt->input->col;
3664
* Accelerated common case where input doesn't need to be
3665
* modified before passing it to the handler.
3668
in = ctxt->input->cur;
3671
while (*in == 0x20) { in++; ctxt->input->col++; }
3674
ctxt->input->line++; ctxt->input->col = 1;
3676
} while (*in == 0xA);
3677
goto get_more_space;
3680
nbchar = in - ctxt->input->cur;
3682
const xmlChar *tmp = ctxt->input->cur;
3683
ctxt->input->cur = in;
3685
if ((ctxt->sax != NULL) &&
3686
(ctxt->sax->ignorableWhitespace !=
3687
ctxt->sax->characters)) {
3688
if (areBlanks(ctxt, tmp, nbchar, 1)) {
3689
if (ctxt->sax->ignorableWhitespace != NULL)
3690
ctxt->sax->ignorableWhitespace(ctxt->userData,
3693
if (ctxt->sax->characters != NULL)
3694
ctxt->sax->characters(ctxt->userData,
3696
if (*ctxt->space == -1)
3699
} else if ((ctxt->sax != NULL) &&
3700
(ctxt->sax->characters != NULL)) {
3701
ctxt->sax->characters(ctxt->userData,
3709
ccol = ctxt->input->col;
3710
while (test_char_data[*in]) {
3714
ctxt->input->col = ccol;
3717
ctxt->input->line++; ctxt->input->col = 1;
3719
} while (*in == 0xA);
3723
if ((in[1] == ']') && (in[2] == '>')) {
3724
xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
3725
ctxt->input->cur = in;
3732
nbchar = in - ctxt->input->cur;
3734
if ((ctxt->sax != NULL) &&
3735
(ctxt->sax->ignorableWhitespace !=
3736
ctxt->sax->characters) &&
3737
(IS_BLANK_CH(*ctxt->input->cur))) {
3738
const xmlChar *tmp = ctxt->input->cur;
3739
ctxt->input->cur = in;
3741
if (areBlanks(ctxt, tmp, nbchar, 0)) {
3742
if (ctxt->sax->ignorableWhitespace != NULL)
3743
ctxt->sax->ignorableWhitespace(ctxt->userData,
3746
if (ctxt->sax->characters != NULL)
3747
ctxt->sax->characters(ctxt->userData,
3749
if (*ctxt->space == -1)
3752
line = ctxt->input->line;
3753
col = ctxt->input->col;
3754
} else if (ctxt->sax != NULL) {
3755
if (ctxt->sax->characters != NULL)
3756
ctxt->sax->characters(ctxt->userData,
3757
ctxt->input->cur, nbchar);
3758
line = ctxt->input->line;
3759
col = ctxt->input->col;
3762
ctxt->input->cur = in;
3766
ctxt->input->cur = in;
3768
ctxt->input->line++; ctxt->input->col = 1;
3769
continue; /* while */
3781
in = ctxt->input->cur;
3782
} while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
3785
ctxt->input->line = line;
3786
ctxt->input->col = col;
3787
xmlParseCharDataComplex(ctxt, cdata);
3791
* xmlParseCharDataComplex:
3792
* @ctxt: an XML parser context
3793
* @cdata: int indicating whether we are within a CDATA section
3795
* parse a CharData section. This is the fallback function
3796
* of xmlParseCharData() when the parsing requires handling
3797
* of non-ASCII characters.
3800
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
3801
xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3809
while ((cur != '<') && /* checked */
3811
(IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
3812
if ((cur == ']') && (NXT(1) == ']') &&
3816
xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
3819
COPY_BUF(l,buf,nbchar,cur);
3820
if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
3824
* OK the segment is to be consumed as chars.
3826
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3827
if (areBlanks(ctxt, buf, nbchar, 0)) {
3828
if (ctxt->sax->ignorableWhitespace != NULL)
3829
ctxt->sax->ignorableWhitespace(ctxt->userData,
3832
if (ctxt->sax->characters != NULL)
3833
ctxt->sax->characters(ctxt->userData, buf, nbchar);
3834
if ((ctxt->sax->characters !=
3835
ctxt->sax->ignorableWhitespace) &&
3836
(*ctxt->space == -1))
3853
* OK the segment is to be consumed as chars.
3855
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3856
if (areBlanks(ctxt, buf, nbchar, 0)) {
3857
if (ctxt->sax->ignorableWhitespace != NULL)
3858
ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3860
if (ctxt->sax->characters != NULL)
3861
ctxt->sax->characters(ctxt->userData, buf, nbchar);
3862
if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
3863
(*ctxt->space == -1))
3868
if ((cur != 0) && (!IS_CHAR(cur))) {
3869
/* Generate the error and skip the offending character */
3870
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3871
"PCDATA invalid Char value %d\n",
3878
* xmlParseExternalID:
3879
* @ctxt: an XML parser context
3880
* @publicID: a xmlChar** receiving PubidLiteral
3881
* @strict: indicate whether we should restrict parsing to only
3882
* production [75], see NOTE below
3884
* Parse an External ID or a Public ID
3886
* NOTE: Productions [75] and [83] interact badly since [75] can generate
3887
* 'PUBLIC' S PubidLiteral S SystemLiteral
3889
* [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3890
* | 'PUBLIC' S PubidLiteral S SystemLiteral
3892
* [83] PublicID ::= 'PUBLIC' S PubidLiteral
3894
* Returns the SystemLiteral; in the second
3895
* case publicID receives PubidLiteral, if strict is off
3896
* it is possible to return NULL and have publicID set.
3900
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3901
xmlChar *URI = NULL;
3906
if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
3908
if (!IS_BLANK_CH(CUR)) {
3909
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3910
"Space required after 'SYSTEM'\n");
3913
URI = xmlParseSystemLiteral(ctxt);
3915
xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
3917
} else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
3919
if (!IS_BLANK_CH(CUR)) {
3920
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3921
"Space required after 'PUBLIC'\n");
3924
*publicID = xmlParsePubidLiteral(ctxt);
3925
if (*publicID == NULL) {
3926
xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
3930
* We don't handle [83] so "S SystemLiteral" is required.
3932
if (!IS_BLANK_CH(CUR)) {
3933
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3934
"Space required after the Public Identifier\n");
3938
* We handle [83] so we return immediately, if
3939
* "S SystemLiteral" is not detected. From a purely parsing
3940
* point of view that's a nice mess.
3946
if (!IS_BLANK_CH(*ptr)) return(NULL);
3948
while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3949
if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3952
URI = xmlParseSystemLiteral(ctxt);
3954
xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
3961
* xmlParseCommentComplex:
3962
* @ctxt: an XML parser context
3963
* @buf: the already parsed part of the buffer
3964
* @len: number of bytes filled in the buffer
3965
* @size: allocated size of the buffer
3967
* Skip an XML (SGML) comment <!-- .... -->
3968
* The spec says that "For compatibility, the string "--" (double-hyphen)
3969
* must not occur within comments. "
3970
* This is the slow routine in case the accelerator for ascii didn't work
3972
* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3975
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
3979
xmlParserInputPtr input = ctxt->input;
3984
size = XML_PARSER_BUFFER_SIZE;
3985
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3987
xmlErrMemory(ctxt, NULL);
3991
GROW; /* Assure there's enough input data */
3994
goto not_terminated;
3996
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3997
"xmlParseComment: invalid xmlChar value %d\n",
4005
goto not_terminated;
4007
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4008
"xmlParseComment: invalid xmlChar value %d\n",
4016
goto not_terminated;
4017
while (IS_CHAR(cur) && /* checked */
4019
(r != '-') || (q != '-'))) {
4020
if ((r == '-') && (q == '-')) {
4021
xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4023
if (len + 5 >= size) {
4026
new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4027
if (new_buf == NULL) {
4029
xmlErrMemory(ctxt, NULL);
4034
COPY_BUF(ql,buf,len,q);
4055
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4056
"Comment not terminated \n<!--%.50s\n", buf);
4057
} else if (!IS_CHAR(cur)) {
4058
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4059
"xmlParseComment: invalid xmlChar value %d\n",
4062
if (input != ctxt->input) {
4063
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4064
"Comment doesn't start and stop in the same entity\n");
4067
if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4068
(!ctxt->disableSAX))
4069
ctxt->sax->comment(ctxt->userData, buf);
4074
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4075
"Comment not terminated\n", NULL);
4082
* @ctxt: an XML parser context
4084
* Skip an XML (SGML) comment <!-- .... -->
4085
* The spec says that "For compatibility, the string "--" (double-hyphen)
4086
* must not occur within comments. "
4088
* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4091
xmlParseComment(xmlParserCtxtPtr ctxt) {
4092
xmlChar *buf = NULL;
4093
int size = XML_PARSER_BUFFER_SIZE;
4095
xmlParserInputState state;
4097
int nbchar = 0, ccol;
4100
* Check that there is a comment right here.
4102
if ((RAW != '<') || (NXT(1) != '!') ||
4103
(NXT(2) != '-') || (NXT(3) != '-')) return;
4105
state = ctxt->instate;
4106
ctxt->instate = XML_PARSER_COMMENT;
4112
* Accelerated common case where input doesn't need to be
4113
* modified before passing it to the handler.
4115
in = ctxt->input->cur;
4119
ctxt->input->line++; ctxt->input->col = 1;
4121
} while (*in == 0xA);
4124
ccol = ctxt->input->col;
4125
while (((*in > '-') && (*in <= 0x7F)) ||
4126
((*in >= 0x20) && (*in < '-')) ||
4131
ctxt->input->col = ccol;
4134
ctxt->input->line++; ctxt->input->col = 1;
4136
} while (*in == 0xA);
4139
nbchar = in - ctxt->input->cur;
4141
* save current set of data
4144
if ((ctxt->sax != NULL) &&
4145
(ctxt->sax->comment != NULL)) {
4147
if ((*in == '-') && (in[1] == '-'))
4150
size = XML_PARSER_BUFFER_SIZE + nbchar;
4151
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4153
xmlErrMemory(ctxt, NULL);
4154
ctxt->instate = state;
4158
} else if (len + nbchar + 1 >= size) {
4160
size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4161
new_buf = (xmlChar *) xmlRealloc(buf,
4162
size * sizeof(xmlChar));
4163
if (new_buf == NULL) {
4165
xmlErrMemory(ctxt, NULL);
4166
ctxt->instate = state;
4171
memcpy(&buf[len], ctxt->input->cur, nbchar);
4176
ctxt->input->cur = in;
4179
ctxt->input->line++; ctxt->input->col = 1;
4184
ctxt->input->cur = in;
4186
ctxt->input->line++; ctxt->input->col = 1;
4187
continue; /* while */
4193
in = ctxt->input->cur;
4198
if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4199
(!ctxt->disableSAX)) {
4201
ctxt->sax->comment(ctxt->userData, buf);
4203
ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4207
ctxt->instate = state;
4211
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4212
"Comment not terminated \n<!--%.50s\n",
4215
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4216
"Comment not terminated \n", NULL);
4224
} while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4225
xmlParseCommentComplex(ctxt, buf, len, size);
4226
ctxt->instate = state;
4233
* @ctxt: an XML parser context
4235
* parse the name of a PI
4237
* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4239
* Returns the PITarget name or NULL
4243
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
4244
const xmlChar *name;
4246
name = xmlParseName(ctxt);
4247
if ((name != NULL) &&
4248
((name[0] == 'x') || (name[0] == 'X')) &&
4249
((name[1] == 'm') || (name[1] == 'M')) &&
4250
((name[2] == 'l') || (name[2] == 'L'))) {
4252
if ((name[0] == 'x') && (name[1] == 'm') &&
4253
(name[2] == 'l') && (name[3] == 0)) {
4254
xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4255
"XML declaration allowed only at the start of the document\n");
4257
} else if (name[3] == 0) {
4258
xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
4262
if (xmlW3CPIs[i] == NULL) break;
4263
if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4266
xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4267
"xmlParsePITarget: invalid name prefix 'xml'\n",
4273
#ifdef LIBXML_CATALOG_ENABLED
4275
* xmlParseCatalogPI:
4276
* @ctxt: an XML parser context
4277
* @catalog: the PI value string
4279
* parse an XML Catalog Processing Instruction.
4281
* <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
4283
* Occurs only if allowed by the user and if happening in the Misc
4284
* part of the document before any doctype information
4285
* This will add the given catalog to the parsing context in order
4286
* to be used if there is a resolution need further down in the document
4290
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
4291
xmlChar *URL = NULL;
4292
const xmlChar *tmp, *base;
4296
while (IS_BLANK_CH(*tmp)) tmp++;
4297
if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
4300
while (IS_BLANK_CH(*tmp)) tmp++;
4305
while (IS_BLANK_CH(*tmp)) tmp++;
4307
if ((marker != '\'') && (marker != '"'))
4311
while ((*tmp != 0) && (*tmp != marker)) tmp++;
4314
URL = xmlStrndup(base, tmp - base);
4316
while (IS_BLANK_CH(*tmp)) tmp++;
4321
ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
4327
xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
4328
"Catalog PI syntax error: %s\n",
4337
* @ctxt: an XML parser context
4339
* parse an XML Processing Instruction.
4341
* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4343
* The processing is transferred to SAX once parsed.
4347
xmlParsePI(xmlParserCtxtPtr ctxt) {
/*
 * Overview (from the lines visible here): when the input is at "<?", the
 * PI target is parsed with xmlParsePITarget(), the PI content is
 * accumulated in a heap buffer that is grown with xmlRealloc(), and the
 * result is reported through the SAX processingInstruction callback
 * unless SAX is disabled. The parser state saved in `state` is put back
 * into ctxt->instate on the exit paths visible below.
 */
4348
xmlChar *buf = NULL;
4350
int size = XML_PARSER_BUFFER_SIZE;
4352
const xmlChar *target;
4353
xmlParserInputState state;
4356
if ((RAW == '<') && (NXT(1) == '?')) {
4357
xmlParserInputPtr input = ctxt->input;
4358
state = ctxt->instate;
4359
ctxt->instate = XML_PARSER_PI;
4361
* this is a Processing Instruction.
4367
* Parse the target name and check for special support like
4370
target = xmlParsePITarget(ctxt);
4371
if (target != NULL) {
4372
/* Fast path: "?>" directly follows the target, i.e. a PI with no content. */
if ((RAW == '?') && (NXT(1) == '>')) {
4373
/* `input` was captured at "<?"; a different current input means the PI
 * ends in another entity than the one it started in. */
if (input != ctxt->input) {
4374
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4375
"PI declaration doesn't start and stop in the same entity\n");
4382
if ((ctxt->sax) && (!ctxt->disableSAX) &&
4383
(ctxt->sax->processingInstruction != NULL))
4384
ctxt->sax->processingInstruction(ctxt->userData,
4386
ctxt->instate = state;
4389
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4391
xmlErrMemory(ctxt, NULL);
4392
ctxt->instate = state;
4396
if (!IS_BLANK(cur)) {
4397
xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4398
"ParsePI: PI %s space expected\n", target);
4402
/* Collect characters until the closing "?>" or an invalid char. */
while (IS_CHAR(cur) && /* checked */
4403
((cur != '?') || (NXT(1) != '>'))) {
4404
/* Grow the buffer before it fills up (5 bytes of slack are kept). */
if (len + 5 >= size) {
4408
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4410
xmlErrMemory(ctxt, NULL);
4412
ctxt->instate = state;
4422
COPY_BUF(l,buf,len,cur);
4433
xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4434
"ParsePI: PI %s never end ...\n", target);
4436
/*
 * NOTE(review): this is the same entity-boundary check, with the same
 * message, as the one in the empty-PI path above, but it reports
 * XML_ERR_SPACE_REQUIRED instead of XML_ERR_ENTITY_BOUNDARY. This looks
 * like a copy/paste slip in the error code -- confirm and align.
 */
if (input != ctxt->input) {
4437
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4438
"PI declaration doesn't start and stop in the same entity\n");
4442
#ifdef LIBXML_CATALOG_ENABLED
4443
/* A catalog PI is honoured only when the PI was met in the MISC or START
 * state and the catalog defaults allow document-supplied catalogs. */
if (((state == XML_PARSER_MISC) ||
4444
(state == XML_PARSER_START)) &&
4445
(xmlStrEqual(target, XML_CATALOG_PI))) {
4446
xmlCatalogAllow allow = xmlCatalogGetDefaults();
4447
if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4448
(allow == XML_CATA_ALLOW_ALL))
4449
xmlParseCatalogPI(ctxt, buf);
4457
if ((ctxt->sax) && (!ctxt->disableSAX) &&
4458
(ctxt->sax->processingInstruction != NULL))
4459
ctxt->sax->processingInstruction(ctxt->userData,
4464
xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
4466
ctxt->instate = state;
4471
* xmlParseNotationDecl:
4472
* @ctxt: an XML parser context
4474
* parse a notation declaration
4476
* [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4478
* Hence there are actually 3 choices:
4479
* 'PUBLIC' S PubidLiteral
4480
* 'PUBLIC' S PubidLiteral S SystemLiteral
4481
* and 'SYSTEM' S SystemLiteral
4483
* See the NOTE on xmlParseExternalID().
4487
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
/*
 * Overview (from the lines visible here): when the input is at
 * "<!NOTATION", the notation name is parsed with xmlParseName(), the
 * identifiers with xmlParseExternalID() in non-strict mode (strict == 0),
 * and the result is reported through the SAX notationDecl callback unless
 * SAX is disabled. Systemid and Pubid are released with xmlFree() at the
 * end.
 */
4488
const xmlChar *name;
4492
if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
4493
xmlParserInputPtr input = ctxt->input;
4496
if (!IS_BLANK_CH(CUR)) {
4497
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4498
"Space required after '<!NOTATION'\n");
4503
name = xmlParseName(ctxt);
4505
xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
4508
/* NOTE(review): the message below ends with a stray apostrophe after
 * "name"; it is a runtime string and is left untouched here. */
if (!IS_BLANK_CH(CUR)) {
4509
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4510
"Space required after the NOTATION name'\n");
4518
Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4522
/*
 * NOTE(review): `input` was captured at "<!NOTATION", so this detects a
 * declaration that ends in a different entity than it started in. The
 * error code used is XML_ERR_SPACE_REQUIRED, whereas comparable
 * "doesn't start and stop in the same entity" checks in this file
 * (xmlParseCommentComplex, first check in xmlParsePI) report
 * XML_ERR_ENTITY_BOUNDARY -- confirm and align.
 */
if (input != ctxt->input) {
4523
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4524
"Notation declaration doesn't start and stop in the same entity\n");
4527
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4528
(ctxt->sax->notationDecl != NULL))
4529
ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4531
xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
4533
if (Systemid != NULL) xmlFree(Systemid);
4534
if (Pubid != NULL) xmlFree(Pubid);
4539
* xmlParseEntityDecl:
4540
* @ctxt: an XML parser context
4542
* parse <!ENTITY declarations
4544
* [70] EntityDecl ::= GEDecl | PEDecl
4546
* [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4548
* [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4550
* [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4552
* [74] PEDef ::= EntityValue | ExternalID
4554
* [76] NDataDecl ::= S 'NDATA' S Name
4556
* [ VC: Notation Declared ]
4557
* The Name must match the declared name of a notation.
4561
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
4562
const xmlChar *name = NULL;
4563
xmlChar *value = NULL;
4564
xmlChar *URI = NULL, *literal = NULL;
4565
const xmlChar *ndata = NULL;
4566
int isParameter = 0;
4567
xmlChar *orig = NULL;
4570
/* GROW; done in the caller */
4571
if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
4572
xmlParserInputPtr input = ctxt->input;
4575
skipped = SKIP_BLANKS;
4577
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4578
"Space required after '<!ENTITY'\n");
4583
skipped = SKIP_BLANKS;
4585
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4586
"Space required after '%'\n");
4591
name = xmlParseName(ctxt);
4593
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4594
"xmlParseEntityDecl: no name\n");
4597
skipped = SKIP_BLANKS;
4599
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4600
"Space required after the entity name\n");
4603
ctxt->instate = XML_PARSER_ENTITY_DECL;
4605
* handle the various cases of definitions...
4608
if ((RAW == '"') || (RAW == '\'')) {
4609
value = xmlParseEntityValue(ctxt, &orig);
4611
if ((ctxt->sax != NULL) &&
4612
(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4613
ctxt->sax->entityDecl(ctxt->userData, name,
4614
XML_INTERNAL_PARAMETER_ENTITY,
4618
URI = xmlParseExternalID(ctxt, &literal, 1);
4619
if ((URI == NULL) && (literal == NULL)) {
4620
xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
4625
uri = xmlParseURI((const char *) URI);
4627
xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4628
"Invalid URI: %s\n", URI);
4630
* This really ought to be a well-formedness error
4631
* but the XML Core WG decided otherwise, cf. issue
4632
* E26 of the XML errata.
4635
if (uri->fragment != NULL) {
4637
* Okay this is foolish to block those but not
4640
xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
4642
if ((ctxt->sax != NULL) &&
4643
(!ctxt->disableSAX) &&
4644
(ctxt->sax->entityDecl != NULL))
4645
ctxt->sax->entityDecl(ctxt->userData, name,
4646
XML_EXTERNAL_PARAMETER_ENTITY,
4647
literal, URI, NULL);
4654
if ((RAW == '"') || (RAW == '\'')) {
4655
value = xmlParseEntityValue(ctxt, &orig);
4656
if ((ctxt->sax != NULL) &&
4657
(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4658
ctxt->sax->entityDecl(ctxt->userData, name,
4659
XML_INTERNAL_GENERAL_ENTITY,
4662
* For expat compatibility in SAX mode.
4664
if ((ctxt->myDoc == NULL) ||
4665
(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4666
if (ctxt->myDoc == NULL) {
4667
ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4668
if (ctxt->myDoc == NULL) {
4669
xmlErrMemory(ctxt, "New Doc failed");
4673
if (ctxt->myDoc->intSubset == NULL)
4674
ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4675
BAD_CAST "fake", NULL, NULL);
4677
xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4681
URI = xmlParseExternalID(ctxt, &literal, 1);
4682
if ((URI == NULL) && (literal == NULL)) {
4683
xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
4688
uri = xmlParseURI((const char *)URI);
4690
xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4691
"Invalid URI: %s\n", URI);
4693
* This really ought to be a well-formedness error
4694
* but the XML Core WG decided otherwise, cf. issue
4695
* E26 of the XML errata.
4698
if (uri->fragment != NULL) {
4700
* Okay this is foolish to block those but not
4703
xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
4708
if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
4709
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4710
"Space required before 'NDATA'\n");
4713
if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
4715
if (!IS_BLANK_CH(CUR)) {
4716
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4717
"Space required after 'NDATA'\n");
4720
ndata = xmlParseName(ctxt);
4721
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4722
(ctxt->sax->unparsedEntityDecl != NULL))
4723
ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4724
literal, URI, ndata);
4726
if ((ctxt->sax != NULL) &&
4727
(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4728
ctxt->sax->entityDecl(ctxt->userData, name,
4729
XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4730
literal, URI, NULL);
4732
* For expat compatibility in SAX mode.
4733
* assuming the entity replacement was asked for
4735
if ((ctxt->replaceEntities != 0) &&
4736
((ctxt->myDoc == NULL) ||
4737
(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4738
if (ctxt->myDoc == NULL) {
4739
ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4740
if (ctxt->myDoc == NULL) {
4741
xmlErrMemory(ctxt, "New Doc failed");
4746
if (ctxt->myDoc->intSubset == NULL)
4747
ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4748
BAD_CAST "fake", NULL, NULL);
4749
xmlSAX2EntityDecl(ctxt, name,
4750
XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4751
literal, URI, NULL);
4758
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
4759
"xmlParseEntityDecl: entity %s not terminated\n", name);
4761
if (input != ctxt->input) {
4762
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4763
"Entity declaration doesn't start and stop in the same entity\n");
4769
* Ugly mechanism to save the raw entity value.
4771
xmlEntityPtr cur = NULL;
4774
if ((ctxt->sax != NULL) &&
4775
(ctxt->sax->getParameterEntity != NULL))
4776
cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4778
if ((ctxt->sax != NULL) &&
4779
(ctxt->sax->getEntity != NULL))
4780
cur = ctxt->sax->getEntity(ctxt->userData, name);
4781
if ((cur == NULL) && (ctxt->userData==ctxt)) {
4782
cur = xmlSAX2GetEntity(ctxt, name);
4786
if (cur->orig != NULL)
4793
if (value != NULL) xmlFree(value);
4794
if (URI != NULL) xmlFree(URI);
4795
if (literal != NULL) xmlFree(literal);
4800
* xmlParseDefaultDecl:
4801
* @ctxt: an XML parser context
4802
* @value: Receive a possible fixed default value for the attribute
4804
* Parse an attribute default declaration
4806
* [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4808
* [ VC: Required Attribute ]
4809
* if the default declaration is the keyword #REQUIRED, then the
4810
* attribute must be specified for all elements of the type in the
4811
* attribute-list declaration.
4813
* [ VC: Attribute Default Legal ]
4814
* The declared default value must meet the lexical constraints of
4815
* the declared attribute type c.f. xmlValidateAttributeDecl()
4817
* [ VC: Fixed Attribute Default ]
4818
* if an attribute has a default value declared with the #FIXED
4819
* keyword, instances of that attribute must match the default value.
4821
* [ WFC: No < in Attribute Values ]
4822
* handled in xmlParseAttValue()
4824
* returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4825
* or XML_ATTRIBUTE_FIXED.
4829
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4834
if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
4836
return(XML_ATTRIBUTE_REQUIRED);
4838
if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
4840
return(XML_ATTRIBUTE_IMPLIED);
4842
val = XML_ATTRIBUTE_NONE;
4843
if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
4845
val = XML_ATTRIBUTE_FIXED;
4846
if (!IS_BLANK_CH(CUR)) {
4847
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4848
"Space required after '#FIXED'\n");
4852
ret = xmlParseAttValue(ctxt);
4853
ctxt->instate = XML_PARSER_DTD;
4855
xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
4856
"Attribute default value declaration error\n");
4863
* xmlParseNotationType:
4864
* @ctxt: an XML parser context
4866
* parse a Notation attribute type.
4868
* Note: the leading 'NOTATION' S part has already been parsed...
4870
* [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4872
* [ VC: Notation Attributes ]
4873
* Values of this type must match one of the notation names included
4874
* in the declaration; all notation names in the declaration must be declared.
4876
* Returns: the notation attribute tree built while parsing
4880
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
4881
const xmlChar *name;
4882
xmlEnumerationPtr ret = NULL, last = NULL, cur;
4885
xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
4892
name = xmlParseName(ctxt);
4894
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4895
"Name expected in NOTATION declaration\n");
4898
cur = xmlCreateEnumeration(name);
4899
if (cur == NULL) return(ret);
4900
if (last == NULL) ret = last = cur;
4906
} while (RAW == '|');
4908
xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
4909
if ((last != NULL) && (last != ret))
4910
xmlFreeEnumeration(last);
4918
* xmlParseEnumerationType:
4919
* @ctxt: an XML parser context
4921
* parse an Enumeration attribute type.
4923
* [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4925
* [ VC: Enumeration ]
4926
* Values of this type must match one of the Nmtoken tokens in
4929
* Returns: the enumeration attribute tree built while parsing
4933
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4935
xmlEnumerationPtr ret = NULL, last = NULL, cur;
4938
xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
4945
name = xmlParseNmtoken(ctxt);
4947
xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
4950
cur = xmlCreateEnumeration(name);
4952
if (cur == NULL) return(ret);
4953
if (last == NULL) ret = last = cur;
4959
} while (RAW == '|');
4961
xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
4969
* xmlParseEnumeratedType:
4970
* @ctxt: an XML parser context
4971
* @tree: the enumeration tree built while parsing
4973
* parse an Enumerated attribute type.
4975
* [57] EnumeratedType ::= NotationType | Enumeration
4977
* [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4980
* Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4984
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4985
if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
4987
if (!IS_BLANK_CH(CUR)) {
4988
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4989
"Space required after 'NOTATION'\n");
4993
*tree = xmlParseNotationType(ctxt);
4994
if (*tree == NULL) return(0);
4995
return(XML_ATTRIBUTE_NOTATION);
4997
*tree = xmlParseEnumerationType(ctxt);
4998
if (*tree == NULL) return(0);
4999
return(XML_ATTRIBUTE_ENUMERATION);
5003
* xmlParseAttributeType:
5004
* @ctxt: an XML parser context
5005
* @tree: the enumeration tree built while parsing
5007
* parse the attribute type (AttType) of an attribute definition
5009
* [54] AttType ::= StringType | TokenizedType | EnumeratedType
5011
* [55] StringType ::= 'CDATA'
5013
* [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5014
* 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5016
* Validity constraints for attribute values syntax are checked in
5017
* xmlValidateAttributeValue()
5020
* Values of type ID must match the Name production. A name must not
5021
* appear more than once in an XML document as a value of this type;
5022
* i.e., ID values must uniquely identify the elements which bear them.
5024
* [ VC: One ID per Element Type ]
5025
* No element type may have more than one ID attribute specified.
5027
* [ VC: ID Attribute Default ]
5028
* An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5031
* Values of type IDREF must match the Name production, and values
5032
* of type IDREFS must match Names; each IDREF Name must match the value
5033
* of an ID attribute on some element in the XML document; i.e. IDREF
5034
* values must match the value of some ID attribute.
5036
* [ VC: Entity Name ]
5037
* Values of type ENTITY must match the Name production, values
5038
* of type ENTITIES must match Names; each Entity Name must match the
5039
* name of an unparsed entity declared in the DTD.
5041
* [ VC: Name Token ]
5042
* Values of type NMTOKEN must match the Nmtoken production; values
5043
* of type NMTOKENS must match Nmtokens.
5045
* Returns the attribute type
5048
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5050
if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5052
return(XML_ATTRIBUTE_CDATA);
5053
} else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5055
return(XML_ATTRIBUTE_IDREFS);
5056
} else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5058
return(XML_ATTRIBUTE_IDREF);
5059
} else if ((RAW == 'I') && (NXT(1) == 'D')) {
5061
return(XML_ATTRIBUTE_ID);
5062
} else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5064
return(XML_ATTRIBUTE_ENTITY);
5065
} else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5067
return(XML_ATTRIBUTE_ENTITIES);
5068
} else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5070
return(XML_ATTRIBUTE_NMTOKENS);
5071
} else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5073
return(XML_ATTRIBUTE_NMTOKEN);
5075
return(xmlParseEnumeratedType(ctxt, tree));
5079
* xmlParseAttributeListDecl:
5080
* @ctxt: an XML parser context
5082
* parse the attribute list declaration for an element
5084
* [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5086
* [53] AttDef ::= S Name S AttType S DefaultDecl
5090
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5091
const xmlChar *elemName;
5092
const xmlChar *attrName;
5093
xmlEnumerationPtr tree;
5095
if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5096
xmlParserInputPtr input = ctxt->input;
5099
if (!IS_BLANK_CH(CUR)) {
5100
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5101
"Space required after '<!ATTLIST'\n");
5104
elemName = xmlParseName(ctxt);
5105
if (elemName == NULL) {
5106
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5107
"ATTLIST: no name for Element\n");
5112
while (RAW != '>') {
5113
const xmlChar *check = CUR_PTR;
5116
xmlChar *defaultValue = NULL;
5120
attrName = xmlParseName(ctxt);
5121
if (attrName == NULL) {
5122
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5123
"ATTLIST: no name for Attribute\n");
5127
if (!IS_BLANK_CH(CUR)) {
5128
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5129
"Space required after the attribute name\n");
5134
type = xmlParseAttributeType(ctxt, &tree);
5140
if (!IS_BLANK_CH(CUR)) {
5141
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5142
"Space required after the attribute type\n");
5144
xmlFreeEnumeration(tree);
5149
def = xmlParseDefaultDecl(ctxt, &defaultValue);
5151
if (defaultValue != NULL)
5152
xmlFree(defaultValue);
5154
xmlFreeEnumeration(tree);
5157
if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5158
xmlAttrNormalizeSpace(defaultValue, defaultValue);
5162
if (!IS_BLANK_CH(CUR)) {
5163
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5164
"Space required after the attribute default value\n");
5165
if (defaultValue != NULL)
5166
xmlFree(defaultValue);
5168
xmlFreeEnumeration(tree);
5173
if (check == CUR_PTR) {
5174
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5175
"in xmlParseAttributeListDecl\n");
5176
if (defaultValue != NULL)
5177
xmlFree(defaultValue);
5179
xmlFreeEnumeration(tree);
5182
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5183
(ctxt->sax->attributeDecl != NULL))
5184
ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5185
type, def, defaultValue, tree);
5186
else if (tree != NULL)
5187
xmlFreeEnumeration(tree);
5189
if ((ctxt->sax2) && (defaultValue != NULL) &&
5190
(def != XML_ATTRIBUTE_IMPLIED) &&
5191
(def != XML_ATTRIBUTE_REQUIRED)) {
5192
xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5195
xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5197
if (defaultValue != NULL)
5198
xmlFree(defaultValue);
5202
if (input != ctxt->input) {
5203
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5204
"Attribute list declaration doesn't start and stop in the same entity\n");
5212
* xmlParseElementMixedContentDecl:
5213
* @ctxt: an XML parser context
5214
* @inputchk: the input used for the current entity, needed for boundary checks
5216
* parse the declaration for a Mixed Element content
5217
* The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5219
* [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5220
* '(' S? '#PCDATA' S? ')'
5222
* [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5224
* [ VC: No Duplicate Types ]
5225
* The same name must not appear more than once in a single
5226
* mixed-content declaration.
5228
* returns: the list of the xmlElementContentPtr describing the element choices
5230
xmlElementContentPtr
5231
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
5232
xmlElementContentPtr ret = NULL, cur = NULL, n;
5233
const xmlChar *elem = NULL;
5236
if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5241
if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5242
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5243
"Element content declaration doesn't start and stop in the same entity\n",
5247
ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5251
ret->ocur = XML_ELEMENT_CONTENT_MULT;
5256
if ((RAW == '(') || (RAW == '|')) {
5257
ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5258
if (ret == NULL) return(NULL);
5260
while (RAW == '|') {
5263
ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5264
if (ret == NULL) return(NULL);
5270
n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5271
if (n == NULL) return(NULL);
5272
n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5281
elem = xmlParseName(ctxt);
5283
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5284
"xmlParseElementMixedContentDecl : Name expected\n");
5285
xmlFreeDocElementContent(ctxt->myDoc, cur);
5291
if ((RAW == ')') && (NXT(1) == '*')) {
5293
cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
5294
XML_ELEMENT_CONTENT_ELEMENT);
5295
if (cur->c2 != NULL)
5296
cur->c2->parent = cur;
5298
ret->ocur = XML_ELEMENT_CONTENT_MULT;
5299
if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5300
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5301
"Element content declaration doesn't start and stop in the same entity\n",
5306
xmlFreeDocElementContent(ctxt->myDoc, ret);
5307
xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
5312
xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
5318
* xmlParseElementChildrenContentDecl:
5319
* @ctxt: an XML parser context
5320
* @inputchk: the input used for the current entity, needed for boundary checks
5322
* parse the declaration for an element-only (children) content model
5323
* The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5326
* [47] children ::= (choice | seq) ('?' | '*' | '+')?
5328
* [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5330
* [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5332
* [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5334
* [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5335
* TODO Parameter-entity replacement text must be properly nested
5336
* with parenthesized groups. That is to say, if either of the
5337
* opening or closing parentheses in a choice, seq, or Mixed
5338
* construct is contained in the replacement text for a parameter
5339
* entity, both must be contained in the same replacement text. For
5340
* interoperability, if a parameter-entity reference appears in a
5341
* choice, seq, or Mixed construct, its replacement text should not
5342
* be empty, and neither the first nor last non-blank character of
5343
* the replacement text should be a connector (| or ,).
5345
* Returns the tree of xmlElementContentPtr describing the element
5348
xmlElementContentPtr
5349
xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
5350
xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
5351
const xmlChar *elem;
5357
int inputid = ctxt->input->id;
5359
/* Recurse on first child */
5362
cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
5366
elem = xmlParseName(ctxt);
5368
xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
5371
cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5373
xmlErrMemory(ctxt, NULL);
5378
cur->ocur = XML_ELEMENT_CONTENT_OPT;
5380
} else if (RAW == '*') {
5381
cur->ocur = XML_ELEMENT_CONTENT_MULT;
5383
} else if (RAW == '+') {
5384
cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5387
cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5393
while (RAW != ')') {
5395
* Each loop we parse one separator and one element.
5398
if (type == 0) type = CUR;
5401
* Detect "Name | Name , Name" error
5403
else if (type != CUR) {
5404
xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
5405
"xmlParseElementChildrenContentDecl : '%c' expected\n",
5407
if ((last != NULL) && (last != ret))
5408
xmlFreeDocElementContent(ctxt->myDoc, last);
5410
xmlFreeDocElementContent(ctxt->myDoc, ret);
5415
op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
5417
if ((last != NULL) && (last != ret))
5418
xmlFreeDocElementContent(ctxt->myDoc, last);
5419
xmlFreeDocElementContent(ctxt->myDoc, ret);
5437
} else if (RAW == '|') {
5438
if (type == 0) type = CUR;
5441
* Detect "Name , Name | Name" error
5443
else if (type != CUR) {
5444
xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
5445
"xmlParseElementChildrenContentDecl : '%c' expected\n",
5447
if ((last != NULL) && (last != ret))
5448
xmlFreeDocElementContent(ctxt->myDoc, last);
5450
xmlFreeDocElementContent(ctxt->myDoc, ret);
5455
op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5457
if ((last != NULL) && (last != ret))
5458
xmlFreeDocElementContent(ctxt->myDoc, last);
5460
xmlFreeDocElementContent(ctxt->myDoc, ret);
5479
xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
5480
if ((last != NULL) && (last != ret))
5481
xmlFreeDocElementContent(ctxt->myDoc, last);
5483
xmlFreeDocElementContent(ctxt->myDoc, ret);
5490
int inputid = ctxt->input->id;
5491
/* Recurse on second child */
5494
last = xmlParseElementChildrenContentDecl(ctxt, inputid);
5497
elem = xmlParseName(ctxt);
5499
xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
5501
xmlFreeDocElementContent(ctxt->myDoc, ret);
5504
last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5507
xmlFreeDocElementContent(ctxt->myDoc, ret);
5511
last->ocur = XML_ELEMENT_CONTENT_OPT;
5513
} else if (RAW == '*') {
5514
last->ocur = XML_ELEMENT_CONTENT_MULT;
5516
} else if (RAW == '+') {
5517
last->ocur = XML_ELEMENT_CONTENT_PLUS;
5520
last->ocur = XML_ELEMENT_CONTENT_ONCE;
5526
if ((cur != NULL) && (last != NULL)) {
5531
if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5532
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5533
"Element content declaration doesn't start and stop in the same entity\n",
5539
if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5540
(ret->ocur == XML_ELEMENT_CONTENT_MULT))
5541
ret->ocur = XML_ELEMENT_CONTENT_MULT;
5543
ret->ocur = XML_ELEMENT_CONTENT_OPT;
5546
} else if (RAW == '*') {
5548
ret->ocur = XML_ELEMENT_CONTENT_MULT;
5551
* Some normalization:
5552
* (a | b* | c?)* == (a | b | c)*
5554
while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
5555
if ((cur->c1 != NULL) &&
5556
((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5557
(cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5558
cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5559
if ((cur->c2 != NULL) &&
5560
((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5561
(cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5562
cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5567
} else if (RAW == '+') {
5571
if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5572
(ret->ocur == XML_ELEMENT_CONTENT_MULT))
5573
ret->ocur = XML_ELEMENT_CONTENT_MULT;
5575
ret->ocur = XML_ELEMENT_CONTENT_PLUS;
5577
* Some normalization:
5578
* (a | b*)+ == (a | b)*
5579
* (a | b?)+ == (a | b)*
5581
while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
5582
if ((cur->c1 != NULL) &&
5583
((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5584
(cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5585
cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5588
if ((cur->c2 != NULL) &&
5589
((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5590
(cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5591
cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5597
ret->ocur = XML_ELEMENT_CONTENT_MULT;
5605
* xmlParseElementContentDecl:
5606
* @ctxt: an XML parser context
5607
* @name: the name of the element being defined.
5608
* @result: the Element Content pointer will be stored here if any
5610
* parse the declaration for an Element content either Mixed or Children,
5611
* the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5613
* [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5615
* returns: the type of element content XML_ELEMENT_TYPE_xxx
5619
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
5620
xmlElementContentPtr *result) {
5622
xmlElementContentPtr tree = NULL;
5623
int inputid = ctxt->input->id;
5629
xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
5630
"xmlParseElementContentDecl : %s '(' expected\n", name);
5636
if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5637
tree = xmlParseElementMixedContentDecl(ctxt, inputid);
5638
res = XML_ELEMENT_TYPE_MIXED;
5640
tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
5641
res = XML_ELEMENT_TYPE_ELEMENT;
5649
* xmlParseElementDecl:
5650
* @ctxt: an XML parser context
5652
* parse an Element declaration.
5654
* [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5656
* [ VC: Unique Element Type Declaration ]
5657
* No element type may be declared more than once
5659
* Returns the type of the element, or -1 in case of error
5662
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
5663
const xmlChar *name;
5665
xmlElementContentPtr content = NULL;
5667
/* GROW; done in the caller */
5668
if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
5669
xmlParserInputPtr input = ctxt->input;
5672
if (!IS_BLANK_CH(CUR)) {
5673
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5674
"Space required after 'ELEMENT'\n");
5677
name = xmlParseName(ctxt);
5679
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5680
"xmlParseElementDecl: no name for Element\n");
5683
while ((RAW == 0) && (ctxt->inputNr > 1))
5685
if (!IS_BLANK_CH(CUR)) {
5686
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5687
"Space required after the element name\n");
5690
if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
5693
* Element must always be empty.
5695
ret = XML_ELEMENT_TYPE_EMPTY;
5696
} else if ((RAW == 'A') && (NXT(1) == 'N') &&
5700
* Element is a generic container.
5702
ret = XML_ELEMENT_TYPE_ANY;
5703
} else if (RAW == '(') {
5704
ret = xmlParseElementContentDecl(ctxt, name, &content);
5707
* [ WFC: PEs in Internal Subset ] error handling.
5709
if ((RAW == '%') && (ctxt->external == 0) &&
5710
(ctxt->inputNr == 1)) {
5711
xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
5712
"PEReference: forbidden within markup decl in internal subset\n");
5714
xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
5715
"xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5722
* Pop-up of finished entities.
5724
while ((RAW == 0) && (ctxt->inputNr > 1))
5729
xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
5730
if (content != NULL) {
5731
xmlFreeDocElementContent(ctxt->myDoc, content);
5734
if (input != ctxt->input) {
5735
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5736
"Element declaration doesn't start and stop in the same entity\n");
5740
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5741
(ctxt->sax->elementDecl != NULL)) {
5742
if (content != NULL)
5743
content->parent = NULL;
5744
ctxt->sax->elementDecl(ctxt->userData, name, ret,
5746
if ((content != NULL) && (content->parent == NULL)) {
5748
* this is a trick: if xmlAddElementDecl is called,
5749
* instead of copying the full tree it is plugged directly
5750
* if called from the parser. Avoid duplicating the
5751
* interfaces or changing the API/ABI
5753
xmlFreeDocElementContent(ctxt->myDoc, content);
5755
} else if (content != NULL) {
5756
xmlFreeDocElementContent(ctxt->myDoc, content);
5764
* xmlParseConditionalSections
5765
* @ctxt: an XML parser context
5767
* [61] conditionalSect ::= includeSect | ignoreSect
5768
* [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5769
* [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5770
* [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5771
* [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5775
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5778
if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
5782
xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
5786
if (xmlParserDebugEntities) {
5787
if ((ctxt->input != NULL) && (ctxt->input->filename))
5788
xmlGenericError(xmlGenericErrorContext,
5789
"%s(%d): ", ctxt->input->filename,
5791
xmlGenericError(xmlGenericErrorContext,
5792
"Entering INCLUDE Conditional Section\n");
5795
while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5797
const xmlChar *check = CUR_PTR;
5798
unsigned int cons = ctxt->input->consumed;
5800
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5801
xmlParseConditionalSections(ctxt);
5802
} else if (IS_BLANK_CH(CUR)) {
5804
} else if (RAW == '%') {
5805
xmlParsePEReference(ctxt);
5807
xmlParseMarkupDecl(ctxt);
5810
* Pop-up of finished entities.
5812
while ((RAW == 0) && (ctxt->inputNr > 1))
5815
if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5816
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
5820
if (xmlParserDebugEntities) {
5821
if ((ctxt->input != NULL) && (ctxt->input->filename))
5822
xmlGenericError(xmlGenericErrorContext,
5823
"%s(%d): ", ctxt->input->filename,
5825
xmlGenericError(xmlGenericErrorContext,
5826
"Leaving INCLUDE Conditional Section\n");
5829
} else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
5831
xmlParserInputState instate;
5837
xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
5841
if (xmlParserDebugEntities) {
5842
if ((ctxt->input != NULL) && (ctxt->input->filename))
5843
xmlGenericError(xmlGenericErrorContext,
5844
"%s(%d): ", ctxt->input->filename,
5846
xmlGenericError(xmlGenericErrorContext,
5847
"Entering IGNORE Conditional Section\n");
5851
* Parse up to the end of the conditional section
5852
* but disable the SAX events that build the DTD in the meantime
5854
state = ctxt->disableSAX;
5855
instate = ctxt->instate;
5856
if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5857
ctxt->instate = XML_PARSER_IGNORE;
5859
while ((depth >= 0) && (RAW != 0)) {
5860
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5865
if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5866
if (--depth >= 0) SKIP(3);
5873
ctxt->disableSAX = state;
5874
ctxt->instate = instate;
5876
if (xmlParserDebugEntities) {
5877
if ((ctxt->input != NULL) && (ctxt->input->filename))
5878
xmlGenericError(xmlGenericErrorContext,
5879
"%s(%d): ", ctxt->input->filename,
5881
xmlGenericError(xmlGenericErrorContext,
5882
"Leaving IGNORE Conditional Section\n");
5886
xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
5893
xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
5900
* xmlParseMarkupDecl:
5901
* @ctxt: an XML parser context
5903
* parse Markup declarations
5905
* [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5906
* NotationDecl | PI | Comment
5908
* [ VC: Proper Declaration/PE Nesting ]
5909
* Parameter-entity replacement text must be properly nested with
5910
* markup declarations. That is to say, if either the first character
5911
* or the last character of a markup declaration (markupdecl above) is
5912
* contained in the replacement text for a parameter-entity reference,
5913
* both must be contained in the same replacement text.
5915
* [ WFC: PEs in Internal Subset ]
5916
* In the internal DTD subset, parameter-entity references can occur
5917
* only where markup declarations can occur, not within markup declarations.
5918
* (This does not apply to references that occur in external parameter
5919
* entities or to the external subset.)
5922
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5925
if (NXT(1) == '!') {
5929
xmlParseElementDecl(ctxt);
5930
else if (NXT(3) == 'N')
5931
xmlParseEntityDecl(ctxt);
5934
xmlParseAttributeListDecl(ctxt);
5937
xmlParseNotationDecl(ctxt);
5940
xmlParseComment(ctxt);
5943
/* there is an error but it will be detected later */
5946
} else if (NXT(1) == '?') {
5951
* This is only for the internal subset. On external entities,
5952
* the replacement is done before the parsing stage
5954
if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5955
xmlParsePEReference(ctxt);
5958
* Conditional sections are allowed from entities included
5959
* by PE References in the internal subset.
5961
if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5962
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5963
xmlParseConditionalSections(ctxt);
5967
ctxt->instate = XML_PARSER_DTD;
5972
* @ctxt: an XML parser context
5974
* parse an XML declaration header for external entities
5976
* [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5978
* Question: it seems that EncodingDecl is mandatory? Is that a typo?
5982
/*
 * xmlParseTextDecl: parse the text declaration of an external entity.
 *
 * Tests for the <?xml prefix followed by a blank, reads an optional
 * VersionInfo (XML_DEFAULT_VERSION is duplicated when none is returned)
 * and stores the version string in ctxt->input->version. It then parses
 * the encoding declaration, raising XML_ERR_MISSING_ENCODING when no
 * encoding was returned and ctxt->errNo is still XML_ERR_OK, and finally
 * looks for the closing ?> sequence, raising
 * XML_ERR_XMLDECL_NOT_FINISHED on the other visible branches.
 *
 * @ctxt: an XML parser context
 *
 * NOTE(review): some lines of this copy are missing (for instance the
 * declaration of version and the bodies of a few branches).
 */
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5984
const xmlChar *encoding;
5987
* We know that '<?xml' is here.
5989
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
5992
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
5996
if (!IS_BLANK_CH(CUR)) {
5997
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5998
"Space needed after '<?xml'\n");
6003
* We may have the VersionInfo here.
6005
/* fall back to XML_DEFAULT_VERSION when no VersionInfo was parsed */
version = xmlParseVersionInfo(ctxt);
6006
if (version == NULL)
6007
version = xmlCharStrdup(XML_DEFAULT_VERSION);
6009
if (!IS_BLANK_CH(CUR)) {
6010
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6011
"Space needed here\n");
6014
/* the version string is recorded on the current input */
ctxt->input->version = version;
6017
* We must have the encoding declaration
6019
encoding = xmlParseEncodingDecl(ctxt);
6020
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6022
* The XML REC instructs us to stop parsing right here
6026
/* a missing encoding is only reported when no other error is pending */
if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6027
xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6028
"Missing encoding in text declaration\n");
6032
/* the declaration has to be closed by ?>, other endings are errors */
if ((RAW == '?') && (NXT(1) == '>')) {
6034
} else if (RAW == '>') {
6035
/* Deprecated old WD ... */
6036
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6039
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6040
MOVETO_ENDTAG(CUR_PTR);
6046
* xmlParseExternalSubset:
6047
* @ctxt: an XML parser context
6048
* @ExternalID: the external identifier
6049
* @SystemID: the system identifier (or URL)
6051
* parse Markup declarations from an external subset
6053
* [30] extSubset ::= textDecl? extSubsetDecl
6055
* [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6058
/*
 * xmlParseExternalSubset: parse the declarations of an external subset.
 *
 * Steps visible here: call xmlDetectSAX2(), detect the character
 * encoding from 4 bytes when ctxt->encoding is still
 * XML_CHAR_ENCODING_NONE and at least 4 bytes are available, parse an
 * optional text declaration, make sure ctxt->myDoc and its internal
 * subset exist, then loop over conditional sections, blanks, PE
 * references and markup declarations. An iteration that neither moves
 * CUR_PTR nor changes ctxt->input->consumed raises
 * XML_ERR_EXT_SUBSET_NOT_FINISHED.
 *
 * @ctxt: an XML parser context
 * @ExternalID: the external identifier
 * @SystemID: the system identifier (or URL)
 *
 * NOTE(review): some lines of this copy are missing (for instance the
 * declaration of start and several branch bodies).
 */
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6059
const xmlChar *SystemID) {
6060
xmlDetectSAX2(ctxt);
6063
/* no encoding known yet: try to detect one from the next 4 bytes */
if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
6064
(ctxt->input->end - ctxt->input->cur >= 4)) {
6066
xmlCharEncoding enc;
6072
enc = xmlDetectCharEncoding(start, 4);
6073
if (enc != XML_CHAR_ENCODING_NONE)
6074
xmlSwitchEncoding(ctxt, enc);
6077
/* an optional text declaration may open the subset */
if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
6078
xmlParseTextDecl(ctxt);
6079
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6081
* The XML REC instructs us to stop parsing right here
6083
ctxt->instate = XML_PARSER_EOF;
6087
/* create the document and its internal subset when they do not exist */
if (ctxt->myDoc == NULL) {
6088
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6090
if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6091
xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6093
ctxt->instate = XML_PARSER_DTD;
6095
/* main loop: runs while the input starts a PI, a declaration, a PE reference or a blank */
while (((RAW == '<') && (NXT(1) == '?')) ||
6096
((RAW == '<') && (NXT(1) == '!')) ||
6097
(RAW == '%') || IS_BLANK_CH(CUR)) {
6098
/* snapshot of the position, used below to detect a lack of progress */
const xmlChar *check = CUR_PTR;
6099
unsigned int cons = ctxt->input->consumed;
6102
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6103
xmlParseConditionalSections(ctxt);
6104
} else if (IS_BLANK_CH(CUR)) {
6106
} else if (RAW == '%') {
6107
xmlParsePEReference(ctxt);
6109
xmlParseMarkupDecl(ctxt);
6112
* Pop-up of finished entities.
6114
while ((RAW == 0) && (ctxt->inputNr > 1))
6117
/* nothing was consumed during this iteration: report an error */
if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6118
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6124
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6130
* xmlParseReference:
6131
* @ctxt: an XML parser context
6133
* parse and handle entity references in content, depending on the SAX
6134
* interface, this may end up in a call to characters() if this is a
6135
* CharRef, a predefined entity, if there is no reference() callback.
6136
* or if the parser was asked to switch to that mode.
6138
* [67] Reference ::= EntityRef | CharRef
6141
/*
 * xmlParseReference: handle a reference found in content.
 *
 * Returns immediately unless the current character is &.
 *  - Character reference (& followed by #): the value returned by
 *    xmlParseCharRef() is delivered through the SAX characters()
 *    callback; when ctxt->charset is not UTF-8 and the value is above
 *    0xFF it is instead formatted as a #x.. or #.. name and passed to
 *    the SAX reference() callback.
 *  - Entity reference: the entity comes from xmlParseEntityRef(). On
 *    first use (ent->checked == 0) its content is parsed with
 *    xmlParseBalancedChunkMemoryInternal() or
 *    xmlParseExternalEntityPrivate() depending on ent->etype, and the
 *    resulting node list may become ent->children. The later steps
 *    either notify SAX reference(), copy or move the entity children
 *    under ctxt->node, or send the text to SAX characters().
 *
 * @ctxt: an XML parser context
 *
 * NOTE(review): many lines of this copy are missing (local declarations,
 * else branches, closing braces), so the exact nesting of the steps above
 * cannot be confirmed from this listing.
 */
xmlParseReference(xmlParserCtxtPtr ctxt) {
6144
if (RAW != '&') return;
6146
/* character reference branch */
if (NXT(1) == '#') {
6150
int value = xmlParseCharRef(ctxt);
6154
if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6156
* So we are using non-UTF-8 buffers
6157
* Check that the char fit on 8bits, if not
6158
* generate a CharRef.
6160
if (value <= 0xFF) {
6163
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6164
(!ctxt->disableSAX))
6165
ctxt->sax->characters(ctxt->userData, out, 1);
6167
/* keep the notation (hexadecimal or decimal) used in the document */
if ((hex == 'x') || (hex == 'X'))
6168
snprintf((char *)out, sizeof(out), "#x%X", value);
6170
snprintf((char *)out, sizeof(out), "#%d", value);
6171
if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6172
(!ctxt->disableSAX))
6173
ctxt->sax->reference(ctxt->userData, out);
6177
* Just encode the value in UTF-8
6179
COPY_BUF(0 ,out, i, value);
6181
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6182
(!ctxt->disableSAX))
6183
ctxt->sax->characters(ctxt->userData, out, i);
6188
/* entity reference branch */
ent = xmlParseEntityRef(ctxt);
6189
if (ent == NULL) return;
6190
if (!ctxt->wellFormed)
6192
/* remember the checked flag before the first-use parsing below */
was_checked = ent->checked;
6193
if ((ent->name != NULL) &&
6194
(ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
6195
xmlNodePtr list = NULL;
6196
xmlParserErrors ret = XML_ERR_OK;
6200
* The first reference to the entity trigger a parsing phase
6201
* where the ent->children is filled with the result from
6204
if (ent->checked == 0) {
6207
value = ent->content;
6210
* Check that this entity is well formed
6212
if ((value != NULL) && (value[0] != 0) &&
6213
(value[1] == 0) && (value[0] == '<') &&
6214
(xmlStrEqual(ent->name, BAD_CAST "lt"))) {
6216
* DONE: get definite answer on this !!!
6217
* Lots of entity decls are used to declare a single
6220
* Which seems to be valid since
6221
* 2.4: The ampersand character (&) and the left angle
6222
* bracket (<) may appear in their literal form only
6223
* when used ... They are also legal within the literal
6224
* entity value of an internal entity declaration;i
6225
* see "4.3.2 Well-Formed Parsed Entities".
6226
* IMHO 2.4 and 4.3.2 are directly in contradiction.
6227
* Looking at the OASIS test suite and James Clark
6228
* tests, this is broken. However the XML REC uses
6229
* it. Is the XML REC not well-formed ????
6230
* This is a hack to avoid this problem
6232
* ANSWER: since lt gt amp .. are already defined,
6233
* this is a redefinition and hence the fact that the
6234
* content is not well balanced is not a Wf error, this
6235
* is lousy but acceptable.
6237
/* the single < content of a redefined lt entity becomes a text node */
list = xmlNewDocText(ctxt->myDoc, value);
6239
if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6240
(ent->children == NULL)) {
6241
ent->children = list;
6244
list->parent = (xmlNodePtr) ent;
6246
xmlFreeNodeList(list);
6248
} else if (list != NULL) {
6249
xmlFreeNodeList(list);
6253
* 4.3.2: An internal general parsed entity is well-formed
6254
* if its replacement text matches the production labeled
6260
* This is a bit hackish but this seems the best
6261
* way to make sure both SAX and DOM entity support
6264
if (ctxt->userData == ctxt)
6267
user_data = ctxt->userData;
6269
/* internal entities are parsed from memory, external parsed ones through the external entity loader path */
if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6271
ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6272
value, user_data, &list);
6274
} else if (ent->etype ==
6275
XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6277
ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6278
ctxt->sax, user_data, ctxt->depth,
6279
ent->URI, ent->ExternalID, &list);
6282
ret = XML_ERR_ENTITY_PE_INTERNAL;
6283
xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6284
"invalid entity type found\n", NULL);
6286
/* act on the outcome of parsing the entity content */
if (ret == XML_ERR_ENTITY_LOOP) {
6287
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6289
} else if ((ret == XML_ERR_OK) && (list != NULL)) {
6290
if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6291
(ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6292
(ent->children == NULL)) {
6293
ent->children = list;
6294
if (ctxt->replaceEntities) {
6296
* Prune it directly in the generated document
6297
* except for single text nodes.
6299
if (((list->type == XML_TEXT_NODE) &&
6300
(list->next == NULL)) ||
6301
(ctxt->parseMode == XML_PARSE_READER)) {
6302
list->parent = (xmlNodePtr) ent;
6307
/* reparent every node of the list to the current node and document */
while (list != NULL) {
6308
list->parent = (xmlNodePtr) ctxt->node;
6309
list->doc = ctxt->myDoc;
6310
if (list->next == NULL)
6314
list = ent->children;
6315
#ifdef LIBXML_LEGACY_ENABLED
6316
if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6317
xmlAddEntityReference(ent, list, NULL);
6318
#endif /* LIBXML_LEGACY_ENABLED */
6322
/* here the nodes of the list are parented to the entity itself */
while (list != NULL) {
6323
list->parent = (xmlNodePtr) ent;
6324
if (list->next == NULL)
6330
xmlFreeNodeList(list);
6333
} else if ((ret != XML_ERR_OK) &&
6334
(ret != XML_WAR_UNDECLARED_ENTITY)) {
6335
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6336
"Entity '%s' failed to parse\n", ent->name);
6337
} else if (list != NULL) {
6338
xmlFreeNodeList(list);
6345
if (ent->children == NULL) {
6347
* Probably running in SAX mode and the callbacks don't
6348
* build the entity content. So unless we already went
6349
* though parsing for first checking go though the entity
6350
* content to generate callbacks associated to the entity
6352
if (was_checked == 1) {
6355
* This is a bit hackish but this seems the best
6356
* way to make sure both SAX and DOM entity support
6359
if (ctxt->userData == ctxt)
6362
user_data = ctxt->userData;
6364
if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6366
ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6367
ent->content, user_data, NULL);
6369
} else if (ent->etype ==
6370
XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6372
ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6373
ctxt->sax, user_data, ctxt->depth,
6374
ent->URI, ent->ExternalID, NULL);
6377
ret = XML_ERR_ENTITY_PE_INTERNAL;
6378
xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6379
"invalid entity type found\n", NULL);
6381
if (ret == XML_ERR_ENTITY_LOOP) {
6382
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6386
if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6387
(ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6389
* Entity reference callback comes second, it's somewhat
6390
* superfluous but a compatibility to historical behaviour
6392
ctxt->sax->reference(ctxt->userData, ent->name);
6396
/* without entity replacement the reference is only reported through SAX */
if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6397
(ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6401
ctxt->sax->reference(ctxt->userData, ent->name);
6404
if ((ctxt->replaceEntities) || (ent->children == NULL)) {
6406
* There is a problem on the handling of _private for entities
6407
* (bug 155816): Should we copy the content of the field from
6408
* the entity (possibly overwriting some value set by the user
6409
* when a copy is created), should we leave it alone, or should
6410
* we try to take care of different situations? The problem
6411
* is exacerbated by the usage of this field by the xmlReader.
6412
* To fix this bug, we look at _private on the created node
6413
* and, if it's NULL, we copy in whatever was in the entity.
6414
* If it's not NULL we leave it alone. This is somewhat of a
6415
* hack - maybe we should have further tests to determine
6418
if ((ctxt->node != NULL) && (ent->children != NULL)) {
6420
* Seems we are generating the DOM content, do
6421
* a simple tree copy for all references except the first
6422
* In the first occurrence list contains the replacement.
6423
* progressive == 2 means we are operating on the Reader
6424
* and since nodes are discarded we must copy all the time.
6426
/* case 1: copies of the entity children are added under ctxt->node */
if (((list == NULL) && (ent->owner == 0)) ||
6427
(ctxt->parseMode == XML_PARSE_READER)) {
6428
xmlNodePtr nw = NULL, cur, firstChild = NULL;
6431
* when operating on a reader, the entities definitions
6432
* are always owning the entities subtree.
6433
if (ctxt->parseMode == XML_PARSE_READER)
6437
cur = ent->children;
6438
while (cur != NULL) {
6439
nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6441
/* _private is inherited from the source node only when the copy has none */
if (nw->_private == NULL)
6442
nw->_private = cur->_private;
6443
if (firstChild == NULL){
6446
nw = xmlAddChild(ctxt->node, nw);
6448
if (cur == ent->last) {
6450
* needed to detect some strange empty
6451
* node cases in the reader tests
6453
if ((ctxt->parseMode == XML_PARSE_READER) &&
6455
(nw->type == XML_ELEMENT_NODE) &&
6456
(nw->children == NULL))
6463
#ifdef LIBXML_LEGACY_ENABLED
6464
if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6465
xmlAddEntityReference(ent, firstChild, nw);
6466
#endif /* LIBXML_LEGACY_ENABLED */
6467
} else if (list == NULL) {
/* case 2: the copies become the entity children, the original nodes go under ctxt->node */
6468
xmlNodePtr nw = NULL, cur, next, last,
6471
* Copy the entity child list and make it the new
6472
* entity child list. The goal is to make sure any
6473
* ID or REF referenced will be the one from the
6474
* document content and not the entity copy.
6476
cur = ent->children;
6477
ent->children = NULL;
6480
while (cur != NULL) {
6484
nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6486
if (nw->_private == NULL)
6487
nw->_private = cur->_private;
6488
if (firstChild == NULL){
6491
xmlAddChild((xmlNodePtr) ent, nw);
6492
xmlAddChild(ctxt->node, cur);
6499
#ifdef LIBXML_LEGACY_ENABLED
6500
if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6501
xmlAddEntityReference(ent, firstChild, nw);
6502
#endif /* LIBXML_LEGACY_ENABLED */
6504
/* case 3: edge text nodes are renamed to nbktext, then the entity children are attached as they are */
const xmlChar *nbktext;
6507
* the name change is to avoid coalescing of the
6508
* node with a possible previous text one which
6509
* would make ent->children a dangling pointer
6511
nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6513
if (ent->children->type == XML_TEXT_NODE)
6514
ent->children->name = nbktext;
6515
if ((ent->last != ent->children) &&
6516
(ent->last->type == XML_TEXT_NODE))
6517
ent->last->name = nbktext;
6518
xmlAddChildList(ctxt->node, ent->children);
6522
* This is to avoid a nasty side effect, see
6523
* characters() in SAX.c
6532
if (val == NULL) return;
6534
* inline the entity.
6536
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6537
(!ctxt->disableSAX))
6538
ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6544
* xmlParseEntityRef:
6545
* @ctxt: an XML parser context
6547
* parse an ENTITY reference
6549
* [68] EntityRef ::= '&' Name ';'
6551
* [ WFC: Entity Declared ]
6552
* In a document without any DTD, a document with only an internal DTD
6553
* subset which contains no parameter entity references, or a document
6554
* with "standalone='yes'", the Name given in the entity reference
6555
* must match that in an entity declaration, except that well-formed
6556
* documents need not declare any of the following entities: amp, lt,
6557
* gt, apos, quot. The declaration of a parameter entity must precede
6558
* any reference to it. Similarly, the declaration of a general entity
6559
* must precede any reference to it which appears in a default value in an
6560
* attribute-list declaration. Note that if entities are declared in the
6561
* external subset or in external parameter entities, a non-validating
6562
* processor is not obligated to read and process their declarations;
6563
* for such documents, the rule that an entity must be declared is a
6564
* well-formedness constraint only if standalone='yes'.
6566
* [ WFC: Parsed Entity ]
6567
* An entity reference must not contain the name of an unparsed entity
6569
* Returns the xmlEntityPtr if found, or NULL otherwise.
6572
/*
 * xmlParseEntityRef: parse an entity reference and look up its entity.
 *
 * The name comes from xmlParseName(). Lookup order visible here: the SAX
 * getEntity() callback, then xmlGetPredefinedEntity(), then
 * xmlSAX2GetEntity() when ctxt->userData is the context itself; the last
 * two are only tried while ctxt->wellFormed == 1. A missing entity is
 * reported as a fatal XML_ERR_UNDECLARED_ENTITY for standalone documents
 * or documents with neither external subset nor PE references, and with
 * XML_WAR_UNDECLARED_ENTITY on the other visible path. Entities that are
 * found are checked against the Parsed Entity, No External Entity
 * References and No < in Attribute Values constraints, and parameter
 * entities are rejected.
 *
 * @ctxt: an XML parser context
 *
 * Returns the xmlEntityPtr if found, or NULL otherwise.
 *
 * NOTE(review): some lines of this copy are missing (tests on the
 * current character, else keywords, return statements).
 */
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
6573
const xmlChar *name;
6574
xmlEntityPtr ent = NULL;
6580
name = xmlParseName(ctxt);
6582
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6583
"xmlParseEntityRef: no name\n");
6588
* Ask first SAX for entity resolution, otherwise try the
6591
if (ctxt->sax != NULL) {
6592
if (ctxt->sax->getEntity != NULL)
6593
ent = ctxt->sax->getEntity(ctxt->userData, name);
6594
/* the fallbacks below are skipped once the document is known not to be well formed */
if ((ctxt->wellFormed == 1 ) && (ent == NULL))
6595
ent = xmlGetPredefinedEntity(name);
6596
if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6597
(ctxt->userData==ctxt)) {
6598
ent = xmlSAX2GetEntity(ctxt, name);
6602
* [ WFC: Entity Declared ]
6603
* In a document without any DTD, a document with only an
6604
* internal DTD subset which contains no parameter entity
6605
* references, or a document with "standalone='yes'", the
6606
* Name given in the entity reference must match that in an
6607
* entity declaration, except that well-formed documents
6608
* need not declare any of the following entities: amp, lt,
6610
* The declaration of a parameter entity must precede any
6612
* Similarly, the declaration of a general entity must
6613
* precede any reference to it which appears in a default
6614
* value in an attribute-list declaration. Note that if
6615
* entities are declared in the external subset or in
6616
* external parameter entities, a non-validating processor
6617
* is not obligated to read and process their declarations;
6618
* for such documents, the rule that an entity must be
6619
* declared is a well-formedness constraint only if
6623
/* fatal when every declaration had to be visible, reported through xmlErrMsgStr() otherwise */
if ((ctxt->standalone == 1) ||
6624
((ctxt->hasExternalSubset == 0) &&
6625
(ctxt->hasPErefs == 0))) {
6626
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6627
"Entity '%s' not defined\n", name);
6629
xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
6630
"Entity '%s' not defined\n", name);
6631
/* outside of the DTD subsets the unresolved name is still passed to the SAX reference() callback */
if ((ctxt->inSubset == 0) &&
6632
(ctxt->sax != NULL) &&
6633
(ctxt->sax->reference != NULL)) {
6634
ctxt->sax->reference(ctxt->userData, name);
6641
* [ WFC: Parsed Entity ]
6642
* An entity reference must not contain the name of an
6645
else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
6646
xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
6647
"Entity reference to unparsed entity %s\n", name);
6651
* [ WFC: No External Entity References ]
6652
* Attribute values cannot contain direct or indirect
6653
* entity references to external entities.
6655
else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6656
(ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6657
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6658
"Attribute references external entity '%s'\n", name);
6661
* [ WFC: No < in Attribute Values ]
6662
* The replacement text of any entity referred to directly or
6663
* indirectly in an attribute value (other than "<") must
6666
else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6668
(!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6669
(ent->content != NULL) &&
6670
(xmlStrchr(ent->content, '<'))) {
6671
xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6672
"'<' in entity '%s' is not allowed in attributes values\n", name);
6676
* Internal check, no parameter entities here ...
6679
switch (ent->etype) {
6680
case XML_INTERNAL_PARAMETER_ENTITY:
6681
case XML_EXTERNAL_PARAMETER_ENTITY:
6682
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6683
"Attempt to reference the parameter entity '%s'\n",
6692
* [ WFC: No Recursion ]
6693
* A parsed entity must not contain a recursive reference
6694
* to itself, either directly or indirectly.
6695
* Done somewhere else
6699
xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6707
* xmlParseStringEntityRef:
6708
* @ctxt: an XML parser context
6709
* @str: a pointer to an index in the string
6711
* parse ENTITY references declarations, but this version parses it from
6714
* [68] EntityRef ::= '&' Name ';'
6716
* [ WFC: Entity Declared ]
6717
* In a document without any DTD, a document with only an internal DTD
6718
* subset which contains no parameter entity references, or a document
6719
* with "standalone='yes'", the Name given in the entity reference
6720
* must match that in an entity declaration, except that well-formed
6721
* documents need not declare any of the following entities: amp, lt,
6722
* gt, apos, quot. The declaration of a parameter entity must precede
6723
* any reference to it. Similarly, the declaration of a general entity
6724
* must precede any reference to it which appears in a default value in an
6725
* attribute-list declaration. Note that if entities are declared in the
6726
* external subset or in external parameter entities, a non-validating
6727
* processor is not obligated to read and process their declarations;
6728
* for such documents, the rule that an entity must be declared is a
6729
* well-formedness constraint only if standalone='yes'.
6731
* [ WFC: Parsed Entity ]
6732
* An entity reference must not contain the name of an unparsed entity
6734
* Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6735
* is updated to the current location in the string.
6738
/*
 * xmlParseStringEntityRef: parse an entity reference out of a string.
 *
 * Same checks as the input-based entity reference parser, but the name
 * is read with xmlParseStringName() from a local cursor instead of the
 * parser input. Lookup order visible here: the SAX getEntity() callback,
 * xmlGetPredefinedEntity(), then xmlSAX2GetEntity() when nothing was
 * found and ctxt->userData is the context itself. Missing entities,
 * unparsed entities, external entities or a < inside attribute values,
 * and parameter entities are reported the same way.
 *
 * @ctxt: an XML parser context
 * @str: a pointer to an index in the string
 *
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
 * is updated to the current location in the string.
 *
 * NOTE(review): some lines of this copy are missing (declarations of
 * name and ptr, tests on the current character, return statements).
 */
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6742
xmlEntityPtr ent = NULL;
6744
/* both the pointer and the string it designates must be set */
if ((str == NULL) || (*str == NULL))
6751
name = xmlParseStringName(ctxt, &ptr);
6753
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6754
"xmlParseStringEntityRef: no name\n");
6759
* Ask first SAX for entity resolution, otherwise try the
6762
if (ctxt->sax != NULL) {
6763
if (ctxt->sax->getEntity != NULL)
6764
ent = ctxt->sax->getEntity(ctxt->userData, name);
6766
ent = xmlGetPredefinedEntity(name);
6767
if ((ent == NULL) && (ctxt->userData==ctxt)) {
6768
ent = xmlSAX2GetEntity(ctxt, name);
6772
* [ WFC: Entity Declared ]
6773
* In a document without any DTD, a document with only an
6774
* internal DTD subset which contains no parameter entity
6775
* references, or a document with "standalone='yes'", the
6776
* Name given in the entity reference must match that in an
6777
* entity declaration, except that well-formed documents
6778
* need not declare any of the following entities: amp, lt,
6780
* The declaration of a parameter entity must precede any
6782
* Similarly, the declaration of a general entity must
6783
* precede any reference to it which appears in a default
6784
* value in an attribute-list declaration. Note that if
6785
* entities are declared in the external subset or in
6786
* external parameter entities, a non-validating processor
6787
* is not obligated to read and process their declarations;
6788
* for such documents, the rule that an entity must be
6789
* declared is a well-formedness constraint only if
6793
/* fatal when every declaration had to be visible, reported through xmlErrMsgStr() otherwise */
if ((ctxt->standalone == 1) ||
6794
((ctxt->hasExternalSubset == 0) &&
6795
(ctxt->hasPErefs == 0))) {
6796
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6797
"Entity '%s' not defined\n", name);
6799
xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
6800
"Entity '%s' not defined\n",
6803
/* TODO ? check regressions ctxt->valid = 0; */
6807
* [ WFC: Parsed Entity ]
6808
* An entity reference must not contain the name of an
6811
else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
6812
xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
6813
"Entity reference to unparsed entity %s\n", name);
6817
* [ WFC: No External Entity References ]
6818
* Attribute values cannot contain direct or indirect
6819
* entity references to external entities.
6821
else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6822
(ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6823
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6824
"Attribute references external entity '%s'\n", name);
6827
* [ WFC: No < in Attribute Values ]
6828
* The replacement text of any entity referred to directly or
6829
* indirectly in an attribute value (other than "<") must
6832
else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6834
(!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6835
(ent->content != NULL) &&
6836
(xmlStrchr(ent->content, '<'))) {
6837
xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6838
"'<' in entity '%s' is not allowed in attributes values\n",
6843
* Internal check, no parameter entities here ...
6846
switch (ent->etype) {
6847
case XML_INTERNAL_PARAMETER_ENTITY:
6848
case XML_EXTERNAL_PARAMETER_ENTITY:
6849
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6850
"Attempt to reference the parameter entity '%s'\n",
6859
* [ WFC: No Recursion ]
6860
* A parsed entity must not contain a recursive reference
6861
* to itself, either directly or indirectly.
6862
* Done somewhere else
6866
xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6876
* xmlParsePEReference:
6877
* @ctxt: an XML parser context
6879
* parse PEReference declarations
6880
* The entity content is handled directly by pushing its content as
6881
* a new input stream.
6883
* [69] PEReference ::= '%' Name ';'
6885
* [ WFC: No Recursion ]
6886
* A parsed entity must not contain a recursive
6887
* reference to itself, either directly or indirectly.
6889
* [ WFC: Entity Declared ]
6890
* In a document without any DTD, a document with only an internal DTD
6891
* subset which contains no parameter entity references, or a document
6892
* with "standalone='yes'", ... ... The declaration of a parameter
6893
* entity must precede any reference to it...
6895
* [ VC: Entity Declared ]
6896
* In a document with an external subset or external parameter entities
6897
* with "standalone='no'", ... ... The declaration of a parameter entity
6898
* must precede any reference to it...
6901
* Parameter-entity references may only appear in the DTD.
6902
* NOTE: misleading but this is handled.
6905
/*
 * xmlParsePEReference: parse a parameter entity reference from the input.
 *
 * The name comes from xmlParseName() and the entity from the SAX
 * getParameterEntity() callback. An unknown entity is a fatal
 * XML_ERR_UNDECLARED_ENTITY for standalone documents or documents with
 * neither external subset nor PE references, and a warning on the other
 * visible path. An entity that is not a parameter entity only triggers
 * a warning. Otherwise a new input is pushed: a blanks wrapper stream
 * when the current input is not already one (ctxt->input->free is not
 * deallocblankswrapper), else the entity stream itself, in which case an
 * external entity starting with <?xml plus a blank gets its text
 * declaration parsed. ctxt->hasPErefs is set to 1.
 *
 * @ctxt: an XML parser context
 *
 * NOTE(review): some lines of this copy are missing (tests on the
 * current character, else keywords, trailing call arguments).
 */
xmlParsePEReference(xmlParserCtxtPtr ctxt)
6907
const xmlChar *name;
6908
xmlEntityPtr entity = NULL;
6909
xmlParserInputPtr input;
6913
name = xmlParseName(ctxt);
6915
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6916
"xmlParsePEReference: no name\n");
6920
/* parameter entities are only resolved through the SAX callback here */
if ((ctxt->sax != NULL) &&
6921
(ctxt->sax->getParameterEntity != NULL))
6922
entity = ctxt->sax->getParameterEntity(ctxt->userData,
6924
if (entity == NULL) {
6926
* [ WFC: Entity Declared ]
6927
* In a document without any DTD, a document with only an
6928
* internal DTD subset which contains no parameter entity
6929
* references, or a document with "standalone='yes'", ...
6930
* ... The declaration of a parameter entity must precede
6931
* any reference to it...
6933
if ((ctxt->standalone == 1) ||
6934
((ctxt->hasExternalSubset == 0) &&
6935
(ctxt->hasPErefs == 0))) {
6936
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6937
"PEReference: %%%s; not found\n",
6941
* [ VC: Entity Declared ]
6942
* In a document with an external subset or external
6943
* parameter entities with "standalone='no'", ...
6944
* ... The declaration of a parameter entity must
6945
* precede any reference to it...
6947
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6948
"PEReference: %%%s; not found\n",
6954
* Internal checking in case the entity quest barfed
6956
if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6957
(entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6958
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6959
"Internal: %%%s; is not a parameter entity\n",
6961
} else if (ctxt->input->free != deallocblankswrapper) {
6963
xmlNewBlanksWrapperInputStream(ctxt, entity);
6964
xmlPushInput(ctxt, input);
6968
* handle the extra spaces added before and after
6969
* c.f. http://www.w3.org/TR/REC-xml#as-PE
6971
input = xmlNewEntityInputStream(ctxt, entity);
6972
xmlPushInput(ctxt, input);
6973
/* an external parameter entity may start with its own text declaration */
if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6974
(CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
6975
(IS_BLANK_CH(NXT(5)))) {
6976
xmlParseTextDecl(ctxt);
6978
XML_ERR_UNSUPPORTED_ENCODING) {
6980
* The XML REC instructs us to stop parsing
6983
ctxt->instate = XML_PARSER_EOF;
6989
/* record that a PE reference was met, this flag is read by the Entity Declared checks */
ctxt->hasPErefs = 1;
6991
xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6998
* xmlLoadEntityContent:
6999
* @ctxt: an XML parser context
7000
* @entity: an unloaded system entity
7002
* Load the original content of the given system entity from the
7003
* ExternalID/SystemID given. This is to be used for Included in Literal
7004
* http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7006
* Returns 0 in case of success and -1 in case of failure
7009
/*
 * xmlLoadEntityContent: load the raw content of an external entity.
 *
 * Accepts only external parameter entities or external general parsed
 * entities whose content field is still NULL; anything else raises
 * XML_ERR_INTERNAL_ERROR. It creates a buffer with xmlBufferCreate(),
 * opens the entity with xmlNewEntityInputStream(), pushes that input and
 * appends what is read to the buffer with xmlBufferAdd() while the
 * pushed input remains current. An invalid character is reported with
 * XML_ERR_INVALID_CHAR. The buffer content is finally handed over to
 * entity->content and buf->content is reset to NULL.
 *
 * @ctxt: an XML parser context
 * @entity: an unloaded system entity
 *
 * Returns 0 in case of success and -1 in case of failure
 *
 * NOTE(review): some lines of this copy are missing (declarations of
 * buf, count, l and c, part of the loop, the return statements).
 */
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7010
xmlParserInputPtr input;
7015
/* argument validation: right entity type and content not loaded yet */
if ((ctxt == NULL) || (entity == NULL) ||
7016
((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7017
(entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7018
(entity->content != NULL)) {
7019
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7020
"xmlLoadEntityContent parameter error");
7024
if (xmlParserDebugEntities)
7025
xmlGenericError(xmlGenericErrorContext,
7026
"Reading %s entity content input\n", entity->name);
7028
buf = xmlBufferCreate();
7030
/* NOTE(review): presumably the buffer creation failure path, yet the message repeats the parameter error text -- confirm */
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7031
"xmlLoadEntityContent parameter error");
7035
input = xmlNewEntityInputStream(ctxt, entity);
7036
if (input == NULL) {
7037
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7038
"xmlLoadEntityContent input error");
7044
* Push the entity as the current input, read char by char
7045
* saving to the buffer until the end of the entity or an error
7047
xmlPushInput(ctxt, input);
7050
/* the loop only runs while the pushed input is still the current one */
while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7052
xmlBufferAdd(buf, ctxt->input->cur, l);
7053
if (count++ > 100) {
7061
if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7063
} else if (!IS_CHAR(c)) {
7064
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7065
"xmlLoadEntityContent: invalid char value %d\n",
7070
/* the entity takes over the buffer content, the buffer is detached from it */
entity->content = buf->content;
7071
buf->content = NULL;
7078
* xmlParseStringPEReference:
7079
* @ctxt: an XML parser context
7080
* @str: a pointer to an index in the string
7082
* parse PEReference declarations
7084
* [69] PEReference ::= '%' Name ';'
7086
* [ WFC: No Recursion ]
7087
* A parsed entity must not contain a recursive
7088
* reference to itself, either directly or indirectly.
7090
* [ WFC: Entity Declared ]
7091
* In a document without any DTD, a document with only an internal DTD
7092
* subset which contains no parameter entity references, or a document
7093
* with "standalone='yes'", ... ... The declaration of a parameter
7094
* entity must precede any reference to it...
7096
* [ VC: Entity Declared ]
7097
* In a document with an external subset or external parameter entities
7098
* with "standalone='no'", ... ... The declaration of a parameter entity
7099
* must precede any reference to it...
7102
* Parameter-entity references may only appear in the DTD.
7103
* NOTE: misleading but this is handled.
7105
* Returns the string of the entity content.
7106
* str is updated to the current value of the index
7109
/*
 * xmlParseStringPEReference: parse a parameter entity reference out of a
 * string.
 *
 * Returns NULL at once when str or the string it designates is NULL.
 * The name is read with xmlParseStringName() from a local cursor and the
 * entity is obtained from the SAX getParameterEntity() callback. An
 * unknown entity is a fatal XML_ERR_UNDECLARED_ENTITY for standalone
 * documents or documents with neither external subset nor PE
 * references, and a warning on the other visible path. An entity that
 * is not a parameter entity only triggers a warning. ctxt->hasPErefs is
 * set to 1.
 *
 * @ctxt: an XML parser context
 * @str: a pointer to an index in the string
 *
 * Returns the string of the entity content.
 * str is updated to the current value of the index
 *
 * NOTE(review): some lines of this copy are missing (declarations of
 * name and ptr, tests on the current character, return statements).
 */
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7113
xmlEntityPtr entity = NULL;
7115
if ((str == NULL) || (*str == NULL)) return(NULL);
7121
name = xmlParseStringName(ctxt, &ptr);
7123
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7124
"xmlParseStringPEReference: no name\n");
7130
/* parameter entities are only resolved through the SAX callback here */
if ((ctxt->sax != NULL) &&
7131
(ctxt->sax->getParameterEntity != NULL))
7132
entity = ctxt->sax->getParameterEntity(ctxt->userData,
7134
if (entity == NULL) {
7136
* [ WFC: Entity Declared ]
7137
* In a document without any DTD, a document with only an
7138
* internal DTD subset which contains no parameter entity
7139
* references, or a document with "standalone='yes'", ...
7140
* ... The declaration of a parameter entity must precede
7141
* any reference to it...
7143
if ((ctxt->standalone == 1) ||
7144
((ctxt->hasExternalSubset == 0) &&
7145
(ctxt->hasPErefs == 0))) {
7146
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7147
"PEReference: %%%s; not found\n", name);
7150
* [ VC: Entity Declared ]
7151
* In a document with an external subset or external
7152
* parameter entities with "standalone='no'", ...
7153
* ... The declaration of a parameter entity must
7154
* precede any reference to it...
7156
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7157
"PEReference: %%%s; not found\n",
7163
* Internal checking in case the entity quest barfed
7165
if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7166
(entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7167
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7168
"%%%s; is not a parameter entity\n",
7172
/* record that a PE reference was met, this flag is read by the Entity Declared checks */
ctxt->hasPErefs = 1;
7174
xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7184
* xmlParseDocTypeDecl:
7185
* @ctxt: an XML parser context
7187
* parse a DOCTYPE declaration
7189
* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7190
* ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7192
* [ VC: Root Element Type ]
7193
* The Name in the document type declaration must match the element
7194
* type of the root element.
7198
/*
 * xmlParseDocTypeDecl: parse the head of a DOCTYPE declaration.
 *
 * Reads the DOCTYPE name with xmlParseName() and stores it in
 * ctxt->intSubName, then reads the optional external identifier with
 * xmlParseExternalID(). When a system or public identifier is present,
 * ctxt->hasExternalSubset is set to 1. The URI and the external ID are
 * stored in ctxt->extSubURI and ctxt->extSubSystem, and the SAX
 * internalSubset() callback is invoked unless SAX is disabled. The
 * internal subset itself is not parsed here.
 *
 * @ctxt: an XML parser context
 *
 * NOTE(review): some lines of this copy are missing (skipping of the
 * DOCTYPE keyword and blanks, tests around the final error report).
 */
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
7199
const xmlChar *name = NULL;
7200
xmlChar *ExternalID = NULL;
7201
xmlChar *URI = NULL;
7204
* We know that '<!DOCTYPE' has been detected.
7211
* Parse the DOCTYPE name.
7213
name = xmlParseName(ctxt);
7215
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7216
"xmlParseDocTypeDecl : no DOCTYPE name !\n");
7218
ctxt->intSubName = name;
7223
* Check for SystemID and ExternalID
7225
URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7227
/* any identifier means the document refers to an external subset */
if ((URI != NULL) || (ExternalID != NULL)) {
7228
ctxt->hasExternalSubset = 1;
7230
/* both identifiers are kept on the context */
ctxt->extSubURI = URI;
7231
ctxt->extSubSystem = ExternalID;
7236
* Create and update the internal subset.
7238
if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7239
(!ctxt->disableSAX))
7240
ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7243
* Is there any internal subset declarations ?
7244
* they are handled separately in xmlParseInternalSubset()
7250
* We should be at the end of the DOCTYPE declaration.
7253
xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7259
* xmlParseInternalSubset:
7260
* @ctxt: an XML parser context
7262
* parse the internal subset declaration
7264
* [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7268
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7270
* Is there any DTD definition ?
7273
ctxt->instate = XML_PARSER_DTD;
7276
* Parse the succession of Markup declarations and
7278
* Subsequence (markupdecl | PEReference | S)*
7280
while (RAW != ']') {
7281
const xmlChar *check = CUR_PTR;
7282
unsigned int cons = ctxt->input->consumed;
7285
xmlParseMarkupDecl(ctxt);
7286
xmlParsePEReference(ctxt);
7289
* Pop-up of finished entities.
7291
while ((RAW == 0) && (ctxt->inputNr > 1))
7294
if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7295
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7296
"xmlParseInternalSubset: error detected in Markup declaration\n");
7307
* We should be at the end of the DOCTYPE declaration.
7310
xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7315
#ifdef LIBXML_SAX1_ENABLED
7317
* xmlParseAttribute:
7318
* @ctxt: an XML parser context
7319
* @value: a xmlChar ** used to store the value of the attribute
7321
* parse an attribute
7323
* [41] Attribute ::= Name Eq AttValue
7325
* [ WFC: No External Entity References ]
7326
* Attribute values cannot contain direct or indirect entity references
7327
* to external entities.
7329
* [ WFC: No < in Attribute Values ]
7330
* The replacement text of any entity referred to directly or indirectly in
7331
* an attribute value (other than "<") must not contain a <.
7333
* [ VC: Attribute Value Type ]
7334
* The attribute must have been declared; the value must be of the type
7337
* [25] Eq ::= S? '=' S?
7341
* [NS 11] Attribute ::= QName Eq AttValue
7343
* Also the case QName == xmlns:??? is handled independently as a namespace
7346
* Returns the attribute name, and the value in *value.
7350
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
7351
const xmlChar *name;
7356
name = xmlParseName(ctxt);
7358
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7359
"error parsing attribute name\n");
7370
val = xmlParseAttValue(ctxt);
7371
ctxt->instate = XML_PARSER_CONTENT;
7373
xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
7374
"Specification mandate value for attribute %s\n", name);
7379
* Check that xml:lang conforms to the specification
7380
* No more registered as an error, just generate a warning now
7381
* since this was deprecated in XML second edition
7383
if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7384
if (!xmlCheckLanguageID(val)) {
7385
xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7386
"Malformed value for xml:lang : %s\n",
7392
* Check that xml:space conforms to the specification
7394
if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7395
if (xmlStrEqual(val, BAD_CAST "default"))
7397
else if (xmlStrEqual(val, BAD_CAST "preserve"))
7400
xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
7401
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7412
* @ctxt: an XML parser context
7414
* parse a start of tag either for rule element or
7415
* EmptyElement. In both case we don't parse the tag closing chars.
7417
* [40] STag ::= '<' Name (S Attribute)* S? '>'
7419
* [ WFC: Unique Att Spec ]
7420
* No attribute name may appear more than once in the same start-tag or
7421
* empty-element tag.
7423
* [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7425
* [ WFC: Unique Att Spec ]
7426
* No attribute name may appear more than once in the same start-tag or
7427
* empty-element tag.
7431
* [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7433
* [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7435
* Returns the element name parsed
7439
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
7440
const xmlChar *name;
7441
const xmlChar *attname;
7443
const xmlChar **atts = ctxt->atts;
7445
int maxatts = ctxt->maxatts;
7448
if (RAW != '<') return(NULL);
7451
name = xmlParseName(ctxt);
7453
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7454
"xmlParseStartTag: invalid element name\n");
7459
* Now parse the attributes, it ends up with the ending
7466
while ((RAW != '>') &&
7467
((RAW != '/') || (NXT(1) != '>')) &&
7468
(IS_BYTE_CHAR(RAW))) {
7469
const xmlChar *q = CUR_PTR;
7470
unsigned int cons = ctxt->input->consumed;
7472
attname = xmlParseAttribute(ctxt, &attvalue);
7473
if ((attname != NULL) && (attvalue != NULL)) {
7475
* [ WFC: Unique Att Spec ]
7476
* No attribute name may appear more than once in the same
7477
* start-tag or empty-element tag.
7479
for (i = 0; i < nbatts;i += 2) {
7480
if (xmlStrEqual(atts[i], attname)) {
7481
xmlErrAttributeDup(ctxt, NULL, attname);
7487
* Add the pair to atts
7490
maxatts = 22; /* allow for 10 attrs by default */
7491
atts = (const xmlChar **)
7492
xmlMalloc(maxatts * sizeof(xmlChar *));
7494
xmlErrMemory(ctxt, NULL);
7495
if (attvalue != NULL)
7500
ctxt->maxatts = maxatts;
7501
} else if (nbatts + 4 > maxatts) {
7505
n = (const xmlChar **) xmlRealloc((void *) atts,
7506
maxatts * sizeof(const xmlChar *));
7508
xmlErrMemory(ctxt, NULL);
7509
if (attvalue != NULL)
7515
ctxt->maxatts = maxatts;
7517
atts[nbatts++] = attname;
7518
atts[nbatts++] = attvalue;
7519
atts[nbatts] = NULL;
7520
atts[nbatts + 1] = NULL;
7522
if (attvalue != NULL)
7529
if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7531
if (!IS_BLANK_CH(RAW)) {
7532
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7533
"attributes construct error\n");
7536
if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7537
(attname == NULL) && (attvalue == NULL)) {
7538
xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7539
"xmlParseStartTag: problem parsing attributes\n");
7547
* SAX: Start of Element !
7549
if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
7550
(!ctxt->disableSAX)) {
7552
ctxt->sax->startElement(ctxt->userData, name, atts);
7554
ctxt->sax->startElement(ctxt->userData, name, NULL);
7558
/* Free only the content strings */
7559
for (i = 1;i < nbatts;i+=2)
7560
if (atts[i] != NULL)
7561
xmlFree((xmlChar *) atts[i]);
7568
* @ctxt: an XML parser context
7569
* @line: line of the start tag
7570
* @nsNr: number of namespaces on the start tag
7572
* parse an end of tag
7574
* [42] ETag ::= '</' Name S? '>'
7578
* [NS 9] ETag ::= '</' QName S? '>'
7582
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
7583
const xmlChar *name;
7586
if ((RAW != '<') || (NXT(1) != '/')) {
7587
xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
7588
"xmlParseEndTag: '</' not found\n");
7593
name = xmlParseNameAndCompare(ctxt,ctxt->name);
7596
* We should definitely be at the ending "S? '>'" part
7600
if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
7601
xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
7606
* [ WFC: Element Type Match ]
7607
* The Name in an element's end-tag must match the element type in the
7611
if (name != (xmlChar*)1) {
7612
if (name == NULL) name = BAD_CAST "unparseable";
7613
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
7614
"Opening and ending tag mismatch: %s line %d and %s\n",
7615
ctxt->name, line, name);
7621
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7622
(!ctxt->disableSAX))
7623
ctxt->sax->endElement(ctxt->userData, ctxt->name);
7632
* @ctxt: an XML parser context
7634
* parse an end of tag
7636
* [42] ETag ::= '</' Name S? '>'
7640
* [NS 9] ETag ::= '</' QName S? '>'
7644
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
7645
xmlParseEndTag1(ctxt, 0);
7647
#endif /* LIBXML_SAX1_ENABLED */
7649
/************************************************************************
7651
* SAX 2 specific operations *
7653
************************************************************************/
7655
static const xmlChar *
7656
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7662
* Handler for more complex cases
7666
if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
7667
(!IS_LETTER(c) && (c != '_'))) {
7671
while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
7672
((IS_LETTER(c)) || (IS_DIGIT(c)) ||
7673
(c == '.') || (c == '-') || (c == '_') ||
7674
(IS_COMBINING(c)) ||
7675
(IS_EXTENDER(c)))) {
7676
if (count++ > 100) {
7684
return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7689
* @ctxt: an XML parser context
7690
* @prefix: the prefix to lookup
7692
* Lookup the namespace name for the @prefix (which ca be NULL)
7693
* The prefix must come from the @ctxt->dict dictionnary
7695
* Returns the namespace name or NULL if not bound
7697
static const xmlChar *
7698
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7701
if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
7702
for (i = ctxt->nsNr - 2;i >= 0;i-=2)
7703
if (ctxt->nsTab[i] == prefix) {
7704
if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7706
return(ctxt->nsTab[i + 1]);
7713
* @ctxt: an XML parser context
7714
* @len: lenght of the string parsed
7716
* parse an XML name.
7718
* [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7719
* CombiningChar | Extender
7721
* [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7723
* Returns the Name parsed or NULL
7726
static const xmlChar *
7727
xmlParseNCName(xmlParserCtxtPtr ctxt) {
7733
* Accelerator for simple ASCII names
7735
in = ctxt->input->cur;
7736
if (((*in >= 0x61) && (*in <= 0x7A)) ||
7737
((*in >= 0x41) && (*in <= 0x5A)) ||
7740
while (((*in >= 0x61) && (*in <= 0x7A)) ||
7741
((*in >= 0x41) && (*in <= 0x5A)) ||
7742
((*in >= 0x30) && (*in <= 0x39)) ||
7743
(*in == '_') || (*in == '-') ||
7746
if ((*in > 0) && (*in < 0x80)) {
7747
count = in - ctxt->input->cur;
7748
ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7749
ctxt->input->cur = in;
7750
ctxt->nbChars += count;
7751
ctxt->input->col += count;
7753
xmlErrMemory(ctxt, NULL);
7758
return(xmlParseNCNameComplex(ctxt));
7763
* @ctxt: an XML parser context
7764
* @prefix: pointer to store the prefix part
7766
* parse an XML Namespace QName
7768
* [6] QName ::= (Prefix ':')? LocalPart
7769
* [7] Prefix ::= NCName
7770
* [8] LocalPart ::= NCName
7772
* Returns the Name parsed or NULL
7775
static const xmlChar *
7776
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7777
const xmlChar *l, *p;
7781
l = xmlParseNCName(ctxt);
7784
l = xmlParseName(ctxt);
7786
xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7787
"Failed to parse QName '%s'\n", l, NULL, NULL);
7797
l = xmlParseNCName(ctxt);
7801
xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7802
"Failed to parse QName '%s:'\n", p, NULL, NULL);
7803
tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7804
p = xmlDictLookup(ctxt->dict, tmp, -1);
7805
if (tmp != NULL) xmlFree(tmp);
7812
xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7813
"Failed to parse QName '%s:%s:'\n", p, l, NULL);
7815
tmp = (xmlChar *) xmlParseName(ctxt);
7817
tmp = xmlBuildQName(tmp, l, NULL, 0);
7818
l = xmlDictLookup(ctxt->dict, tmp, -1);
7819
if (tmp != NULL) xmlFree(tmp);
7823
tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7824
l = xmlDictLookup(ctxt->dict, tmp, -1);
7825
if (tmp != NULL) xmlFree(tmp);
7836
* xmlParseQNameAndCompare:
7837
* @ctxt: an XML parser context
7838
* @name: the localname
7839
* @prefix: the prefix, if any.
7841
* parse an XML name and compares for match
7842
* (specialized for endtag parsing)
7844
* Returns NULL for an illegal name, (xmlChar*) 1 for success
7845
* and the name for mismatch
7848
static const xmlChar *
7849
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7850
xmlChar const *prefix) {
7851
const xmlChar *cmp = name;
7854
const xmlChar *prefix2;
7856
if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7859
in = ctxt->input->cur;
7862
while (*in != 0 && *in == *cmp) {
7866
if ((*cmp == 0) && (*in == ':')) {
7869
while (*in != 0 && *in == *cmp) {
7873
if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
7875
ctxt->input->cur = in;
7876
return((const xmlChar*) 1);
7880
* all strings coms from the dictionary, equality can be done directly
7882
ret = xmlParseQName (ctxt, &prefix2);
7883
if ((ret == name) && (prefix == prefix2))
7884
return((const xmlChar*) 1);
7889
* xmlParseAttValueInternal:
7890
* @ctxt: an XML parser context
7891
* @len: attribute len result
7892
* @alloc: whether the attribute was reallocated as a new string
7893
* @normalize: if 1 then further non-CDATA normalization must be done
7895
* parse a value for an attribute.
7896
* NOTE: if no normalization is needed, the routine will return pointers
7897
* directly from the data buffer.
7899
* 3.3.3 Attribute-Value Normalization:
7900
* Before the value of an attribute is passed to the application or
7901
* checked for validity, the XML processor must normalize it as follows:
7902
* - a character reference is processed by appending the referenced
7903
* character to the attribute value
7904
* - an entity reference is processed by recursively processing the
7905
* replacement text of the entity
7906
* - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7907
* appending #x20 to the normalized value, except that only a single
7908
* #x20 is appended for a "#xD#xA" sequence that is part of an external
7909
* parsed entity or the literal entity value of an internal parsed entity
7910
* - other characters are processed by appending them to the normalized value
7911
* If the declared value is not CDATA, then the XML processor must further
7912
* process the normalized attribute value by discarding any leading and
7913
* trailing space (#x20) characters, and by replacing sequences of space
7914
* (#x20) characters by a single space (#x20) character.
7915
* All attributes for which no declaration has been read should be treated
7916
* by a non-validating parser as if declared CDATA.
7918
* Returns the AttValue parsed or NULL. The value has to be freed by the
7919
* caller if it was copied, this can be detected by val[*len] == 0.
7923
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7927
const xmlChar *in = NULL, *start, *end, *last;
7928
xmlChar *ret = NULL;
7931
in = (xmlChar *) CUR_PTR;
7932
if (*in != '"' && *in != '\'') {
7933
xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
7936
ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
7939
* try to handle in this routine the most common case where no
7940
* allocation of a new string is required and where content is
7944
end = ctxt->input->end;
7947
const xmlChar *oldbase = ctxt->input->base;
7949
if (oldbase != ctxt->input->base) {
7950
long delta = ctxt->input->base - oldbase;
7951
start = start + delta;
7954
end = ctxt->input->end;
7958
* Skip any leading spaces
7960
while ((in < end) && (*in != limit) &&
7961
((*in == 0x20) || (*in == 0x9) ||
7962
(*in == 0xA) || (*in == 0xD))) {
7966
const xmlChar *oldbase = ctxt->input->base;
7968
if (oldbase != ctxt->input->base) {
7969
long delta = ctxt->input->base - oldbase;
7970
start = start + delta;
7973
end = ctxt->input->end;
7976
while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7977
(*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7978
if ((*in++ == 0x20) && (*in == 0x20)) break;
7980
const xmlChar *oldbase = ctxt->input->base;
7982
if (oldbase != ctxt->input->base) {
7983
long delta = ctxt->input->base - oldbase;
7984
start = start + delta;
7987
end = ctxt->input->end;
7992
* skip the trailing blanks
7994
while ((last[-1] == 0x20) && (last > start)) last--;
7995
while ((in < end) && (*in != limit) &&
7996
((*in == 0x20) || (*in == 0x9) ||
7997
(*in == 0xA) || (*in == 0xD))) {
8000
const xmlChar *oldbase = ctxt->input->base;
8002
if (oldbase != ctxt->input->base) {
8003
long delta = ctxt->input->base - oldbase;
8004
start = start + delta;
8006
last = last + delta;
8008
end = ctxt->input->end;
8011
if (*in != limit) goto need_complex;
8013
while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8014
(*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8017
const xmlChar *oldbase = ctxt->input->base;
8019
if (oldbase != ctxt->input->base) {
8020
long delta = ctxt->input->base - oldbase;
8021
start = start + delta;
8024
end = ctxt->input->end;
8028
if (*in != limit) goto need_complex;
8032
*len = last - start;
8033
ret = (xmlChar *) start;
8035
if (alloc) *alloc = 1;
8036
ret = xmlStrndup(start, last - start);
8039
if (alloc) *alloc = 0;
8042
if (alloc) *alloc = 1;
8043
return xmlParseAttValueComplex(ctxt, len, normalize);
8047
* xmlParseAttribute2:
8048
* @ctxt: an XML parser context
8049
* @pref: the element prefix
8050
* @elem: the element name
8051
* @prefix: a xmlChar ** used to store the value of the attribute prefix
8052
* @value: a xmlChar ** used to store the value of the attribute
8053
* @len: an int * to save the length of the attribute
8054
* @alloc: an int * to indicate if the attribute was allocated
8056
* parse an attribute in the new SAX2 framework.
8058
* Returns the attribute name, and the value in *value, .
8061
static const xmlChar *
8062
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8063
const xmlChar * pref, const xmlChar * elem,
8064
const xmlChar ** prefix, xmlChar ** value,
8065
int *len, int *alloc)
8067
const xmlChar *name;
8068
xmlChar *val, *internal_val = NULL;
8073
name = xmlParseQName(ctxt, prefix);
8075
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8076
"error parsing attribute name\n");
8081
* get the type if needed
8083
if (ctxt->attsSpecial != NULL) {
8086
type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
8087
pref, elem, *prefix, name);
8099
val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8102
* Sometimes a second normalisation pass for spaces is needed
8103
* but that only happens if charrefs or entities refernces
8104
* have been used in the attribute value, i.e. the attribute
8105
* value have been extracted in an allocated string already.
8108
const xmlChar *val2;
8110
val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
8113
val = (xmlChar *) val2;
8117
ctxt->instate = XML_PARSER_CONTENT;
8119
xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8120
"Specification mandate value for attribute %s\n",
8125
if (*prefix == ctxt->str_xml) {
8127
* Check that xml:lang conforms to the specification
8128
* No more registered as an error, just generate a warning now
8129
* since this was deprecated in XML second edition
8131
if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8132
internal_val = xmlStrndup(val, *len);
8133
if (!xmlCheckLanguageID(internal_val)) {
8134
xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8135
"Malformed value for xml:lang : %s\n",
8136
internal_val, NULL);
8141
* Check that xml:space conforms to the specification
8143
if (xmlStrEqual(name, BAD_CAST "space")) {
8144
internal_val = xmlStrndup(val, *len);
8145
if (xmlStrEqual(internal_val, BAD_CAST "default"))
8147
else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8150
xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8151
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8152
internal_val, NULL);
8156
xmlFree(internal_val);
8164
* xmlParseStartTag2:
8165
* @ctxt: an XML parser context
8167
* parse a start of tag either for rule element or
8168
* EmptyElement. In both case we don't parse the tag closing chars.
8169
* This routine is called when running SAX2 parsing
8171
* [40] STag ::= '<' Name (S Attribute)* S? '>'
8173
* [ WFC: Unique Att Spec ]
8174
* No attribute name may appear more than once in the same start-tag or
8175
* empty-element tag.
8177
* [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8179
* [ WFC: Unique Att Spec ]
8180
* No attribute name may appear more than once in the same start-tag or
8181
* empty-element tag.
8185
* [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8187
* [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8189
* Returns the element name parsed
8192
static const xmlChar *
8193
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8194
const xmlChar **URI, int *tlen) {
8195
const xmlChar *localname;
8196
const xmlChar *prefix;
8197
const xmlChar *attname;
8198
const xmlChar *aprefix;
8199
const xmlChar *nsname;
8201
const xmlChar **atts = ctxt->atts;
8202
int maxatts = ctxt->maxatts;
8203
int nratts, nbatts, nbdef;
8204
int i, j, nbNs, attval, oldline, oldcol;
8205
const xmlChar *base;
8207
int nsNr = ctxt->nsNr;
8209
if (RAW != '<') return(NULL);
8213
* NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8214
* point since the attribute values may be stored as pointers to
8215
* the buffer and calling SHRINK would destroy them !
8216
* The Shrinking is only possible once the full set of attribute
8217
* callbacks have been done.
8221
base = ctxt->input->base;
8222
cur = ctxt->input->cur - ctxt->input->base;
8223
oldline = ctxt->input->line;
8224
oldcol = ctxt->input->col;
8230
/* Forget any namespaces added during an earlier parse of this element. */
8233
localname = xmlParseQName(ctxt, &prefix);
8234
if (localname == NULL) {
8235
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8236
"StartTag: invalid element name\n");
8239
*tlen = ctxt->input->cur - ctxt->input->base - cur;
8242
* Now parse the attributes, it ends up with the ending
8248
if (ctxt->input->base != base) goto base_changed;
8250
while ((RAW != '>') &&
8251
((RAW != '/') || (NXT(1) != '>')) &&
8252
(IS_BYTE_CHAR(RAW))) {
8253
const xmlChar *q = CUR_PTR;
8254
unsigned int cons = ctxt->input->consumed;
8255
int len = -1, alloc = 0;
8257
attname = xmlParseAttribute2(ctxt, prefix, localname,
8258
&aprefix, &attvalue, &len, &alloc);
8259
if (ctxt->input->base != base) {
8260
if ((attvalue != NULL) && (alloc != 0))
8265
if ((attname != NULL) && (attvalue != NULL)) {
8266
if (len < 0) len = xmlStrlen(attvalue);
8267
if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8268
const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8272
uri = xmlParseURI((const char *) URL);
8274
xmlWarningMsg(ctxt, XML_WAR_NS_URI,
8275
"xmlns: %s not a valid URI\n",
8278
if (uri->scheme == NULL) {
8279
xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
8280
"xmlns: URI %s is not absolute\n",
8287
* check that it's not a defined namespace
8289
for (j = 1;j <= nbNs;j++)
8290
if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8293
xmlErrAttributeDup(ctxt, NULL, attname);
8295
if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
8296
if (alloc != 0) xmlFree(attvalue);
8300
if (aprefix == ctxt->str_xmlns) {
8301
const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8304
if (attname == ctxt->str_xml) {
8305
if (URL != ctxt->str_xml_ns) {
8306
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8307
"xml namespace prefix mapped to wrong URI\n",
8311
* Do not keep a namespace definition node
8313
if (alloc != 0) xmlFree(attvalue);
8317
uri = xmlParseURI((const char *) URL);
8319
xmlWarningMsg(ctxt, XML_WAR_NS_URI,
8320
"xmlns:%s: '%s' is not a valid URI\n",
8323
if ((ctxt->pedantic) && (uri->scheme == NULL)) {
8324
xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
8325
"xmlns:%s: URI %s is not absolute\n",
8332
* check that it's not a defined namespace
8334
for (j = 1;j <= nbNs;j++)
8335
if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8338
xmlErrAttributeDup(ctxt, aprefix, attname);
8340
if (nsPush(ctxt, attname, URL) > 0) nbNs++;
8341
if (alloc != 0) xmlFree(attvalue);
8343
if (ctxt->input->base != base) goto base_changed;
8348
* Add the pair to atts
8350
if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8351
if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
8352
if (attvalue[len] == 0)
8356
maxatts = ctxt->maxatts;
8359
ctxt->attallocs[nratts++] = alloc;
8360
atts[nbatts++] = attname;
8361
atts[nbatts++] = aprefix;
8362
atts[nbatts++] = NULL; /* the URI will be fetched later */
8363
atts[nbatts++] = attvalue;
8365
atts[nbatts++] = attvalue;
8367
* tag if some deallocation is needed
8369
if (alloc != 0) attval = 1;
8371
if ((attvalue != NULL) && (attvalue[len] == 0))
8378
if (ctxt->input->base != base) goto base_changed;
8379
if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8381
if (!IS_BLANK_CH(RAW)) {
8382
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8383
"attributes construct error\n");
8387
if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8388
(attname == NULL) && (attvalue == NULL)) {
8389
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8390
"xmlParseStartTag: problem parsing attributes\n");
8394
if (ctxt->input->base != base) goto base_changed;
8398
* The attributes defaulting
8400
if (ctxt->attsDefault != NULL) {
8401
xmlDefAttrsPtr defaults;
8403
defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8404
if (defaults != NULL) {
8405
for (i = 0;i < defaults->nbAttrs;i++) {
8406
attname = defaults->values[4 * i];
8407
aprefix = defaults->values[4 * i + 1];
8410
* special work for namespaces defaulted defs
8412
if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8414
* check that it's not a defined namespace
8416
for (j = 1;j <= nbNs;j++)
8417
if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8419
if (j <= nbNs) continue;
8421
nsname = xmlGetNamespace(ctxt, NULL);
8422
if (nsname != defaults->values[4 * i + 2]) {
8423
if (nsPush(ctxt, NULL,
8424
defaults->values[4 * i + 2]) > 0)
8427
} else if (aprefix == ctxt->str_xmlns) {
8429
* check that it's not a defined namespace
8431
for (j = 1;j <= nbNs;j++)
8432
if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8434
if (j <= nbNs) continue;
8436
nsname = xmlGetNamespace(ctxt, attname);
8437
if (nsname != defaults->values[2]) {
8438
if (nsPush(ctxt, attname,
8439
defaults->values[4 * i + 2]) > 0)
8444
* check that it's not a defined attribute
8446
for (j = 0;j < nbatts;j+=5) {
8447
if ((attname == atts[j]) && (aprefix == atts[j+1]))
8450
if (j < nbatts) continue;
8452
if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8453
if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
8456
maxatts = ctxt->maxatts;
8459
atts[nbatts++] = attname;
8460
atts[nbatts++] = aprefix;
8461
if (aprefix == NULL)
8462
atts[nbatts++] = NULL;
8464
atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8465
atts[nbatts++] = defaults->values[4 * i + 2];
8466
atts[nbatts++] = defaults->values[4 * i + 3];
8474
* The attributes checkings
8476
for (i = 0; i < nbatts;i += 5) {
8478
* The default namespace does not apply to attribute names.
8480
if (atts[i + 1] != NULL) {
8481
nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8482
if (nsname == NULL) {
8483
xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8484
"Namespace prefix %s for %s on %s is not defined\n",
8485
atts[i + 1], atts[i], localname);
8487
atts[i + 2] = nsname;
8491
* [ WFC: Unique Att Spec ]
8492
* No attribute name may appear more than once in the same
8493
* start-tag or empty-element tag.
8494
* As extended by the Namespace in XML REC.
8496
for (j = 0; j < i;j += 5) {
8497
if (atts[i] == atts[j]) {
8498
if (atts[i+1] == atts[j+1]) {
8499
xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8502
if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8503
xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8504
"Namespaced Attribute %s in '%s' redefined\n",
8505
atts[i], nsname, NULL);
8512
nsname = xmlGetNamespace(ctxt, prefix);
8513
if ((prefix != NULL) && (nsname == NULL)) {
8514
xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8515
"Namespace prefix %s on %s is not defined\n",
8516
prefix, localname, NULL);
8522
* SAX: Start of Element !
8524
if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8525
(!ctxt->disableSAX)) {
8527
ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8528
nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8529
nbatts / 5, nbdef, atts);
8531
ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8532
nsname, 0, NULL, nbatts / 5, nbdef, atts);
8536
* Free up attribute allocated strings if needed
8539
for (i = 3,j = 0; j < nratts;i += 5,j++)
8540
if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8541
xmlFree((xmlChar *) atts[i]);
8548
* the attribute strings are only valid if the base didn't change
8551
for (i = 3,j = 0; j < nratts;i += 5,j++)
8552
if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8553
xmlFree((xmlChar *) atts[i]);
8555
ctxt->input->cur = ctxt->input->base + cur;
8556
ctxt->input->line = oldline;
8557
ctxt->input->col = oldcol;
8558
if (ctxt->wellFormed == 1) {
8566
* @ctxt: an XML parser context
8567
* @line: line of the start tag
8568
* @nsNr: number of namespaces on the start tag
8570
* parse an end of tag
8572
* [42] ETag ::= '</' Name S? '>'
8576
* [NS 9] ETag ::= '</' QName S? '>'
8580
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
8581
const xmlChar *URI, int line, int nsNr, int tlen) {
8582
const xmlChar *name;
8585
if ((RAW != '<') || (NXT(1) != '/')) {
8586
xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
8591
if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
8592
if (ctxt->input->cur[tlen] == '>') {
8593
ctxt->input->cur += tlen + 1;
8596
ctxt->input->cur += tlen;
8600
name = xmlParseNameAndCompare(ctxt, ctxt->name);
8602
name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8606
* We should definitely be at the ending "S? '>'" part
8610
if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8611
xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8616
* [ WFC: Element Type Match ]
8617
* The Name in an element's end-tag must match the element type in the
8621
if (name != (xmlChar*)1) {
8622
if (name == NULL) name = BAD_CAST "unparseable";
8623
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8624
"Opening and ending tag mismatch: %s line %d and %s\n",
8625
ctxt->name, line, name);
8632
if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8633
(!ctxt->disableSAX))
8634
ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8644
* @ctxt: an XML parser context
8646
* Parse escaped pure raw content.
8648
* [18] CDSect ::= CDStart CData CDEnd
8650
* [19] CDStart ::= '<![CDATA['
8652
* [20] Data ::= (Char* - (Char* ']]>' Char*))
8654
* [21] CDEnd ::= ']]>'
8657
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8658
xmlChar *buf = NULL;
8660
int size = XML_PARSER_BUFFER_SIZE;
8666
/* Check 2.6.0 was NXT(0) not RAW */
8667
if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
8672
ctxt->instate = XML_PARSER_CDATA_SECTION;
8675
xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
8676
ctxt->instate = XML_PARSER_CONTENT;
8682
xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
8683
ctxt->instate = XML_PARSER_CONTENT;
8688
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
8690
xmlErrMemory(ctxt, NULL);
8693
while (IS_CHAR(cur) &&
8694
((r != ']') || (s != ']') || (cur != '>'))) {
8695
if (len + 5 >= size) {
8699
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8702
xmlErrMemory(ctxt, NULL);
8707
COPY_BUF(rl,buf,len,r);
8721
ctxt->instate = XML_PARSER_CONTENT;
8723
xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
8724
"CData section not finished\n%.50s\n", buf);
8731
* OK the buffer is to be consumed as cdata.
8733
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8734
if (ctxt->sax->cdataBlock != NULL)
8735
ctxt->sax->cdataBlock(ctxt->userData, buf, len);
8736
else if (ctxt->sax->characters != NULL)
8737
ctxt->sax->characters(ctxt->userData, buf, len);
8744
* @ctxt: an XML parser context
8748
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8752
xmlParseContent(xmlParserCtxtPtr ctxt) {
8754
while ((RAW != 0) &&
8755
((RAW != '<') || (NXT(1) != '/')) &&
8756
(ctxt->instate != XML_PARSER_EOF)) {
8757
const xmlChar *test = CUR_PTR;
8758
unsigned int cons = ctxt->input->consumed;
8759
const xmlChar *cur = ctxt->input->cur;
8762
* First case : a Processing Instruction.
8764
if ((*cur == '<') && (cur[1] == '?')) {
8769
* Second case : a CDSection
8771
/* 2.6.0 test was *cur not RAW */
8772
else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
8773
xmlParseCDSect(ctxt);
8777
* Third case : a comment
8779
else if ((*cur == '<') && (NXT(1) == '!') &&
8780
(NXT(2) == '-') && (NXT(3) == '-')) {
8781
xmlParseComment(ctxt);
8782
ctxt->instate = XML_PARSER_CONTENT;
8786
* Fourth case : a sub-element.
8788
else if (*cur == '<') {
8789
xmlParseElement(ctxt);
8793
* Fifth case : a reference. If it has not been resolved,
8794
* parsing returns its Name, create the node
8797
else if (*cur == '&') {
8798
xmlParseReference(ctxt);
8802
* Last case, text. Note that References are handled directly.
8805
xmlParseCharData(ctxt, 0);
8810
* Pop-up of finished entities.
8812
while ((RAW == 0) && (ctxt->inputNr > 1))
8816
if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
8817
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8818
"detected an error in element content\n");
8819
ctxt->instate = XML_PARSER_EOF;
8827
* @ctxt: an XML parser context
8829
* parse an XML element, this is highly recursive
8831
* [39] element ::= EmptyElemTag | STag content ETag
8833
* [ WFC: Element Type Match ]
8834
* The Name in an element's end-tag must match the element type in the
8840
xmlParseElement(xmlParserCtxtPtr ctxt) {
8841
const xmlChar *name;
8842
const xmlChar *prefix;
8844
xmlParserNodeInfo node_info;
8847
int nsNr = ctxt->nsNr;
8849
if ((unsigned int) ctxt->nameNr > xmlParserMaxDepth) {
8850
xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
8851
"Excessive depth in document: change xmlParserMaxDepth = %d\n",
8853
ctxt->instate = XML_PARSER_EOF;
8857
/* Capture start position */
8858
if (ctxt->record_info) {
8859
node_info.begin_pos = ctxt->input->consumed +
8860
(CUR_PTR - ctxt->input->base);
8861
node_info.begin_line = ctxt->input->line;
8864
if (ctxt->spaceNr == 0)
8865
spacePush(ctxt, -1);
8866
else if (*ctxt->space == -2)
8867
spacePush(ctxt, -1);
8869
spacePush(ctxt, *ctxt->space);
8871
line = ctxt->input->line;
8872
#ifdef LIBXML_SAX1_ENABLED
8874
#endif /* LIBXML_SAX1_ENABLED */
8875
name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
8876
#ifdef LIBXML_SAX1_ENABLED
8878
name = xmlParseStartTag(ctxt);
8879
#endif /* LIBXML_SAX1_ENABLED */
8884
namePush(ctxt, name);
8887
#ifdef LIBXML_VALID_ENABLED
8889
* [ VC: Root Element Type ]
8890
* The Name in the document type declaration must match the element
8891
* type of the root element.
8893
if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8894
ctxt->node && (ctxt->node == ctxt->myDoc->children))
8895
ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8896
#endif /* LIBXML_VALID_ENABLED */
8899
* Check for an Empty Element.
8901
if ((RAW == '/') && (NXT(1) == '>')) {
8904
if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8905
(!ctxt->disableSAX))
8906
ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
8907
#ifdef LIBXML_SAX1_ENABLED
8909
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8910
(!ctxt->disableSAX))
8911
ctxt->sax->endElement(ctxt->userData, name);
8912
#endif /* LIBXML_SAX1_ENABLED */
8916
if (nsNr != ctxt->nsNr)
8917
nsPop(ctxt, ctxt->nsNr - nsNr);
8918
if ( ret != NULL && ctxt->record_info ) {
8919
node_info.end_pos = ctxt->input->consumed +
8920
(CUR_PTR - ctxt->input->base);
8921
node_info.end_line = ctxt->input->line;
8922
node_info.node = ret;
8923
xmlParserAddNodeInfo(ctxt, &node_info);
8930
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8931
"Couldn't find end of Start Tag %s line %d\n",
8935
* end of parsing of this node.
8940
if (nsNr != ctxt->nsNr)
8941
nsPop(ctxt, ctxt->nsNr - nsNr);
8944
* Capture end position and add node
8946
if ( ret != NULL && ctxt->record_info ) {
8947
node_info.end_pos = ctxt->input->consumed +
8948
(CUR_PTR - ctxt->input->base);
8949
node_info.end_line = ctxt->input->line;
8950
node_info.node = ret;
8951
xmlParserAddNodeInfo(ctxt, &node_info);
8957
* Parse the content of the element:
8959
xmlParseContent(ctxt);
8960
if (!IS_BYTE_CHAR(RAW)) {
8961
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
8962
"Premature end of data in tag %s line %d\n",
8966
* end of parsing of this node.
8971
if (nsNr != ctxt->nsNr)
8972
nsPop(ctxt, ctxt->nsNr - nsNr);
8977
* parse the end of tag: '</' should be here.
8980
xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
8983
#ifdef LIBXML_SAX1_ENABLED
8985
xmlParseEndTag1(ctxt, line);
8986
#endif /* LIBXML_SAX1_ENABLED */
8989
* Capture end position and add node
8991
if ( ret != NULL && ctxt->record_info ) {
8992
node_info.end_pos = ctxt->input->consumed +
8993
(CUR_PTR - ctxt->input->base);
8994
node_info.end_line = ctxt->input->line;
8995
node_info.node = ret;
8996
xmlParserAddNodeInfo(ctxt, &node_info);
9001
* xmlParseVersionNum:
9002
* @ctxt: an XML parser context
9004
* parse the XML version value.
9006
* [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
9008
* Returns the string giving the XML version number, or NULL
9011
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9012
xmlChar *buf = NULL;
9017
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9019
xmlErrMemory(ctxt, NULL);
9023
while (((cur >= 'a') && (cur <= 'z')) ||
9024
((cur >= 'A') && (cur <= 'Z')) ||
9025
((cur >= '0') && (cur <= '9')) ||
9026
(cur == '_') || (cur == '.') ||
9027
(cur == ':') || (cur == '-')) {
9028
if (len + 1 >= size) {
9032
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9035
xmlErrMemory(ctxt, NULL);
9049
* xmlParseVersionInfo:
9050
* @ctxt: an XML parser context
9052
* parse the XML version.
9054
* [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9056
* [25] Eq ::= S? '=' S?
9058
* Returns the version string, e.g. "1.0"
9062
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9063
xmlChar *version = NULL;
9065
if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
9069
xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9076
version = xmlParseVersionNum(ctxt);
9078
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9081
} else if (RAW == '\''){
9083
version = xmlParseVersionNum(ctxt);
9085
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9089
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9097
* @ctxt: an XML parser context
9099
* parse the XML encoding name
9101
* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9103
* Returns the encoding name value or NULL
9106
xmlParseEncName(xmlParserCtxtPtr ctxt) {
9107
xmlChar *buf = NULL;
9113
if (((cur >= 'a') && (cur <= 'z')) ||
9114
((cur >= 'A') && (cur <= 'Z'))) {
9115
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9117
xmlErrMemory(ctxt, NULL);
9124
while (((cur >= 'a') && (cur <= 'z')) ||
9125
((cur >= 'A') && (cur <= 'Z')) ||
9126
((cur >= '0') && (cur <= '9')) ||
9127
(cur == '.') || (cur == '_') ||
9129
if (len + 1 >= size) {
9133
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9135
xmlErrMemory(ctxt, NULL);
9152
xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
9158
* xmlParseEncodingDecl:
9159
* @ctxt: an XML parser context
9161
* parse the XML encoding declaration
9163
* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9165
* this sets up the conversion filters.
9167
* Returns the encoding value or NULL
9171
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9172
xmlChar *encoding = NULL;
9175
if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
9179
xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9186
encoding = xmlParseEncName(ctxt);
9188
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9191
} else if (RAW == '\''){
9193
encoding = xmlParseEncName(ctxt);
9195
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9199
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9202
* UTF-16 encoding switch has already taken place at this stage,
9203
* moreover the little-endian/big-endian selection is already done
9205
if ((encoding != NULL) &&
9206
((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9207
(!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
9208
if (ctxt->encoding != NULL)
9209
xmlFree((xmlChar *) ctxt->encoding);
9210
ctxt->encoding = encoding;
9213
* UTF-8 encoding is handled natively
9215
else if ((encoding != NULL) &&
9216
((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9217
(!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
9218
if (ctxt->encoding != NULL)
9219
xmlFree((xmlChar *) ctxt->encoding);
9220
ctxt->encoding = encoding;
9222
else if (encoding != NULL) {
9223
xmlCharEncodingHandlerPtr handler;
9225
if (ctxt->input->encoding != NULL)
9226
xmlFree((xmlChar *) ctxt->input->encoding);
9227
ctxt->input->encoding = encoding;
9229
handler = xmlFindCharEncodingHandler((const char *) encoding);
9230
if (handler != NULL) {
9231
xmlSwitchToEncoding(ctxt, handler);
9233
xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
9234
"Unsupported encoding %s\n", encoding);
9244
* @ctxt: an XML parser context
9246
* parse the XML standalone declaration
9248
* [32] SDDecl ::= S 'standalone' Eq
9249
* (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9251
* [ VC: Standalone Document Declaration ]
9252
* TODO The standalone document declaration must have the value "no"
9253
* if any external markup declarations contain declarations of:
9254
* - attributes with default values, if elements to which these
9255
* attributes apply appear in the document without specifications
9256
* of values for these attributes, or
9257
* - entities (other than amp, lt, gt, apos, quot), if references
9258
* to those entities appear in the document, or
9259
* - attributes with values subject to normalization, where the
9260
* attribute appears in the document with a value which will change
9261
* as a result of normalization, or
9262
* - element types with element content, if white space occurs directly
9263
* within any instance of those types.
9266
* 1 if standalone="yes"
9267
* 0 if standalone="no"
9268
* -2 if standalone attribute is missing or invalid
9269
* (A standalone value of -2 means that the XML declaration was found,
9270
* but no value was specified for the standalone attribute).
9274
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
9275
int standalone = -2;
9278
if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
9282
xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9289
if ((RAW == 'n') && (NXT(1) == 'o')) {
9292
} else if ((RAW == 'y') && (NXT(1) == 'e') &&
9297
xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
9300
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9303
} else if (RAW == '"'){
9305
if ((RAW == 'n') && (NXT(1) == 'o')) {
9308
} else if ((RAW == 'y') && (NXT(1) == 'e') &&
9313
xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
9316
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9320
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9328
* @ctxt: an XML parser context
9330
* parse an XML declaration header
9332
* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9336
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9340
* This value for standalone indicates that the document has an
9341
* XML declaration but it does not have a standalone attribute.
9342
* It will be overwritten later if a standalone attribute is found.
9344
ctxt->input->standalone = -2;
9347
* We know that '<?xml' is here.
9351
if (!IS_BLANK_CH(RAW)) {
9352
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9353
"Blank needed after '<?xml'\n");
9358
* We must have the VersionInfo here.
9360
version = xmlParseVersionInfo(ctxt);
9361
if (version == NULL) {
9362
xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
9364
if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9366
* TODO: Blueberry should be detected here
9368
xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9369
"Unsupported version '%s'\n",
9372
if (ctxt->version != NULL)
9373
xmlFree((void *) ctxt->version);
9374
ctxt->version = version;
9378
* We may have the encoding declaration
9380
if (!IS_BLANK_CH(RAW)) {
9381
if ((RAW == '?') && (NXT(1) == '>')) {
9385
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
9387
xmlParseEncodingDecl(ctxt);
9388
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9390
* The XML REC instructs us to stop parsing right here
9396
* We may have the standalone status.
9398
if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
9399
if ((RAW == '?') && (NXT(1) == '>')) {
9403
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
9406
ctxt->input->standalone = xmlParseSDDecl(ctxt);
9409
if ((RAW == '?') && (NXT(1) == '>')) {
9411
} else if (RAW == '>') {
9412
/* Deprecated old WD ... */
9413
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
9416
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
9417
MOVETO_ENDTAG(CUR_PTR);
9424
* @ctxt: an XML parser context
9426
* parse an XML Misc* optional field.
9428
* [27] Misc ::= Comment | PI | S
9432
xmlParseMisc(xmlParserCtxtPtr ctxt) {
9433
while (((RAW == '<') && (NXT(1) == '?')) ||
9434
(CMP4(CUR_PTR, '<', '!', '-', '-')) ||
9436
if ((RAW == '<') && (NXT(1) == '?')) {
9438
} else if (IS_BLANK_CH(CUR)) {
9441
xmlParseComment(ctxt);
9447
* @ctxt: an XML parser context
9449
* parse an XML document (and build a tree if using the standard SAX
9452
* [1] document ::= prolog element Misc*
9454
* [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9456
* Returns 0, -1 in case of error. The parser context is augmented
9457
* as a result of the parsing.
9461
xmlParseDocument(xmlParserCtxtPtr ctxt) {
9463
xmlCharEncoding enc;
9467
if ((ctxt == NULL) || (ctxt->input == NULL))
9473
* SAX: detecting the level.
9475
xmlDetectSAX2(ctxt);
9478
* SAX: beginning of the document processing.
9480
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9481
ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9483
if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9484
((ctxt->input->end - ctxt->input->cur) >= 4)) {
9486
* Get the 4 first bytes and decode the charset
9487
* if enc != XML_CHAR_ENCODING_NONE
9488
* plug some encoding conversion routines.
9494
enc = xmlDetectCharEncoding(&start[0], 4);
9495
if (enc != XML_CHAR_ENCODING_NONE) {
9496
xmlSwitchEncoding(ctxt, enc);
9502
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
9506
* Check for the XMLDecl in the Prolog.
9509
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
9512
* Note that we will switch encoding on the fly.
9514
xmlParseXMLDecl(ctxt);
9515
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9517
* The XML REC instructs us to stop parsing right here
9521
ctxt->standalone = ctxt->input->standalone;
9524
ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9526
if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9527
ctxt->sax->startDocument(ctxt->userData);
9530
* The Misc part of the Prolog
9536
* Then possibly doc type declaration(s) and more Misc
9537
* (doctypedecl Misc*)?
9540
if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
9543
xmlParseDocTypeDecl(ctxt);
9545
ctxt->instate = XML_PARSER_DTD;
9546
xmlParseInternalSubset(ctxt);
9550
* Create and update the external subset.
9553
if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9554
(!ctxt->disableSAX))
9555
ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9556
ctxt->extSubSystem, ctxt->extSubURI);
9559
xmlCleanSpecialAttr(ctxt);
9561
ctxt->instate = XML_PARSER_PROLOG;
9566
* Time to start parsing the tree itself
9570
xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9571
"Start tag expected, '<' not found\n");
9573
ctxt->instate = XML_PARSER_CONTENT;
9574
xmlParseElement(ctxt);
9575
ctxt->instate = XML_PARSER_EPILOG;
9579
* The Misc part at the end
9584
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
9586
ctxt->instate = XML_PARSER_EOF;
9590
* SAX: end of the document processing.
9592
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9593
ctxt->sax->endDocument(ctxt->userData);
9596
* Remove locally kept entity definitions if the tree was not built
9598
if ((ctxt->myDoc != NULL) &&
9599
(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9600
xmlFreeDoc(ctxt->myDoc);
9604
if (! ctxt->wellFormed) {
9612
* xmlParseExtParsedEnt:
9613
* @ctxt: an XML parser context
9615
* parse a general parsed entity
9616
* An external general parsed entity is well-formed if it matches the
9617
* production labeled extParsedEnt.
9619
* [78] extParsedEnt ::= TextDecl? content
9621
* Returns 0, -1 in case of error. The parser context is augmented
9622
* as a result of the parsing.
9626
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9628
xmlCharEncoding enc;
9630
if ((ctxt == NULL) || (ctxt->input == NULL))
9633
xmlDefaultSAXHandlerInit();
9635
xmlDetectSAX2(ctxt);
9640
* SAX: beginning of the document processing.
9642
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9643
ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9646
* Get the 4 first bytes and decode the charset
9647
* if enc != XML_CHAR_ENCODING_NONE
9648
* plug some encoding conversion routines.
9650
if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9655
enc = xmlDetectCharEncoding(start, 4);
9656
if (enc != XML_CHAR_ENCODING_NONE) {
9657
xmlSwitchEncoding(ctxt, enc);
9663
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
9667
* Check for the XMLDecl in the Prolog.
9670
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
9673
* Note that we will switch encoding on the fly.
9675
xmlParseXMLDecl(ctxt);
9676
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9678
* The XML REC instructs us to stop parsing right here
9684
ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9686
if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9687
ctxt->sax->startDocument(ctxt->userData);
9690
* Doing validity checking on chunk doesn't make sense
9692
ctxt->instate = XML_PARSER_CONTENT;
9694
ctxt->loadsubset = 0;
9697
xmlParseContent(ctxt);
9699
if ((RAW == '<') && (NXT(1) == '/')) {
9700
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
9701
} else if (RAW != 0) {
9702
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
9706
* SAX: end of the document processing.
9708
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9709
ctxt->sax->endDocument(ctxt->userData);
9711
if (! ctxt->wellFormed) return(-1);
9715
#ifdef LIBXML_PUSH_ENABLED
9716
/************************************************************************
9718
* Progressive parsing interfaces *
9720
************************************************************************/
9723
* xmlParseLookupSequence:
9724
* @ctxt: an XML parser context
9725
* @first: the first char to lookup
9726
* @next: the next char to lookup or zero
9727
* @third: the next char to lookup or zero
9729
* Try to find if a sequence (first, next, third) or just (first next) or
9730
* (first) is available in the input stream.
9731
* This function has a side effect of (possibly) incrementing ctxt->checkIndex
9732
* to avoid rescanning sequences of bytes, it DOES change the state of the
9733
* parser, do not use liberally.
9735
* Returns the index to the current parsing point if the full sequence
9736
* is available, -1 otherwise.
9739
xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9740
xmlChar next, xmlChar third) {
9742
xmlParserInputPtr in;
9746
if (in == NULL) return(-1);
9747
base = in->cur - in->base;
9748
if (base < 0) return(-1);
9749
if (ctxt->checkIndex > base)
9750
base = ctxt->checkIndex;
9751
if (in->buf == NULL) {
9755
buf = in->buf->buffer->content;
9756
len = in->buf->buffer->use;
9758
/* take into account the sequence length */
9759
if (third) len -= 2;
9760
else if (next) len --;
9761
for (;base < len;base++) {
9762
if (buf[base] == first) {
9764
if ((buf[base + 1] != next) ||
9765
(buf[base + 2] != third)) continue;
9766
} else if (next != 0) {
9767
if (buf[base + 1] != next) continue;
9769
ctxt->checkIndex = 0;
9772
xmlGenericError(xmlGenericErrorContext,
9773
"PP: lookup '%c' found at %d\n",
9775
else if (third == 0)
9776
xmlGenericError(xmlGenericErrorContext,
9777
"PP: lookup '%c%c' found at %d\n",
9780
xmlGenericError(xmlGenericErrorContext,
9781
"PP: lookup '%c%c%c' found at %d\n",
9782
first, next, third, base);
9784
return(base - (in->cur - in->base));
9787
ctxt->checkIndex = base;
9790
xmlGenericError(xmlGenericErrorContext,
9791
"PP: lookup '%c' failed\n", first);
9792
else if (third == 0)
9793
xmlGenericError(xmlGenericErrorContext,
9794
"PP: lookup '%c%c' failed\n", first, next);
9796
xmlGenericError(xmlGenericErrorContext,
9797
"PP: lookup '%c%c%c' failed\n", first, next, third);
9804
* @ctxt: an XML parser context
9805
* @lastlt: pointer to store the last '<' from the input
9806
* @lastgt: pointer to store the last '>' from the input
9808
* Lookup the last < and > in the current chunk
9811
xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9812
const xmlChar **lastgt) {
9815
if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9816
xmlGenericError(xmlGenericErrorContext,
9817
"Internal error: xmlParseGetLasts\n");
9820
if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
9821
tmp = ctxt->input->end;
9823
while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
9824
if (tmp < ctxt->input->base) {
9830
while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9833
while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9834
if (tmp < ctxt->input->end) tmp++;
9835
} else if (*tmp == '"') {
9837
while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9838
if (tmp < ctxt->input->end) tmp++;
9842
if (tmp < ctxt->input->end)
9847
while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9848
if (tmp >= ctxt->input->base)
9860
* xmlCheckCdataPush:
9861
* @utf: pointer to the block of characters
9862
* @len: length of the block in bytes
9864
* Check that the block of characters is okay as CData content [20]
9866
* Returns the number of bytes to pass if okay, a negative index where an
9867
* UTF-8 error occurred otherwise
9870
xmlCheckCdataPush(const xmlChar *utf, int len) {
9875
/* Reject a missing buffer or a non-positive length up front. */
if ((utf == NULL) || (len <= 0))
9878
/* Walk the block one UTF-8 sequence at a time; ix is advanced inside the body. */
for (ix = 0; ix < len;) { /* string is 0-terminated */
9880
if ((c & 0x80) == 0x00) { /* 1-byte code, high bit clear (0xxxxxxx) */
9883
/* Of the low control characters only LF, CR and TAB are tested for here. */
else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9887
} else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9888
/* Sequence would run past len: report the index reached so far. */
if (ix + 2 > len) return(ix);
9889
/* The continuation byte must have the form 10xxxxxx. */
if ((utf[ix+1] & 0xc0 ) != 0x80)
9891
/* 5 payload bits from the lead byte, 6 from the continuation byte. */
codepoint = (utf[ix] & 0x1f) << 6;
9892
codepoint |= utf[ix+1] & 0x3f;
9893
/* The decoded value is then checked with xmlIsCharQ(). */
if (!xmlIsCharQ(codepoint))
9896
} else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9897
/* Sequence would run past len: report the index reached so far. */
if (ix + 3 > len) return(ix);
9898
/* Both continuation bytes must have the form 10xxxxxx. */
if (((utf[ix+1] & 0xc0) != 0x80) ||
9899
((utf[ix+2] & 0xc0) != 0x80))
9901
/* 4 payload bits from the lead byte, 6 from each continuation byte. */
codepoint = (utf[ix] & 0xf) << 12;
9902
codepoint |= (utf[ix+1] & 0x3f) << 6;
9903
codepoint |= utf[ix+2] & 0x3f;
9904
if (!xmlIsCharQ(codepoint))
9907
} else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9908
/* Sequence would run past len: report the index reached so far. */
if (ix + 4 > len) return(ix);
9909
/* All three continuation bytes must have the form 10xxxxxx. */
if (((utf[ix+1] & 0xc0) != 0x80) ||
9910
((utf[ix+2] & 0xc0) != 0x80) ||
9911
((utf[ix+3] & 0xc0) != 0x80))
9913
/* 3 payload bits from the lead byte, 6 from each continuation byte. */
codepoint = (utf[ix] & 0x7) << 18;
9914
codepoint |= (utf[ix+1] & 0x3f) << 12;
9915
codepoint |= (utf[ix+2] & 0x3f) << 6;
9916
codepoint |= utf[ix+3] & 0x3f;
9917
if (!xmlIsCharQ(codepoint))
9920
} else /* lead byte matches none of the 1- to 4-byte patterns above */
9927
* xmlParseTryOrFinish:
9928
* @ctxt: an XML parser context
9929
* @terminate: last chunk indicator
9931
* Try to progress on parsing
9933
* Returns zero if no parsing was possible
9936
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9940
const xmlChar *lastlt, *lastgt;
9942
if (ctxt->input == NULL)
9946
switch (ctxt->instate) {
9947
case XML_PARSER_EOF:
9948
xmlGenericError(xmlGenericErrorContext,
9949
"PP: try EOF\n"); break;
9950
case XML_PARSER_START:
9951
xmlGenericError(xmlGenericErrorContext,
9952
"PP: try START\n"); break;
9953
case XML_PARSER_MISC:
9954
xmlGenericError(xmlGenericErrorContext,
9955
"PP: try MISC\n");break;
9956
case XML_PARSER_COMMENT:
9957
xmlGenericError(xmlGenericErrorContext,
9958
"PP: try COMMENT\n");break;
9959
case XML_PARSER_PROLOG:
9960
xmlGenericError(xmlGenericErrorContext,
9961
"PP: try PROLOG\n");break;
9962
case XML_PARSER_START_TAG:
9963
xmlGenericError(xmlGenericErrorContext,
9964
"PP: try START_TAG\n");break;
9965
case XML_PARSER_CONTENT:
9966
xmlGenericError(xmlGenericErrorContext,
9967
"PP: try CONTENT\n");break;
9968
case XML_PARSER_CDATA_SECTION:
9969
xmlGenericError(xmlGenericErrorContext,
9970
"PP: try CDATA_SECTION\n");break;
9971
case XML_PARSER_END_TAG:
9972
xmlGenericError(xmlGenericErrorContext,
9973
"PP: try END_TAG\n");break;
9974
case XML_PARSER_ENTITY_DECL:
9975
xmlGenericError(xmlGenericErrorContext,
9976
"PP: try ENTITY_DECL\n");break;
9977
case XML_PARSER_ENTITY_VALUE:
9978
xmlGenericError(xmlGenericErrorContext,
9979
"PP: try ENTITY_VALUE\n");break;
9980
case XML_PARSER_ATTRIBUTE_VALUE:
9981
xmlGenericError(xmlGenericErrorContext,
9982
"PP: try ATTRIBUTE_VALUE\n");break;
9983
case XML_PARSER_DTD:
9984
xmlGenericError(xmlGenericErrorContext,
9985
"PP: try DTD\n");break;
9986
case XML_PARSER_EPILOG:
9987
xmlGenericError(xmlGenericErrorContext,
9988
"PP: try EPILOG\n");break;
9990
xmlGenericError(xmlGenericErrorContext,
9991
"PP: try PI\n");break;
9992
case XML_PARSER_IGNORE:
9993
xmlGenericError(xmlGenericErrorContext,
9994
"PP: try IGNORE\n");break;
9998
if ((ctxt->input != NULL) &&
9999
(ctxt->input->cur - ctxt->input->base > 4096)) {
10001
ctxt->checkIndex = 0;
10003
xmlParseGetLasts(ctxt, &lastlt, &lastgt);
10006
if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10011
* Pop-up of finished entities.
10013
while ((RAW == 0) && (ctxt->inputNr > 1))
10016
if (ctxt->input == NULL) break;
10017
if (ctxt->input->buf == NULL)
10018
avail = ctxt->input->length -
10019
(ctxt->input->cur - ctxt->input->base);
10022
* If we are operating on converted input, try to flush
10023
* remaining chars to avoid them stalling in the non-converted
10026
if ((ctxt->input->buf->raw != NULL) &&
10027
(ctxt->input->buf->raw->use > 0)) {
10028
int base = ctxt->input->base -
10029
ctxt->input->buf->buffer->content;
10030
int current = ctxt->input->cur - ctxt->input->base;
10032
xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10033
ctxt->input->base = ctxt->input->buf->buffer->content + base;
10034
ctxt->input->cur = ctxt->input->base + current;
10036
&ctxt->input->buf->buffer->content[
10037
ctxt->input->buf->buffer->use];
10039
avail = ctxt->input->buf->buffer->use -
10040
(ctxt->input->cur - ctxt->input->base);
10044
switch (ctxt->instate) {
10045
case XML_PARSER_EOF:
10047
* Document parsing is done !
10050
case XML_PARSER_START:
10051
if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10053
xmlCharEncoding enc;
10056
* Very first chars read from the document flow.
10062
* Get the 4 first bytes and decode the charset
10063
* if enc != XML_CHAR_ENCODING_NONE
10064
* plug some encoding conversion routines,
10065
* else xmlSwitchEncoding will set to (default)
10072
enc = xmlDetectCharEncoding(start, 4);
10073
xmlSwitchEncoding(ctxt, enc);
10079
cur = ctxt->input->cur[0];
10080
next = ctxt->input->cur[1];
10082
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10083
ctxt->sax->setDocumentLocator(ctxt->userData,
10084
&xmlDefaultSAXLocator);
10085
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10086
ctxt->instate = XML_PARSER_EOF;
10088
xmlGenericError(xmlGenericErrorContext,
10089
"PP: entering EOF\n");
10091
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10092
ctxt->sax->endDocument(ctxt->userData);
10095
if ((cur == '<') && (next == '?')) {
10096
/* PI or XML decl */
10097
if (avail < 5) return(ret);
10098
if ((!terminate) &&
10099
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10101
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10102
ctxt->sax->setDocumentLocator(ctxt->userData,
10103
&xmlDefaultSAXLocator);
10104
if ((ctxt->input->cur[2] == 'x') &&
10105
(ctxt->input->cur[3] == 'm') &&
10106
(ctxt->input->cur[4] == 'l') &&
10107
(IS_BLANK_CH(ctxt->input->cur[5]))) {
10110
xmlGenericError(xmlGenericErrorContext,
10111
"PP: Parsing XML Decl\n");
10113
xmlParseXMLDecl(ctxt);
10114
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10116
* The XML REC instructs us to stop parsing right
10119
ctxt->instate = XML_PARSER_EOF;
10122
ctxt->standalone = ctxt->input->standalone;
10123
if ((ctxt->encoding == NULL) &&
10124
(ctxt->input->encoding != NULL))
10125
ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10126
if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10127
(!ctxt->disableSAX))
10128
ctxt->sax->startDocument(ctxt->userData);
10129
ctxt->instate = XML_PARSER_MISC;
10131
xmlGenericError(xmlGenericErrorContext,
10132
"PP: entering MISC\n");
10135
ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10136
if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10137
(!ctxt->disableSAX))
10138
ctxt->sax->startDocument(ctxt->userData);
10139
ctxt->instate = XML_PARSER_MISC;
10141
xmlGenericError(xmlGenericErrorContext,
10142
"PP: entering MISC\n");
10146
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10147
ctxt->sax->setDocumentLocator(ctxt->userData,
10148
&xmlDefaultSAXLocator);
10149
ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10150
if (ctxt->version == NULL) {
10151
xmlErrMemory(ctxt, NULL);
10154
if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10155
(!ctxt->disableSAX))
10156
ctxt->sax->startDocument(ctxt->userData);
10157
ctxt->instate = XML_PARSER_MISC;
10159
xmlGenericError(xmlGenericErrorContext,
10160
"PP: entering MISC\n");
10164
case XML_PARSER_START_TAG: {
10165
const xmlChar *name;
10166
const xmlChar *prefix;
10167
const xmlChar *URI;
10168
int nsNr = ctxt->nsNr;
10170
if ((avail < 2) && (ctxt->inputNr == 1))
10172
cur = ctxt->input->cur[0];
10174
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10175
ctxt->instate = XML_PARSER_EOF;
10176
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10177
ctxt->sax->endDocument(ctxt->userData);
10181
if (ctxt->progressive) {
10182
/* > can be found unescaped in attribute values */
10183
if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
10185
} else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10189
if (ctxt->spaceNr == 0)
10190
spacePush(ctxt, -1);
10191
else if (*ctxt->space == -2)
10192
spacePush(ctxt, -1);
10194
spacePush(ctxt, *ctxt->space);
10195
#ifdef LIBXML_SAX1_ENABLED
10197
#endif /* LIBXML_SAX1_ENABLED */
10198
name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10199
#ifdef LIBXML_SAX1_ENABLED
10201
name = xmlParseStartTag(ctxt);
10202
#endif /* LIBXML_SAX1_ENABLED */
10203
if (name == NULL) {
10205
ctxt->instate = XML_PARSER_EOF;
10206
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10207
ctxt->sax->endDocument(ctxt->userData);
10210
#ifdef LIBXML_VALID_ENABLED
10212
* [ VC: Root Element Type ]
10213
* The Name in the document type declaration must match
10214
* the element type of the root element.
10216
if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10217
ctxt->node && (ctxt->node == ctxt->myDoc->children))
10218
ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10219
#endif /* LIBXML_VALID_ENABLED */
10222
* Check for an Empty Element.
10224
if ((RAW == '/') && (NXT(1) == '>')) {
10228
if ((ctxt->sax != NULL) &&
10229
(ctxt->sax->endElementNs != NULL) &&
10230
(!ctxt->disableSAX))
10231
ctxt->sax->endElementNs(ctxt->userData, name,
10233
if (ctxt->nsNr - nsNr > 0)
10234
nsPop(ctxt, ctxt->nsNr - nsNr);
10235
#ifdef LIBXML_SAX1_ENABLED
10237
if ((ctxt->sax != NULL) &&
10238
(ctxt->sax->endElement != NULL) &&
10239
(!ctxt->disableSAX))
10240
ctxt->sax->endElement(ctxt->userData, name);
10241
#endif /* LIBXML_SAX1_ENABLED */
10244
if (ctxt->nameNr == 0) {
10245
ctxt->instate = XML_PARSER_EPILOG;
10247
ctxt->instate = XML_PARSER_CONTENT;
10254
xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
10255
"Couldn't find end of Start Tag %s\n",
10261
nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
10262
#ifdef LIBXML_SAX1_ENABLED
10264
namePush(ctxt, name);
10265
#endif /* LIBXML_SAX1_ENABLED */
10267
ctxt->instate = XML_PARSER_CONTENT;
10270
case XML_PARSER_CONTENT: {
10271
const xmlChar *test;
10273
if ((avail < 2) && (ctxt->inputNr == 1))
10275
cur = ctxt->input->cur[0];
10276
next = ctxt->input->cur[1];
10279
cons = ctxt->input->consumed;
10280
if ((cur == '<') && (next == '/')) {
10281
ctxt->instate = XML_PARSER_END_TAG;
10283
} else if ((cur == '<') && (next == '?')) {
10284
if ((!terminate) &&
10285
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10288
} else if ((cur == '<') && (next != '!')) {
10289
ctxt->instate = XML_PARSER_START_TAG;
10291
} else if ((cur == '<') && (next == '!') &&
10292
(ctxt->input->cur[2] == '-') &&
10293
(ctxt->input->cur[3] == '-')) {
10298
ctxt->input->cur += 4;
10299
term = xmlParseLookupSequence(ctxt, '-', '-', '>');
10300
ctxt->input->cur -= 4;
10301
if ((!terminate) && (term < 0))
10303
xmlParseComment(ctxt);
10304
ctxt->instate = XML_PARSER_CONTENT;
10305
} else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
10306
(ctxt->input->cur[2] == '[') &&
10307
(ctxt->input->cur[3] == 'C') &&
10308
(ctxt->input->cur[4] == 'D') &&
10309
(ctxt->input->cur[5] == 'A') &&
10310
(ctxt->input->cur[6] == 'T') &&
10311
(ctxt->input->cur[7] == 'A') &&
10312
(ctxt->input->cur[8] == '[')) {
10314
ctxt->instate = XML_PARSER_CDATA_SECTION;
10316
} else if ((cur == '<') && (next == '!') &&
10319
} else if (cur == '&') {
10320
if ((!terminate) &&
10321
(xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
10323
xmlParseReference(ctxt);
10325
/* TODO Avoid the extra copy, handle directly !!! */
10327
* Goal of the following test is:
10328
* - minimize calls to the SAX 'character' callback
10329
* when they are mergeable
10330
* - handle a problem for isBlank when we only parse
10331
* a sequence of blank chars and the next one is
10332
* not available to check against '<' presence.
10333
* - tries to homogenize the differences in SAX
10334
* callbacks between the push and pull versions
10337
if ((ctxt->inputNr == 1) &&
10338
(avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10340
if (ctxt->progressive) {
10341
if ((lastlt == NULL) ||
10342
(ctxt->input->cur > lastlt))
10344
} else if (xmlParseLookupSequence(ctxt,
10350
ctxt->checkIndex = 0;
10351
xmlParseCharData(ctxt, 0);
10354
* Pop-up of finished entities.
10356
while ((RAW == 0) && (ctxt->inputNr > 1))
10358
if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
10359
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10360
"detected an error in element content\n");
10361
ctxt->instate = XML_PARSER_EOF;
10366
case XML_PARSER_END_TAG:
10370
if (ctxt->progressive) {
10371
/* > can be found unescaped in attribute values */
10372
if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
10374
} else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10379
xmlParseEndTag2(ctxt,
10380
(void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10381
(void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
10382
(int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
10385
#ifdef LIBXML_SAX1_ENABLED
10387
xmlParseEndTag1(ctxt, 0);
10388
#endif /* LIBXML_SAX1_ENABLED */
10389
if (ctxt->nameNr == 0) {
10390
ctxt->instate = XML_PARSER_EPILOG;
10392
ctxt->instate = XML_PARSER_CONTENT;
10395
case XML_PARSER_CDATA_SECTION: {
10397
* The Push mode need to have the SAX callback for
10398
* cdataBlock merge back contiguous callbacks.
10402
base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10404
if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
10407
tmp = xmlCheckCdataPush(ctxt->input->cur,
10408
XML_PARSER_BIG_BUFFER_SIZE);
10411
ctxt->input->cur += tmp;
10412
goto encoding_error;
10414
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10415
if (ctxt->sax->cdataBlock != NULL)
10416
ctxt->sax->cdataBlock(ctxt->userData,
10417
ctxt->input->cur, tmp);
10418
else if (ctxt->sax->characters != NULL)
10419
ctxt->sax->characters(ctxt->userData,
10420
ctxt->input->cur, tmp);
10423
ctxt->checkIndex = 0;
10429
tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10430
if ((tmp < 0) || (tmp != base)) {
10432
ctxt->input->cur += tmp;
10433
goto encoding_error;
10435
if ((ctxt->sax != NULL) && (base == 0) &&
10436
(ctxt->sax->cdataBlock != NULL) &&
10437
(!ctxt->disableSAX)) {
10439
* Special case to provide identical behaviour
10440
* between pull and push parsers on empty CDATA
10443
if ((ctxt->input->cur - ctxt->input->base >= 9) &&
10444
(!strncmp((const char *)&ctxt->input->cur[-9],
10446
ctxt->sax->cdataBlock(ctxt->userData,
10448
} else if ((ctxt->sax != NULL) && (base > 0) &&
10449
(!ctxt->disableSAX)) {
10450
if (ctxt->sax->cdataBlock != NULL)
10451
ctxt->sax->cdataBlock(ctxt->userData,
10452
ctxt->input->cur, base);
10453
else if (ctxt->sax->characters != NULL)
10454
ctxt->sax->characters(ctxt->userData,
10455
ctxt->input->cur, base);
10458
ctxt->checkIndex = 0;
10459
ctxt->instate = XML_PARSER_CONTENT;
10461
xmlGenericError(xmlGenericErrorContext,
10462
"PP: entering CONTENT\n");
10467
case XML_PARSER_MISC:
10469
if (ctxt->input->buf == NULL)
10470
avail = ctxt->input->length -
10471
(ctxt->input->cur - ctxt->input->base);
10473
avail = ctxt->input->buf->buffer->use -
10474
(ctxt->input->cur - ctxt->input->base);
10477
cur = ctxt->input->cur[0];
10478
next = ctxt->input->cur[1];
10479
if ((cur == '<') && (next == '?')) {
10480
if ((!terminate) &&
10481
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10484
xmlGenericError(xmlGenericErrorContext,
10485
"PP: Parsing PI\n");
10488
ctxt->checkIndex = 0;
10489
} else if ((cur == '<') && (next == '!') &&
10490
(ctxt->input->cur[2] == '-') &&
10491
(ctxt->input->cur[3] == '-')) {
10492
if ((!terminate) &&
10493
(xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10496
xmlGenericError(xmlGenericErrorContext,
10497
"PP: Parsing Comment\n");
10499
xmlParseComment(ctxt);
10500
ctxt->instate = XML_PARSER_MISC;
10501
ctxt->checkIndex = 0;
10502
} else if ((cur == '<') && (next == '!') &&
10503
(ctxt->input->cur[2] == 'D') &&
10504
(ctxt->input->cur[3] == 'O') &&
10505
(ctxt->input->cur[4] == 'C') &&
10506
(ctxt->input->cur[5] == 'T') &&
10507
(ctxt->input->cur[6] == 'Y') &&
10508
(ctxt->input->cur[7] == 'P') &&
10509
(ctxt->input->cur[8] == 'E')) {
10510
if ((!terminate) &&
10511
(xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10514
xmlGenericError(xmlGenericErrorContext,
10515
"PP: Parsing internal subset\n");
10517
ctxt->inSubset = 1;
10518
xmlParseDocTypeDecl(ctxt);
10520
ctxt->instate = XML_PARSER_DTD;
10522
xmlGenericError(xmlGenericErrorContext,
10523
"PP: entering DTD\n");
10527
* Create and update the external subset.
10529
ctxt->inSubset = 2;
10530
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10531
(ctxt->sax->externalSubset != NULL))
10532
ctxt->sax->externalSubset(ctxt->userData,
10533
ctxt->intSubName, ctxt->extSubSystem,
10535
ctxt->inSubset = 0;
10536
xmlCleanSpecialAttr(ctxt);
10537
ctxt->instate = XML_PARSER_PROLOG;
10539
xmlGenericError(xmlGenericErrorContext,
10540
"PP: entering PROLOG\n");
10543
} else if ((cur == '<') && (next == '!') &&
10547
ctxt->instate = XML_PARSER_START_TAG;
10548
ctxt->progressive = 1;
10549
xmlParseGetLasts(ctxt, &lastlt, &lastgt);
10551
xmlGenericError(xmlGenericErrorContext,
10552
"PP: entering START_TAG\n");
10556
case XML_PARSER_PROLOG:
10558
if (ctxt->input->buf == NULL)
10559
avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10561
avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10564
cur = ctxt->input->cur[0];
10565
next = ctxt->input->cur[1];
10566
if ((cur == '<') && (next == '?')) {
10567
if ((!terminate) &&
10568
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10571
xmlGenericError(xmlGenericErrorContext,
10572
"PP: Parsing PI\n");
10575
} else if ((cur == '<') && (next == '!') &&
10576
(ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10577
if ((!terminate) &&
10578
(xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10581
xmlGenericError(xmlGenericErrorContext,
10582
"PP: Parsing Comment\n");
10584
xmlParseComment(ctxt);
10585
ctxt->instate = XML_PARSER_PROLOG;
10586
} else if ((cur == '<') && (next == '!') &&
10590
ctxt->instate = XML_PARSER_START_TAG;
10591
if (ctxt->progressive == 0)
10592
ctxt->progressive = 1;
10593
xmlParseGetLasts(ctxt, &lastlt, &lastgt);
10595
xmlGenericError(xmlGenericErrorContext,
10596
"PP: entering START_TAG\n");
10600
case XML_PARSER_EPILOG:
10602
if (ctxt->input->buf == NULL)
10603
avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10605
avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10608
cur = ctxt->input->cur[0];
10609
next = ctxt->input->cur[1];
10610
if ((cur == '<') && (next == '?')) {
10611
if ((!terminate) &&
10612
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10615
xmlGenericError(xmlGenericErrorContext,
10616
"PP: Parsing PI\n");
10619
ctxt->instate = XML_PARSER_EPILOG;
10620
} else if ((cur == '<') && (next == '!') &&
10621
(ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10622
if ((!terminate) &&
10623
(xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10626
xmlGenericError(xmlGenericErrorContext,
10627
"PP: Parsing Comment\n");
10629
xmlParseComment(ctxt);
10630
ctxt->instate = XML_PARSER_EPILOG;
10631
} else if ((cur == '<') && (next == '!') &&
10635
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10636
ctxt->instate = XML_PARSER_EOF;
10638
xmlGenericError(xmlGenericErrorContext,
10639
"PP: entering EOF\n");
10641
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10642
ctxt->sax->endDocument(ctxt->userData);
10646
case XML_PARSER_DTD: {
10648
* Sorry but progressive parsing of the internal subset
10649
* is not expected to be supported. We first check that
10650
* the full content of the internal subset is available and
10651
* the parsing is launched only at that point.
10652
* Internal subset ends up with "']' S? '>'" in an unescaped
10653
* section and not in a ']]>' sequence which are conditional
10654
* sections (whoever argued to keep that crap in XML deserve
10655
* a place in hell !).
10661
base = ctxt->input->cur - ctxt->input->base;
10662
if (base < 0) return(0);
10663
if (ctxt->checkIndex > base)
10664
base = ctxt->checkIndex;
10665
buf = ctxt->input->buf->buffer->content;
10666
for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10669
if (buf[base] == quote)
10673
if ((quote == 0) && (buf[base] == '<')) {
10675
/* special handling of comments */
10676
if (((unsigned int) base + 4 <
10677
ctxt->input->buf->buffer->use) &&
10678
(buf[base + 1] == '!') &&
10679
(buf[base + 2] == '-') &&
10680
(buf[base + 3] == '-')) {
10681
for (;(unsigned int) base + 3 <
10682
ctxt->input->buf->buffer->use; base++) {
10683
if ((buf[base] == '-') &&
10684
(buf[base + 1] == '-') &&
10685
(buf[base + 2] == '>')) {
10693
fprintf(stderr, "unfinished comment\n");
10700
if (buf[base] == '"') {
10704
if (buf[base] == '\'') {
10708
if (buf[base] == ']') {
10710
fprintf(stderr, "%c%c%c%c: ", buf[base],
10711
buf[base + 1], buf[base + 2], buf[base + 3]);
10713
if ((unsigned int) base +1 >=
10714
ctxt->input->buf->buffer->use)
10716
if (buf[base + 1] == ']') {
10717
/* conditional crap, skip both ']' ! */
10722
(unsigned int) base + i < ctxt->input->buf->buffer->use;
10724
if (buf[base + i] == '>') {
10726
fprintf(stderr, "found\n");
10728
goto found_end_int_subset;
10730
if (!IS_BLANK_CH(buf[base + i])) {
10732
fprintf(stderr, "not found\n");
10734
goto not_end_of_int_subset;
10738
fprintf(stderr, "end of stream\n");
10743
not_end_of_int_subset:
10744
continue; /* for */
10747
* We didn't find the end of the Internal subset
10751
xmlGenericError(xmlGenericErrorContext,
10752
"PP: lookup of int subset end filed\n");
10756
found_end_int_subset:
10757
xmlParseInternalSubset(ctxt);
10758
ctxt->inSubset = 2;
10759
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10760
(ctxt->sax->externalSubset != NULL))
10761
ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10762
ctxt->extSubSystem, ctxt->extSubURI);
10763
ctxt->inSubset = 0;
10764
xmlCleanSpecialAttr(ctxt);
10765
ctxt->instate = XML_PARSER_PROLOG;
10766
ctxt->checkIndex = 0;
10768
xmlGenericError(xmlGenericErrorContext,
10769
"PP: entering PROLOG\n");
10773
case XML_PARSER_COMMENT:
10774
xmlGenericError(xmlGenericErrorContext,
10775
"PP: internal error, state == COMMENT\n");
10776
ctxt->instate = XML_PARSER_CONTENT;
10778
xmlGenericError(xmlGenericErrorContext,
10779
"PP: entering CONTENT\n");
10782
case XML_PARSER_IGNORE:
10783
xmlGenericError(xmlGenericErrorContext,
10784
"PP: internal error, state == IGNORE");
10785
ctxt->instate = XML_PARSER_DTD;
10787
xmlGenericError(xmlGenericErrorContext,
10788
"PP: entering DTD\n");
10791
case XML_PARSER_PI:
10792
xmlGenericError(xmlGenericErrorContext,
10793
"PP: internal error, state == PI\n");
10794
ctxt->instate = XML_PARSER_CONTENT;
10796
xmlGenericError(xmlGenericErrorContext,
10797
"PP: entering CONTENT\n");
10800
case XML_PARSER_ENTITY_DECL:
10801
xmlGenericError(xmlGenericErrorContext,
10802
"PP: internal error, state == ENTITY_DECL\n");
10803
ctxt->instate = XML_PARSER_DTD;
10805
xmlGenericError(xmlGenericErrorContext,
10806
"PP: entering DTD\n");
10809
case XML_PARSER_ENTITY_VALUE:
10810
xmlGenericError(xmlGenericErrorContext,
10811
"PP: internal error, state == ENTITY_VALUE\n");
10812
ctxt->instate = XML_PARSER_CONTENT;
10814
xmlGenericError(xmlGenericErrorContext,
10815
"PP: entering DTD\n");
10818
case XML_PARSER_ATTRIBUTE_VALUE:
10819
xmlGenericError(xmlGenericErrorContext,
10820
"PP: internal error, state == ATTRIBUTE_VALUE\n");
10821
ctxt->instate = XML_PARSER_START_TAG;
10823
xmlGenericError(xmlGenericErrorContext,
10824
"PP: entering START_TAG\n");
10827
case XML_PARSER_SYSTEM_LITERAL:
10828
xmlGenericError(xmlGenericErrorContext,
10829
"PP: internal error, state == SYSTEM_LITERAL\n");
10830
ctxt->instate = XML_PARSER_START_TAG;
10832
xmlGenericError(xmlGenericErrorContext,
10833
"PP: entering START_TAG\n");
10836
case XML_PARSER_PUBLIC_LITERAL:
10837
xmlGenericError(xmlGenericErrorContext,
10838
"PP: internal error, state == PUBLIC_LITERAL\n");
10839
ctxt->instate = XML_PARSER_START_TAG;
10841
xmlGenericError(xmlGenericErrorContext,
10842
"PP: entering START_TAG\n");
10849
xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10856
snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10857
ctxt->input->cur[0], ctxt->input->cur[1],
10858
ctxt->input->cur[2], ctxt->input->cur[3]);
10859
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10860
"Input is not proper UTF-8, indicate encoding !\n%s",
10861
BAD_CAST buffer, NULL);
10868
* @ctxt: an XML parser context
10869
* @chunk: a char array
10870
* @size: the size in byte of the chunk
10871
* @terminate: last chunk indicator
10873
* Parse a Chunk of memory
10875
* Returns zero if no error, the xmlParserErrors otherwise.
10878
/*
 * xmlParseChunk: push-mode entry point. Pushes chunk into the input buffer
 * of ctxt and then runs xmlParseTryOrFinish over the buffered data.
 * The return statements visible here yield XML_ERR_INTERNAL_ERROR, the
 * stored ctxt->errNo, XML_PARSER_EOF or XML_ERR_INVALID_ENCODING.
 * NOTE(review): several lines of this definition are not visible in this
 * listing; the comments below describe only the statements shown.
 */
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10883
return(XML_ERR_INTERNAL_ERROR);
10884
/* An earlier error that also disabled SAX is reported again unchanged. */
if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10885
return(ctxt->errNo);
10886
/* While still in the XML_PARSER_START state, call xmlDetectSAX2 first. */
if (ctxt->instate == XML_PARSER_START)
10887
xmlDetectSAX2(ctxt);
10888
/*
 * Detects a non-final chunk whose last byte is a carriage return.
 * The body of this branch is not visible in this listing.
 */
if ((size > 0) && (chunk != NULL) && (!terminate) &&
10889
(chunk[size - 1] == '\r')) {
10893
/* New data is pushed only when an input buffer exists and the state is not EOF. */
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10894
(ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10895
/*
 * base and cur are saved as offsets and re-applied to buffer->content
 * after the push. Presumably the push may move the buffer - TODO confirm.
 */
int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10896
int cur = ctxt->input->cur - ctxt->input->base;
10899
res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10901
/*
 * Error path of the push (its guarding test is not visible here):
 * errNo is set to XML_PARSER_EOF, SAX is disabled and the same value
 * is returned.
 */
ctxt->errNo = XML_PARSER_EOF;
10902
ctxt->disableSAX = 1;
10903
return (XML_PARSER_EOF);
10905
ctxt->input->base = ctxt->input->buf->buffer->content + base;
10906
ctxt->input->cur = ctxt->input->base + cur;
10908
&ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
10910
xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10913
/*
 * Otherwise, when not at EOF: if the input buffer has an encoder and both
 * of its buffers, call xmlCharEncInFunc on in->buffer and in->raw.
 */
} else if (ctxt->instate != XML_PARSER_EOF) {
10914
if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10915
xmlParserInputBufferPtr in = ctxt->input->buf;
10916
if ((in->encoder != NULL) && (in->buffer != NULL) &&
10917
(in->raw != NULL)) {
10920
nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10923
xmlGenericError(xmlGenericErrorContext,
10924
"xmlParseChunk: encoder error\n");
10925
return(XML_ERR_INVALID_ENCODING);
10930
/* Run the push parser over whatever is buffered. */
xmlParseTryOrFinish(ctxt, terminate);
10931
/* When end_in_lf is 1, a single carriage return is pushed after parsing. */
if ((end_in_lf == 1) && (ctxt->input != NULL) &&
10932
(ctxt->input->buf != NULL)) {
10933
xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
10935
if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10936
return(ctxt->errNo);
10939
* Check for termination
10943
/* avail is the number of bytes between cur and the end of the available data. */
if (ctxt->input != NULL) {
10944
if (ctxt->input->buf == NULL)
10945
avail = ctxt->input->length -
10946
(ctxt->input->cur - ctxt->input->base);
10948
avail = ctxt->input->buf->buffer->use -
10949
(ctxt->input->cur - ctxt->input->base);
10952
/* A state other than EOF or EPILOG raises XML_ERR_DOCUMENT_END. */
if ((ctxt->instate != XML_PARSER_EOF) &&
10953
(ctxt->instate != XML_PARSER_EPILOG)) {
10954
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10956
/* Unread bytes left while in the EPILOG state raise the same error. */
if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
10957
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10959
/* If EOF was not reached yet, call the endDocument callback when one is set. */
if (ctxt->instate != XML_PARSER_EOF) {
10960
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10961
ctxt->sax->endDocument(ctxt->userData);
10963
ctxt->instate = XML_PARSER_EOF;
10965
return((xmlParserErrors) ctxt->errNo);
10968
/************************************************************************
10970
* I/O front end functions to the parser *
10972
************************************************************************/
10975
* xmlCreatePushParserCtxt:
10976
* @sax: a SAX handler
10977
* @user_data: The user data returned on SAX callbacks
10978
* @chunk: a pointer to an array of chars
10979
* @size: number of chars in the array
10980
* @filename: an optional file name or URI
10982
* Create a parser context for using the XML parser in push mode.
10983
* If @chunk is non-NULL and @size is non-zero, the data is used to detect
10984
* the encoding. The remaining characters will be parsed so they
10985
* don't need to be fed in again through xmlParseChunk.
10986
* To allow content encoding detection, @size should be >= 4
10987
* The value of @filename is used for fetching external entities
10988
* and error/warning reports.
10990
* Returns the new parser context or NULL
10994
/*
 * xmlCreatePushParserCtxt: builds a parser context for push mode.
 * Steps visible in this listing:
 *  - detect the encoding from chunk when at least 4 bytes are supplied
 *  - allocate the input buffer and the parser context
 *  - allocate pushTab and install a private copy of the sax handler
 *  - derive ctxt->directory and the stream filename from filename
 *  - create an input stream over the buffer and push it on the context
 *  - with no initial data, set ctxt->charset to XML_CHAR_ENCODING_NONE;
 *    otherwise push chunk into the buffer and recompute base and cur
 *    from the offsets saved before the push
 *  - switch to the detected encoding when one was found
 * NOTE(review): several lines of this definition (returns, closing braces,
 * else lines) are not visible in this listing; the comments describe only
 * the statements shown.
 */
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10995
const char *chunk, int size, const char *filename) {
10996
xmlParserCtxtPtr ctxt;
10997
xmlParserInputPtr inputStream;
10998
xmlParserInputBufferPtr buf;
10999
xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11002
* plug some encoding conversion routines
11004
/* Detection is attempted only when chunk holds at least 4 bytes. */
if ((chunk != NULL) && (size >= 4))
11005
enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11007
buf = xmlAllocParserInputBuffer(enc);
11008
if (buf == NULL) return(NULL);
11010
ctxt = xmlNewParserCtxt();
11011
/* On context allocation failure the input buffer is released. */
if (ctxt == NULL) {
11012
xmlErrMemory(NULL, "creating parser: out of memory\n");
11013
xmlFreeParserInputBuffer(buf);
11016
ctxt->dictNames = 1;
11017
/* pushTab gets three pointer-sized slots per nameMax entry. */
ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11018
if (ctxt->pushTab == NULL) {
11019
xmlErrMemory(ctxt, NULL);
11020
xmlFreeParserInputBuffer(buf);
11021
xmlFreeParserCtxt(ctxt);
11025
/*
 * Replace the handler of the context with a freshly allocated one.
 * With SAX1 enabled the old handler is freed only when it is not the
 * address of xmlDefaultSAXHandler.
 */
#ifdef LIBXML_SAX1_ENABLED
11026
if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
11027
#endif /* LIBXML_SAX1_ENABLED */
11028
xmlFree(ctxt->sax);
11029
ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11030
if (ctxt->sax == NULL) {
11031
xmlErrMemory(ctxt, NULL);
11032
xmlFreeParserInputBuffer(buf);
11033
xmlFreeParserCtxt(ctxt);
11036
/*
 * Start from a zeroed handler, then copy either the whole xmlSAXHandler
 * or only sizeof(xmlSAXHandlerV1) bytes depending on sax->initialized.
 * The else line between the two copies is not visible in this listing.
 */
memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11037
if (sax->initialized == XML_SAX2_MAGIC)
11038
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11040
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
11041
if (user_data != NULL)
11042
ctxt->userData = user_data;
11044
/* The directory is taken from filename when one is given. */
if (filename == NULL) {
11045
ctxt->directory = NULL;
11047
ctxt->directory = xmlParserGetDirectory(filename);
11050
inputStream = xmlNewInputStream(ctxt);
11051
if (inputStream == NULL) {
11052
xmlFreeParserCtxt(ctxt);
11053
xmlFreeParserInputBuffer(buf);
11057
/* The stream filename is the xmlCanonicPath form of filename. */
if (filename == NULL)
11058
inputStream->filename = NULL;
11060
inputStream->filename = (char *)
11061
xmlCanonicPath((const xmlChar *) filename);
11062
if (inputStream->filename == NULL) {
11063
xmlFreeParserCtxt(ctxt);
11064
xmlFreeParserInputBuffer(buf);
11068
/* The stream takes buf; base and cur both start at the buffer content. */
inputStream->buf = buf;
11069
inputStream->base = inputStream->buf->buffer->content;
11070
inputStream->cur = inputStream->buf->buffer->content;
11072
&inputStream->buf->buffer->content[inputStream->buf->buffer->use];
11074
inputPush(ctxt, inputStream);
11077
* If the caller didn't provide an initial 'chunk' for determining
11078
* the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11079
* that it can be automatically determined later
11081
if ((size == 0) || (chunk == NULL)) {
11082
ctxt->charset = XML_CHAR_ENCODING_NONE;
11083
} else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
11084
int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11085
int cur = ctxt->input->cur - ctxt->input->base;
11087
xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11089
ctxt->input->base = ctxt->input->buf->buffer->content + base;
11090
ctxt->input->cur = ctxt->input->base + cur;
11092
&ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
11094
xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11098
if (enc != XML_CHAR_ENCODING_NONE) {
11099
xmlSwitchEncoding(ctxt, enc);
11104
#endif /* LIBXML_PUSH_ENABLED */
11108
* @ctxt: an XML parser context
11110
* Blocks further parser processing
11113
/*
 * xmlStopParser: puts the context in the XML_PARSER_EOF state and sets
 * disableSAX to 1.
 * NOTE(review): lines of this definition are missing from this listing
 * (at least one before the first assignment and the closing braces).
 */
xmlStopParser(xmlParserCtxtPtr ctxt) {
11116
ctxt->instate = XML_PARSER_EOF;
11117
ctxt->disableSAX = 1;
11118
/*
 * Repoint the current input at an empty string literal; base is set to
 * the same address as cur.
 */
if (ctxt->input != NULL) {
11119
ctxt->input->cur = BAD_CAST"";
11120
ctxt->input->base = ctxt->input->cur;
11125
* xmlCreateIOParserCtxt:
11126
* @sax: a SAX handler
11127
* @user_data: The user data returned on SAX callbacks
11128
* @ioread: an I/O read function
11129
* @ioclose: an I/O close function
11130
* @ioctx: an I/O handler
11131
* @enc: the charset encoding if known
11133
* Create a parser context for using the XML parser with an existing
11136
* Returns the new parser context or NULL
11139
/*
 * xmlCreateIOParserCtxt: builds a parser context whose input is read
 * through the ioread and ioclose callbacks with ioctx as their handle.
 * Returns NULL when ioread is NULL or when the input buffer cannot be
 * created; other return statements are not visible in this listing.
 * NOTE(review): several lines of this definition (returns, closing braces,
 * else lines) are not visible here; the comments describe only the
 * statements shown.
 */
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11140
xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11141
void *ioctx, xmlCharEncoding enc) {
11142
xmlParserCtxtPtr ctxt;
11143
xmlParserInputPtr inputStream;
11144
xmlParserInputBufferPtr buf;
11146
/* A read callback is mandatory. */
if (ioread == NULL) return(NULL);
11148
buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11149
if (buf == NULL) return(NULL);
11151
ctxt = xmlNewParserCtxt();
11152
/* On context allocation failure the input buffer is released. */
if (ctxt == NULL) {
11153
xmlFreeParserInputBuffer(buf);
11157
/*
 * Replace the handler of the context with a freshly allocated one.
 * With SAX1 enabled the old handler is freed only when it is not the
 * address of xmlDefaultSAXHandler.
 */
#ifdef LIBXML_SAX1_ENABLED
11158
if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
11159
#endif /* LIBXML_SAX1_ENABLED */
11160
xmlFree(ctxt->sax);
11161
ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11162
/*
 * NOTE(review): no xmlFreeParserInputBuffer(buf) appears on this failure
 * path, unlike the ctxt == NULL path above - check whether buf leaks.
 */
if (ctxt->sax == NULL) {
11163
xmlErrMemory(ctxt, NULL);
11164
xmlFreeParserCtxt(ctxt);
11167
/*
 * Start from a zeroed handler, then copy either the whole xmlSAXHandler
 * or only sizeof(xmlSAXHandlerV1) bytes depending on sax->initialized.
 * The else line between the two copies is not visible in this listing.
 */
memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11168
if (sax->initialized == XML_SAX2_MAGIC)
11169
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11171
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
11172
if (user_data != NULL)
11173
ctxt->userData = user_data;
11176
/* Wrap buf in an input stream and push it on the context. */
inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11177
/*
 * NOTE(review): buf is not released here either when the stream cannot
 * be created - confirm who owns buf at this point.
 */
if (inputStream == NULL) {
11178
xmlFreeParserCtxt(ctxt);
11181
inputPush(ctxt, inputStream);
11186
#ifdef LIBXML_VALID_ENABLED
11187
/************************************************************************
11189
* Front ends when parsing a DTD *
11191
************************************************************************/
11195
* @sax: the SAX handler block or NULL
11196
* @input: an Input Buffer
11197
* @enc: the charset encoding if known
11199
* Load and parse a DTD
11201
* Returns the resulting xmlDtdPtr or NULL in case of error.
11202
* @input will be freed by the function in any case.
11206
/*
 * xmlIOParseDTD: parses a DTD read from input as an external subset and
 * hands back the resulting extSubset of a temporary document.
 * ret starts as NULL and is set only when the parse left ctxt->wellFormed
 * non-zero.
 * NOTE(review): several lines of this definition (returns, closing braces,
 * some declarations and assignments) are not visible in this listing; the
 * comments describe only the statements shown.
 */
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11207
xmlCharEncoding enc) {
11208
xmlDtdPtr ret = NULL;
11209
xmlParserCtxtPtr ctxt;
11210
xmlParserInputPtr pinput = NULL;
11216
ctxt = xmlNewParserCtxt();
11217
/* input is released when the context cannot be created. */
if (ctxt == NULL) {
11218
xmlFreeParserInputBuffer(input);
11223
* Set-up the SAX context
11226
/* The handler created with the context is freed; userData becomes the context itself. */
if (ctxt->sax != NULL)
11227
xmlFree(ctxt->sax);
11229
ctxt->userData = ctxt;
11231
xmlDetectSAX2(ctxt);
11234
* generate a parser input from the I/O handler
11237
pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11238
/*
 * On failure ctxt->sax is cleared when a handler was supplied, before the
 * context is freed. Presumably this keeps xmlFreeParserCtxt away from the
 * handler owned by the caller - TODO confirm.
 */
if (pinput == NULL) {
11239
if (sax != NULL) ctxt->sax = NULL;
11240
xmlFreeParserInputBuffer(input);
11241
xmlFreeParserCtxt(ctxt);
11246
* plug some encoding conversion routines here.
11248
xmlPushInput(ctxt, pinput);
11249
/* An encoding given by the caller is applied immediately. */
if (enc != XML_CHAR_ENCODING_NONE) {
11250
xmlSwitchEncoding(ctxt, enc);
11253
pinput->filename = NULL;
11256
/* base and cur of the new input both restart at the current read position. */
pinput->base = ctxt->input->cur;
11257
pinput->cur = ctxt->input->cur;
11258
pinput->free = NULL;
11261
* let's parse that entity knowing it's an external subset.
11263
ctxt->inSubset = 2;
11264
/*
 * NOTE(review): the result of xmlNewDoc is dereferenced on the next
 * statement with no NULL test in between - confirm behaviour on
 * allocation failure.
 */
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11265
ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11266
BAD_CAST "none", BAD_CAST "none");
11268
/* With no encoding supplied, detect one when at least 4 bytes are available. */
if ((enc == XML_CHAR_ENCODING_NONE) &&
11269
((ctxt->input->end - ctxt->input->cur) >= 4)) {
11271
* Get the 4 first bytes and decode the charset
11272
* if enc != XML_CHAR_ENCODING_NONE
11273
* plug some encoding conversion routines.
11279
enc = xmlDetectCharEncoding(start, 4);
11280
if (enc != XML_CHAR_ENCODING_NONE) {
11281
xmlSwitchEncoding(ctxt, enc);
11285
xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11287
/*
 * When the parse was well formed, the external subset is detached from the
 * temporary document and kept as the result; the document is then freed.
 */
if (ctxt->myDoc != NULL) {
11288
if (ctxt->wellFormed) {
11289
ret = ctxt->myDoc->extSubset;
11290
ctxt->myDoc->extSubset = NULL;
11295
/* Walks the children of ret; the loop body is not visible in this listing. */
tmp = ret->children;
11296
while (tmp != NULL) {
11304
xmlFreeDoc(ctxt->myDoc);
11305
ctxt->myDoc = NULL;
11307
/* Same handler reset as on the failure path above, before the context is freed. */
if (sax != NULL) ctxt->sax = NULL;
11308
xmlFreeParserCtxt(ctxt);
11315
* @sax: the SAX handler block
11316
* @ExternalID: a NAME* containing the External ID of the DTD
11317
* @SystemID: a NAME* containing the URL to the DTD
11319
* Load and parse an external subset.
11321
* Returns the resulting xmlDtdPtr or NULL in case of error.
11325
/*
 * xmlSAXParseDTD: loads the DTD named by ExternalID and SystemID through
 * the resolveEntity callback and parses it as an external subset.
 * Returns NULL at once when both identifiers are NULL. ret starts as NULL
 * and is set only when the parse left ctxt->wellFormed non-zero.
 * NOTE(review): several lines of this definition (returns, closing braces,
 * else lines, some declarations and assignments) are not visible in this
 * listing; the comments describe only the statements shown.
 */
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11326
const xmlChar *SystemID) {
11327
xmlDtdPtr ret = NULL;
11328
xmlParserCtxtPtr ctxt;
11329
xmlParserInputPtr input = NULL;
11330
xmlCharEncoding enc;
11331
xmlChar* systemIdCanonic;
11333
/* At least one of the two identifiers is required. */
if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11335
ctxt = xmlNewParserCtxt();
11336
if (ctxt == NULL) {
11341
* Set-up the SAX context
11344
/* The handler created with the context is freed; userData becomes the context itself. */
if (ctxt->sax != NULL)
11345
xmlFree(ctxt->sax);
11347
ctxt->userData = ctxt;
11351
* Canonicalise the system ID
11353
systemIdCanonic = xmlCanonicPath(SystemID);
11354
/*
 * NOTE(review): unlike the input == NULL path below, ctxt->sax is not
 * reset to NULL before xmlFreeParserCtxt on this path - check whether a
 * handler supplied by the caller could be freed here.
 */
if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
11355
xmlFreeParserCtxt(ctxt);
11360
* Ask the Entity resolver to load the damn thing
11363
if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11364
input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11366
/* Nothing was resolved: detach the handler, free the context and the canonical id. */
if (input == NULL) {
11367
if (sax != NULL) ctxt->sax = NULL;
11368
xmlFreeParserCtxt(ctxt);
11369
if (systemIdCanonic != NULL)
11370
xmlFree(systemIdCanonic);
11375
* plug some encoding conversion routines here.
11377
xmlPushInput(ctxt, input);
11378
/*
 * With at least 4 bytes available the encoding is detected and switched
 * to. No test of enc against XML_CHAR_ENCODING_NONE is made between the
 * two calls.
 */
if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11379
enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11380
xmlSwitchEncoding(ctxt, enc);
11383
/*
 * The canonical system id becomes the filename of the input when it has
 * none. The xmlFree below presumably belongs to an else line that is not
 * visible in this listing - TODO confirm.
 */
if (input->filename == NULL)
11384
input->filename = (char *) systemIdCanonic;
11386
xmlFree(systemIdCanonic);
11389
/* base and cur of the input both restart at the current read position. */
input->base = ctxt->input->cur;
11390
input->cur = ctxt->input->cur;
11391
input->free = NULL;
11394
* let's parse that entity knowing it's an external subset.
11396
ctxt->inSubset = 2;
11397
/*
 * NOTE(review): the result of xmlNewDoc is dereferenced on the next
 * statement with no NULL test in between - confirm behaviour on
 * allocation failure.
 */
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11398
ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11399
ExternalID, SystemID);
11400
xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11402
/*
 * When the parse was well formed, the external subset is detached from the
 * temporary document and kept as the result; the document is then freed.
 */
if (ctxt->myDoc != NULL) {
11403
if (ctxt->wellFormed) {
11404
ret = ctxt->myDoc->extSubset;
11405
ctxt->myDoc->extSubset = NULL;
11410
/* Walks the children of ret; the loop body is not visible in this listing. */
tmp = ret->children;
11411
while (tmp != NULL) {
11419
xmlFreeDoc(ctxt->myDoc);
11420
ctxt->myDoc = NULL;
11422
/* The handler is detached again before the context is freed. */
if (sax != NULL) ctxt->sax = NULL;
11423
xmlFreeParserCtxt(ctxt);
11431
* @ExternalID: a NAME* containing the External ID of the DTD
11432
* @SystemID: a NAME* containing the URL to the DTD
11434
* Load and parse an external subset.
11436
* Returns the resulting xmlDtdPtr or NULL in case of error.
11440
/*
 * xmlParseDTD: convenience wrapper that forwards ExternalID and SystemID
 * to xmlSAXParseDTD with a NULL SAX handler and returns its result.
 */
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11441
return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11443
#endif /* LIBXML_VALID_ENABLED */
11445
/************************************************************************
11447
* Front ends when parsing an Entity *
11449
************************************************************************/
11452
* xmlParseCtxtExternalEntity:
11453
* @ctx: the existing parsing context
11454
* @URL: the URL for the entity to load
11455
* @ID: the System ID for the entity to load
11456
* @lst: the return value for the set of parsed nodes
11458
* Parse an external general entity within an existing parsing context
11459
* An external general parsed entity is well-formed if it matches the
11460
* production labeled extParsedEnt.
11462
* [78] extParsedEnt ::= TextDecl? content
11464
* Returns 0 if the entity is well formed, -1 in case of args problem and
11465
* the parser error code otherwise
11469
/*
 * Implementation notes (derived from the statements visible below):
 *  - A NULL ctx yields -1; a ctx->depth above 40 yields XML_ERR_ENTITY_LOOP.
 *  - A private child context is created with xmlNewParserCtxt() and fed by
 *    xmlLoadExternalEntity(); it is freed on the visible failure paths.
 *  - The child borrows from ctx: the SAX handler (its own handler is kept in
 *    oldsax and put back before the final free), the dictionary,
 *    attsDefault and attsSpecial (both reset to NULL before the child is
 *    freed), plus the validate, valid, loadsubset, replaceEntities and
 *    linenumbers settings. The child depth is ctx->depth + 1.
 *  - newDoc is a scratch document holding a pseudoroot node. It borrows
 *    intSubset and extSubset from ctx->myDoc; both are reset to NULL
 *    before xmlFreeDoc(newDoc).
 *  - After xmlParseContent() the validate and valid flags are copied back
 *    into ctx.
 *  - The nodes parsed under the pseudoroot get their parent cleared and are
 *    detached from it.
 * NOTE(review): on the newDoc == NULL path the child is freed while
 * ctxt->sax still points at ctx->sax, whereas the newRoot == NULL path
 * restores oldsax first; confirm the handler owned by ctx cannot be
 * released there.
 * NOTE(review): ctx->myDoc is dereferenced for its dict right before a
 * test of ctx->myDoc against NULL; confirm the earlier NULL test on
 * ctx->myDoc always returns, which would make the later tests redundant.
 * NOTE(review): ctxt->dict is pointed at ctx->dict with no visible
 * xmlDictReference() call; confirm it is detached before the child
 * context is freed.
 */
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
11470
const xmlChar *ID, xmlNodePtr *lst) {
11471
xmlParserCtxtPtr ctxt;
11473
xmlNodePtr newRoot;
11474
xmlSAXHandlerPtr oldsax = NULL;
11477
xmlCharEncoding enc;
11478
xmlParserInputPtr inputStream;
11479
char *directory = NULL;
11481
if (ctx == NULL) return(-1);
11483
if (ctx->depth > 40) {
11484
return(XML_ERR_ENTITY_LOOP);
11489
if ((URL == NULL) && (ID == NULL))
11491
if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11494
ctxt = xmlNewParserCtxt();
11495
if (ctxt == NULL) {
11499
ctxt->userData = ctxt;
11500
ctxt->_private = ctx->_private;
11502
inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11503
if (inputStream == NULL) {
11504
xmlFreeParserCtxt(ctxt);
11508
inputPush(ctxt, inputStream);
11510
if ((ctxt->directory == NULL) && (directory == NULL))
11511
directory = xmlParserGetDirectory((char *)URL);
11512
if ((ctxt->directory == NULL) && (directory != NULL))
11513
ctxt->directory = directory;
11515
oldsax = ctxt->sax;
11516
ctxt->sax = ctx->sax;
11517
xmlDetectSAX2(ctxt);
11518
newDoc = xmlNewDoc(BAD_CAST "1.0");
11519
if (newDoc == NULL) {
11520
xmlFreeParserCtxt(ctxt);
11523
if (ctx->myDoc->dict) {
11524
newDoc->dict = ctx->myDoc->dict;
11525
xmlDictReference(newDoc->dict);
11527
if (ctx->myDoc != NULL) {
11528
newDoc->intSubset = ctx->myDoc->intSubset;
11529
newDoc->extSubset = ctx->myDoc->extSubset;
11531
if (ctx->myDoc->URL != NULL) {
11532
newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11534
newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11535
if (newRoot == NULL) {
11536
ctxt->sax = oldsax;
11537
xmlFreeParserCtxt(ctxt);
11538
newDoc->intSubset = NULL;
11539
newDoc->extSubset = NULL;
11540
xmlFreeDoc(newDoc);
11543
xmlAddChild((xmlNodePtr) newDoc, newRoot);
11544
nodePush(ctxt, newDoc->children);
11545
if (ctx->myDoc == NULL) {
11546
ctxt->myDoc = newDoc;
11548
ctxt->myDoc = ctx->myDoc;
11549
newDoc->children->doc = ctx->myDoc;
11553
* Get the 4 first bytes and decode the charset
11554
* if enc != XML_CHAR_ENCODING_NONE
11555
* plug some encoding conversion routines.
11558
if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11563
enc = xmlDetectCharEncoding(start, 4);
11564
if (enc != XML_CHAR_ENCODING_NONE) {
11565
xmlSwitchEncoding(ctxt, enc);
11570
* Parse a possible text declaration first
11572
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11573
xmlParseTextDecl(ctxt);
11577
* Doing validity checking on chunk doesn't make sense
11579
ctxt->instate = XML_PARSER_CONTENT;
11580
ctxt->validate = ctx->validate;
11581
ctxt->valid = ctx->valid;
11582
ctxt->loadsubset = ctx->loadsubset;
11583
ctxt->depth = ctx->depth + 1;
11584
ctxt->replaceEntities = ctx->replaceEntities;
11585
if (ctxt->validate) {
11586
ctxt->vctxt.error = ctx->vctxt.error;
11587
ctxt->vctxt.warning = ctx->vctxt.warning;
11589
ctxt->vctxt.error = NULL;
11590
ctxt->vctxt.warning = NULL;
11592
ctxt->vctxt.nodeTab = NULL;
11593
ctxt->vctxt.nodeNr = 0;
11594
ctxt->vctxt.nodeMax = 0;
11595
ctxt->vctxt.node = NULL;
11596
if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11597
ctxt->dict = ctx->dict;
11598
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11599
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11600
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11601
ctxt->dictNames = ctx->dictNames;
11602
ctxt->attsDefault = ctx->attsDefault;
11603
ctxt->attsSpecial = ctx->attsSpecial;
11604
ctxt->linenumbers = ctx->linenumbers;
11606
xmlParseContent(ctxt);
11608
ctx->validate = ctxt->validate;
11609
ctx->valid = ctxt->valid;
11610
if ((RAW == '<') && (NXT(1) == '/')) {
11611
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11612
} else if (RAW != 0) {
11613
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11615
if (ctxt->node != newDoc->children) {
11616
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11619
if (!ctxt->wellFormed) {
11620
if (ctxt->errNo == 0)
11629
* Return the newly created nodeset after unlinking it from
11630
* they pseudo parent.
11632
cur = newDoc->children->children;
11634
while (cur != NULL) {
11635
cur->parent = NULL;
11638
newDoc->children->children = NULL;
11642
ctxt->sax = oldsax;
11644
ctxt->attsDefault = NULL;
11645
ctxt->attsSpecial = NULL;
11646
xmlFreeParserCtxt(ctxt);
11647
newDoc->intSubset = NULL;
11648
newDoc->extSubset = NULL;
11649
xmlFreeDoc(newDoc);
11655
* xmlParseExternalEntityPrivate:
11656
* @doc: the document the chunk pertains to
11657
* @oldctxt: the previous parser context if available
11658
* @sax: the SAX handler block (possibly NULL)
11659
* @user_data: The user data returned on SAX callbacks (possibly NULL)
11660
* @depth: Used for loop detection, use 0
11661
* @URL: the URL for the entity to load
11662
* @ID: the System ID for the entity to load
11663
* @list: the return value for the set of parsed nodes
11665
* Private version of xmlParseExternalEntity()
11667
* Returns 0 if the entity is well formed, -1 in case of args problem and
11668
* the parser error code otherwise
11671
/*
 * Implementation notes (derived from the statements visible below):
 *  - The child context comes from xmlCreateEntityParserCtxt(URL, ID, NULL);
 *    a NULL result is reported as XML_WAR_UNDECLARED_ENTITY.
 *  - When oldctxt is not NULL, its _private, loadsubset, validate, external,
 *    record_info and node_seq fields are copied into the child. A second
 *    group of assignments (presumably the else branch; the branch line is
 *    not visible here) sets _private to NULL, validate and loadsubset to 0
 *    and external to 2.
 *  - node_seq is zeroed in the child before every xmlFreeParserCtxt() call,
 *    presumably because its buffer pointer is borrowed from oldctxt.
 *  - newDoc is a scratch document holding a pseudoroot node. It borrows
 *    intSubset, extSubset and dict (with a reference taken through
 *    xmlDictReference) from doc; the two subsets are reset to NULL before
 *    xmlFreeDoc(newDoc).
 *  - The child runs at the depth passed by the caller.
 *  - When the chunk is not well formed, ret becomes XML_ERR_INTERNAL_ERROR
 *    or the errNo of the child context.
 *  - When list is not NULL the nodes parsed under the pseudoroot get their
 *    parent cleared and are detached from it.
 * NOTE(review): near the end oldctxt->node_seq is written without a NULL
 * test, although oldctxt is tested against NULL earlier in this function
 * and xmlParseExternalEntity() passes NULL for it; confirm and guard.
 * NOTE(review): the newRoot == NULL path restores oldsax before freeing the
 * child while the newDoc == NULL path does not; confirm this asymmetry is
 * harmless.
 */
static xmlParserErrors
11672
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
11673
xmlSAXHandlerPtr sax,
11674
void *user_data, int depth, const xmlChar *URL,
11675
const xmlChar *ID, xmlNodePtr *list) {
11676
xmlParserCtxtPtr ctxt;
11678
xmlNodePtr newRoot;
11679
xmlSAXHandlerPtr oldsax = NULL;
11680
xmlParserErrors ret = XML_ERR_OK;
11682
xmlCharEncoding enc;
11685
return(XML_ERR_ENTITY_LOOP);
11692
if ((URL == NULL) && (ID == NULL))
11693
return(XML_ERR_INTERNAL_ERROR);
11695
return(XML_ERR_INTERNAL_ERROR);
11698
ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
11699
if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
11700
ctxt->userData = ctxt;
11701
if (oldctxt != NULL) {
11702
ctxt->_private = oldctxt->_private;
11703
ctxt->loadsubset = oldctxt->loadsubset;
11704
ctxt->validate = oldctxt->validate;
11705
ctxt->external = oldctxt->external;
11706
ctxt->record_info = oldctxt->record_info;
11707
ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
11708
ctxt->node_seq.length = oldctxt->node_seq.length;
11709
ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
11712
* Doing validity checking on chunk without context
11713
* doesn't make sense
11715
ctxt->_private = NULL;
11716
ctxt->validate = 0;
11717
ctxt->external = 2;
11718
ctxt->loadsubset = 0;
11721
oldsax = ctxt->sax;
11723
if (user_data != NULL)
11724
ctxt->userData = user_data;
11726
xmlDetectSAX2(ctxt);
11727
newDoc = xmlNewDoc(BAD_CAST "1.0");
11728
if (newDoc == NULL) {
11729
ctxt->node_seq.maximum = 0;
11730
ctxt->node_seq.length = 0;
11731
ctxt->node_seq.buffer = NULL;
11732
xmlFreeParserCtxt(ctxt);
11733
return(XML_ERR_INTERNAL_ERROR);
11735
newDoc->intSubset = doc->intSubset;
11736
newDoc->extSubset = doc->extSubset;
11737
newDoc->dict = doc->dict;
11738
xmlDictReference(newDoc->dict);
11740
if (doc->URL != NULL) {
11741
newDoc->URL = xmlStrdup(doc->URL);
11743
newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11744
if (newRoot == NULL) {
11746
ctxt->sax = oldsax;
11747
ctxt->node_seq.maximum = 0;
11748
ctxt->node_seq.length = 0;
11749
ctxt->node_seq.buffer = NULL;
11750
xmlFreeParserCtxt(ctxt);
11751
newDoc->intSubset = NULL;
11752
newDoc->extSubset = NULL;
11753
xmlFreeDoc(newDoc);
11754
return(XML_ERR_INTERNAL_ERROR);
11756
xmlAddChild((xmlNodePtr) newDoc, newRoot);
11757
nodePush(ctxt, newDoc->children);
11759
newRoot->doc = doc;
11762
* Get the 4 first bytes and decode the charset
11763
* if enc != XML_CHAR_ENCODING_NONE
11764
* plug some encoding conversion routines.
11767
if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11772
enc = xmlDetectCharEncoding(start, 4);
11773
if (enc != XML_CHAR_ENCODING_NONE) {
11774
xmlSwitchEncoding(ctxt, enc);
11779
* Parse a possible text declaration first
11781
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11782
xmlParseTextDecl(ctxt);
11785
ctxt->instate = XML_PARSER_CONTENT;
11786
ctxt->depth = depth;
11788
xmlParseContent(ctxt);
11790
if ((RAW == '<') && (NXT(1) == '/')) {
11791
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11792
} else if (RAW != 0) {
11793
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11795
if (ctxt->node != newDoc->children) {
11796
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11799
if (!ctxt->wellFormed) {
11800
if (ctxt->errNo == 0)
11801
ret = XML_ERR_INTERNAL_ERROR;
11803
ret = (xmlParserErrors)ctxt->errNo;
11805
if (list != NULL) {
11809
* Return the newly created nodeset after unlinking it from
11810
* they pseudo parent.
11812
cur = newDoc->children->children;
11814
while (cur != NULL) {
11815
cur->parent = NULL;
11818
newDoc->children->children = NULL;
11823
ctxt->sax = oldsax;
11824
oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11825
oldctxt->node_seq.length = ctxt->node_seq.length;
11826
oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
11827
ctxt->node_seq.maximum = 0;
11828
ctxt->node_seq.length = 0;
11829
ctxt->node_seq.buffer = NULL;
11830
xmlFreeParserCtxt(ctxt);
11831
newDoc->intSubset = NULL;
11832
newDoc->extSubset = NULL;
11833
xmlFreeDoc(newDoc);
11838
#ifdef LIBXML_SAX1_ENABLED
11840
* xmlParseExternalEntity:
11841
* @doc: the document the chunk pertains to
11842
* @sax: the SAX handler block (possibly NULL)
11843
* @user_data: The user data returned on SAX callbacks (possibly NULL)
11844
* @depth: Used for loop detection, use 0
11845
* @URL: the URL for the entity to load
11846
* @ID: the System ID for the entity to load
11847
* @lst: the return value for the set of parsed nodes
11849
* Parse an external general entity
11850
* An external general parsed entity is well-formed if it matches the
11851
* production labeled extParsedEnt.
11853
* [78] extParsedEnt ::= TextDecl? content
11855
* Returns 0 if the entity is well formed, -1 in case of args problem and
11856
* the parser error code otherwise
11860
/*
 * Public entry point: delegates to xmlParseExternalEntityPrivate(), passing
 * NULL as the previous parser context and forwarding the other arguments
 * (the end of the call is not visible here).
 */
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
11861
int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
11862
return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
11867
* xmlParseBalancedChunkMemory:
11868
* @doc: the document the chunk pertains to
11869
* @sax: the SAX handler block (possibly NULL)
11870
* @user_data: The user data returned on SAX callbacks (possibly NULL)
11871
* @depth: Used for loop detection, use 0
11872
* @string: the input string in UTF8 or ISO-Latin (zero terminated)
11873
* @lst: the return value for the set of parsed nodes
11875
* Parse a well-balanced chunk of an XML document
11876
* called by the parser
11877
* The allowed sequence for the Well Balanced Chunk is the one defined by
11878
* the content production in the XML grammar:
11880
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11882
* Returns 0 if the chunk is well balanced, -1 in case of args problem and
11883
* the parser error code otherwise
11887
/*
 * Thin wrapper: calls xmlParseBalancedChunkMemoryRecover() with the same
 * arguments and a recover value of 0, and returns its result.
 */
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11888
void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
11889
return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11890
depth, string, lst, 0 );
11892
#endif /* LIBXML_SAX1_ENABLED */
11895
* xmlParseBalancedChunkMemoryInternal:
11896
* @oldctxt: the existing parsing context
11897
* @string: the input string in UTF8 or ISO-Latin (zero terminated)
11898
* @user_data: the user data field for the parser context
11899
* @lst: the return value for the set of parsed nodes
11902
* Parse a well-balanced chunk of an XML document
11903
* called by the parser
11904
* The allowed sequence for the Well Balanced Chunk is the one defined by
11905
* the content production in the XML grammar:
11907
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11909
* Returns XML_ERR_OK if the chunk is well balanced, and the parser
11910
* error code otherwise
11912
* In case recover is set to 1, the nodelist will not be empty even if
11913
* the parsed chunk is not well balanced.
11915
/*
 * Implementation notes (derived from the statements visible below):
 *  - oldctxt is dereferenced from the first check on, so it must not be
 *    NULL. A depth above 40 yields XML_ERR_ENTITY_LOOP and a NULL string
 *    yields XML_ERR_INTERNAL_ERROR.
 *  - The child context is built over the string with
 *    xmlCreateMemoryParserCtxt(); a NULL result is reported as
 *    XML_WAR_UNDECLARED_ENTITY.
 *  - The child takes over the dictionary, SAX handler, replaceEntities,
 *    options, _private, dictNames, attsDefault and attsSpecial of oldctxt,
 *    and runs at oldctxt->depth + 1 with validate set to 0.
 *  - When oldctxt has no document a scratch newDoc is created (and freed at
 *    the end); the other visible setup reuses oldctxt->myDoc and parks its
 *    children and last pointers in content and last, which are put back
 *    after parsing.
 *  - Parsing happens under a temporary pseudoroot node, which is released
 *    with xmlFreeNode() afterwards.
 *  - XML_SKIP_IDS is added to loadsubset when oldctxt validates or replaces
 *    entities.
 *  - Nodes are handed back only when lst is not NULL and ret is XML_ERR_OK.
 *    With LIBXML_VALID_ENABLED each returned element is also checked with
 *    xmlValidateElement() when oldctxt validates, is well formed and its
 *    document has an internal subset.
 *  - attsDefault and attsSpecial are reset to NULL in the child before it
 *    is freed.
 */
static xmlParserErrors
11916
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11917
const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11918
xmlParserCtxtPtr ctxt;
11919
xmlDocPtr newDoc = NULL;
11920
xmlNodePtr newRoot;
11921
xmlSAXHandlerPtr oldsax = NULL;
11922
xmlNodePtr content = NULL;
11923
xmlNodePtr last = NULL;
11925
xmlParserErrors ret = XML_ERR_OK;
11927
if (oldctxt->depth > 40) {
11928
return(XML_ERR_ENTITY_LOOP);
11934
if (string == NULL)
11935
return(XML_ERR_INTERNAL_ERROR);
11937
size = xmlStrlen(string);
11939
ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11940
if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
11941
if (user_data != NULL)
11942
ctxt->userData = user_data;
11944
ctxt->userData = ctxt;
11945
if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11946
ctxt->dict = oldctxt->dict;
11947
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11948
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11949
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11951
oldsax = ctxt->sax;
11952
ctxt->sax = oldctxt->sax;
11953
xmlDetectSAX2(ctxt);
11954
ctxt->replaceEntities = oldctxt->replaceEntities;
11955
ctxt->options = oldctxt->options;
11957
ctxt->_private = oldctxt->_private;
11958
if (oldctxt->myDoc == NULL) {
11959
newDoc = xmlNewDoc(BAD_CAST "1.0");
11960
if (newDoc == NULL) {
11961
ctxt->sax = oldsax;
11963
xmlFreeParserCtxt(ctxt);
11964
return(XML_ERR_INTERNAL_ERROR);
11966
newDoc->dict = ctxt->dict;
11967
xmlDictReference(newDoc->dict);
11968
ctxt->myDoc = newDoc;
11970
ctxt->myDoc = oldctxt->myDoc;
11971
content = ctxt->myDoc->children;
11972
last = ctxt->myDoc->last;
11974
newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11975
if (newRoot == NULL) {
11976
ctxt->sax = oldsax;
11978
xmlFreeParserCtxt(ctxt);
11979
if (newDoc != NULL) {
11980
xmlFreeDoc(newDoc);
11982
return(XML_ERR_INTERNAL_ERROR);
11984
ctxt->myDoc->children = NULL;
11985
ctxt->myDoc->last = NULL;
11986
xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
11987
nodePush(ctxt, ctxt->myDoc->children);
11988
ctxt->instate = XML_PARSER_CONTENT;
11989
ctxt->depth = oldctxt->depth + 1;
11991
ctxt->validate = 0;
11992
ctxt->loadsubset = oldctxt->loadsubset;
11993
if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11995
* ID/IDREF registration will be done in xmlValidateElement below
11997
ctxt->loadsubset |= XML_SKIP_IDS;
11999
ctxt->dictNames = oldctxt->dictNames;
12000
ctxt->attsDefault = oldctxt->attsDefault;
12001
ctxt->attsSpecial = oldctxt->attsSpecial;
12003
xmlParseContent(ctxt);
12004
if ((RAW == '<') && (NXT(1) == '/')) {
12005
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12006
} else if (RAW != 0) {
12007
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12009
if (ctxt->node != ctxt->myDoc->children) {
12010
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12013
if (!ctxt->wellFormed) {
12014
if (ctxt->errNo == 0)
12015
ret = XML_ERR_INTERNAL_ERROR;
12017
ret = (xmlParserErrors)ctxt->errNo;
12022
if ((lst != NULL) && (ret == XML_ERR_OK)) {
12026
* Return the newly created nodeset after unlinking it from
12027
* they pseudo parent.
12029
cur = ctxt->myDoc->children->children;
12031
while (cur != NULL) {
12032
#ifdef LIBXML_VALID_ENABLED
12033
if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12034
(oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12035
(cur->type == XML_ELEMENT_NODE)) {
12036
oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12037
oldctxt->myDoc, cur);
12039
#endif /* LIBXML_VALID_ENABLED */
12040
cur->parent = NULL;
12043
ctxt->myDoc->children->children = NULL;
12045
if (ctxt->myDoc != NULL) {
12046
xmlFreeNode(ctxt->myDoc->children);
12047
ctxt->myDoc->children = content;
12048
ctxt->myDoc->last = last;
12051
ctxt->sax = oldsax;
12053
ctxt->attsDefault = NULL;
12054
ctxt->attsSpecial = NULL;
12055
xmlFreeParserCtxt(ctxt);
12056
if (newDoc != NULL) {
12057
xmlFreeDoc(newDoc);
12064
* xmlParseInNodeContext:
12065
* @node: the context node
12066
* @data: the input string
12067
* @datalen: the input string length in bytes
12068
* @options: a combination of xmlParserOption
12069
* @lst: the return value for the set of parsed nodes
12071
* Parse a well-balanced chunk of an XML document
12072
* within the context (DTD, namespaces, etc ...) of the given node.
12074
* The allowed sequence for the data is a Well Balanced Chunk defined by
12075
* the content production in the XML grammar:
12077
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12079
* Returns XML_ERR_OK if the chunk is well balanced, and the parser
12080
* error code otherwise
12083
/*
 * Implementation notes (derived from the statements visible below):
 *  - XML_ERR_INTERNAL_ERROR is returned when lst, node or data is NULL or
 *    datalen is negative.
 *  - node is walked up through its parents until an element, document or
 *    HTML document node is reached.
 *  - The parser context is created with xmlCreateMemoryParserCtxt() for XML
 *    documents and, with LIBXML_HTML_ENABLED, htmlCreateMemoryParserCtxt()
 *    for HTML documents.
 *  - A placeholder comment node (fake) is added under node before parsing
 *    and removed with xmlUnlinkNode() afterwards; if it cannot be allocated
 *    the context is freed and XML_ERR_NO_MEMORY is returned.
 *  - If the document has a dictionary it replaces the one of the context;
 *    XML_PARSE_NODICT is added to options (presumably in the opposite case;
 *    the branch line is not visible here).
 *  - For an element node the node is pushed on the node stack and namespace
 *    declarations (nsDef) not already known to the context are pushed with
 *    nsPush().
 *  - XML_SKIP_IDS is added to loadsubset when the context validates or
 *    replaces entities.
 *  - Content is parsed with __htmlParseContent() for HTML documents (with
 *    LIBXML_HTML_ENABLED) and with xmlParseContent() as the other visible
 *    call.
 *  - When ret is not XML_ERR_OK the node list in *lst is released with
 *    xmlFreeNodeList().
 */
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12084
int options, xmlNodePtr *lst) {
12086
xmlParserCtxtPtr ctxt;
12087
xmlDocPtr doc = NULL;
12088
xmlNodePtr fake, cur;
12091
xmlParserErrors ret = XML_ERR_OK;
12094
* check all input parameters, grab the document
12096
if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12097
return(XML_ERR_INTERNAL_ERROR);
12098
switch (node->type) {
12099
case XML_ELEMENT_NODE:
12100
case XML_ATTRIBUTE_NODE:
12101
case XML_TEXT_NODE:
12102
case XML_CDATA_SECTION_NODE:
12103
case XML_ENTITY_REF_NODE:
12105
case XML_COMMENT_NODE:
12106
case XML_DOCUMENT_NODE:
12107
case XML_HTML_DOCUMENT_NODE:
12110
return(XML_ERR_INTERNAL_ERROR);
12113
while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12114
(node->type != XML_DOCUMENT_NODE) &&
12115
(node->type != XML_HTML_DOCUMENT_NODE))
12116
node = node->parent;
12118
return(XML_ERR_INTERNAL_ERROR);
12119
if (node->type == XML_ELEMENT_NODE)
12122
doc = (xmlDocPtr) node;
12124
return(XML_ERR_INTERNAL_ERROR);
12127
* allocate a context and set-up everything not related to the
12128
* node position in the tree
12130
if (doc->type == XML_DOCUMENT_NODE)
12131
ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12132
#ifdef LIBXML_HTML_ENABLED
12133
else if (doc->type == XML_HTML_DOCUMENT_NODE)
12134
ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12137
return(XML_ERR_INTERNAL_ERROR);
12140
return(XML_ERR_NO_MEMORY);
12141
fake = xmlNewComment(NULL);
12142
if (fake == NULL) {
12143
xmlFreeParserCtxt(ctxt);
12144
return(XML_ERR_NO_MEMORY);
12146
xmlAddChild(node, fake);
12149
* Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12150
* We need a dictionary for xmlDetectSAX2, so if there's no doc dict
12151
* we must wait until the last moment to free the original one.
12153
if (doc->dict != NULL) {
12154
if (ctxt->dict != NULL)
12155
xmlDictFree(ctxt->dict);
12156
ctxt->dict = doc->dict;
12158
options |= XML_PARSE_NODICT;
12160
xmlCtxtUseOptions(ctxt, options);
12161
xmlDetectSAX2(ctxt);
12164
if (node->type == XML_ELEMENT_NODE) {
12165
nodePush(ctxt, node);
12167
* initialize the SAX2 namespaces stack
12170
while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12171
xmlNsPtr ns = cur->nsDef;
12172
const xmlChar *iprefix, *ihref;
12174
while (ns != NULL) {
12176
iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
12177
ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
12179
iprefix = ns->prefix;
12183
if (xmlGetNamespace(ctxt, iprefix) == NULL) {
12184
nsPush(ctxt, iprefix, ihref);
12191
ctxt->instate = XML_PARSER_CONTENT;
12194
if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12196
* ID/IDREF registration will be done in xmlValidateElement below
12198
ctxt->loadsubset |= XML_SKIP_IDS;
12201
#ifdef LIBXML_HTML_ENABLED
12202
if (doc->type == XML_HTML_DOCUMENT_NODE)
12203
__htmlParseContent(ctxt);
12206
xmlParseContent(ctxt);
12209
if ((RAW == '<') && (NXT(1) == '/')) {
12210
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12211
} else if (RAW != 0) {
12212
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12214
if ((ctxt->node != NULL) && (ctxt->node != node)) {
12215
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12216
ctxt->wellFormed = 0;
12219
if (!ctxt->wellFormed) {
12220
if (ctxt->errNo == 0)
12221
ret = XML_ERR_INTERNAL_ERROR;
12223
ret = (xmlParserErrors)ctxt->errNo;
12229
* Return the newly created nodeset after unlinking it from
12230
* the pseudo sibling.
12243
while (cur != NULL) {
12244
cur->parent = NULL;
12248
xmlUnlinkNode(fake);
12252
if (ret != XML_ERR_OK) {
12253
xmlFreeNodeList(*lst);
12257
if (doc->dict != NULL)
12259
xmlFreeParserCtxt(ctxt);
12263
return(XML_ERR_INTERNAL_ERROR);
12267
#ifdef LIBXML_SAX1_ENABLED
12269
* xmlParseBalancedChunkMemoryRecover:
12270
* @doc: the document the chunk pertains to
12271
* @sax: the SAX handler block (possibly NULL)
12272
* @user_data: The user data returned on SAX callbacks (possibly NULL)
12273
* @depth: Used for loop detection, use 0
12274
* @string: the input string in UTF8 or ISO-Latin (zero terminated)
12275
* @lst: the return value for the set of parsed nodes
12276
* @recover: return nodes even if the data is broken (use 0)
12279
* Parse a well-balanced chunk of an XML document
12280
* called by the parser
12281
* The allowed sequence for the Well Balanced Chunk is the one defined by
12282
* the content production in the XML grammar:
12284
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12286
* Returns 0 if the chunk is well balanced, -1 in case of args problem and
12287
* the parser error code otherwise
12289
* In case recover is set to 1, the nodelist will not be empty even if
12290
* the parsed chunk is not well balanced, assuming the parsing succeeded to
12294
/*
 * Implementation notes (derived from the statements visible below):
 *  - A failure to create the memory parser context returns -1.
 *  - user_data, when not NULL, replaces the default userData (the context
 *    itself).
 *  - When doc has a dictionary, the context drops its own dictionary, takes
 *    a reference on the one of doc and sets dictNames to 1;
 *    xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT) is the other visible setup
 *    (presumably the opposite case; the branch line is not visible here).
 *  - newDoc is a scratch document holding a pseudoroot node. It borrows
 *    intSubset, extSubset and oldNs from doc; all three are reset to NULL
 *    before xmlFreeDoc(newDoc).
 *  - Both visible assignments of ctxt->myDoc set it to newDoc.
 *  - Validation and subset loading are switched off (validate and
 *    loadsubset set to 0) and the child runs at the depth passed in.
 *  - When doc is not NULL its children pointer is set to NULL around
 *    xmlParseContent() and restored right after.
 *  - Nodes are handed back when lst is not NULL and either ret is 0 or
 *    recover is 1; each one is moved to doc with xmlSetTreeDoc() and gets
 *    its parent cleared.
 */
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12295
void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
12297
xmlParserCtxtPtr ctxt;
12299
xmlSAXHandlerPtr oldsax = NULL;
12300
xmlNodePtr content, newRoot;
12305
return(XML_ERR_ENTITY_LOOP);
12311
if (string == NULL)
12314
size = xmlStrlen(string);
12316
ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12317
if (ctxt == NULL) return(-1);
12318
ctxt->userData = ctxt;
12320
oldsax = ctxt->sax;
12322
if (user_data != NULL)
12323
ctxt->userData = user_data;
12325
newDoc = xmlNewDoc(BAD_CAST "1.0");
12326
if (newDoc == NULL) {
12327
xmlFreeParserCtxt(ctxt);
12330
if ((doc != NULL) && (doc->dict != NULL)) {
12331
xmlDictFree(ctxt->dict);
12332
ctxt->dict = doc->dict;
12333
xmlDictReference(ctxt->dict);
12334
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12335
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12336
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12337
ctxt->dictNames = 1;
12339
xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
12342
newDoc->intSubset = doc->intSubset;
12343
newDoc->extSubset = doc->extSubset;
12345
newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12346
if (newRoot == NULL) {
12348
ctxt->sax = oldsax;
12349
xmlFreeParserCtxt(ctxt);
12350
newDoc->intSubset = NULL;
12351
newDoc->extSubset = NULL;
12352
xmlFreeDoc(newDoc);
12355
xmlAddChild((xmlNodePtr) newDoc, newRoot);
12356
nodePush(ctxt, newRoot);
12358
ctxt->myDoc = newDoc;
12360
ctxt->myDoc = newDoc;
12361
newDoc->children->doc = doc;
12362
/* Ensure that doc has XML spec namespace */
12363
xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
12364
newDoc->oldNs = doc->oldNs;
12366
ctxt->instate = XML_PARSER_CONTENT;
12367
ctxt->depth = depth;
12370
* Doing validity checking on chunk doesn't make sense
12372
ctxt->validate = 0;
12373
ctxt->loadsubset = 0;
12374
xmlDetectSAX2(ctxt);
12376
if ( doc != NULL ){
12377
content = doc->children;
12378
doc->children = NULL;
12379
xmlParseContent(ctxt);
12380
doc->children = content;
12383
xmlParseContent(ctxt);
12385
if ((RAW == '<') && (NXT(1) == '/')) {
12386
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12387
} else if (RAW != 0) {
12388
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12390
if (ctxt->node != newDoc->children) {
12391
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12394
if (!ctxt->wellFormed) {
12395
if (ctxt->errNo == 0)
12403
if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
12407
* Return the newly created nodeset after unlinking it from
12408
* they pseudo parent.
12410
cur = newDoc->children->children;
12412
while (cur != NULL) {
12413
xmlSetTreeDoc(cur, doc);
12414
cur->parent = NULL;
12417
newDoc->children->children = NULL;
12421
ctxt->sax = oldsax;
12422
xmlFreeParserCtxt(ctxt);
12423
newDoc->intSubset = NULL;
12424
newDoc->extSubset = NULL;
12425
newDoc->oldNs = NULL;
12426
xmlFreeDoc(newDoc);
12432
* xmlSAXParseEntity:
12433
* @sax: the SAX handler block
12434
* @filename: the filename
12436
* parse an XML external entity out of context and build a tree.
12437
* It uses the given SAX function block to handle the parsing callbacks.
12438
* If sax is NULL, fallback to the default DOM tree building routines.
12440
* [78] extParsedEnt ::= TextDecl? content
12442
* This corresponds to a "Well Balanced" chunk
12444
* Returns the resulting document tree
12448
/*
 * Implementation notes (derived from the statements visible below):
 *  - The parser context is created from the file name with
 *    xmlCreateFileParserCtxt().
 *  - The SAX handler owned by the context is released with xmlFree() when
 *    it is not NULL (the enclosing condition, if any, is not visible here).
 *  - userData is set to NULL before xmlParseExtParsedEnt() runs.
 *  - The outcome depends on ctxt->wellFormed; one visible path releases
 *    ctxt->myDoc with xmlFreeDoc() and clears the pointer.
 *  - The context is always released with xmlFreeParserCtxt() at the end.
 */
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12450
xmlParserCtxtPtr ctxt;
12452
ctxt = xmlCreateFileParserCtxt(filename);
12453
if (ctxt == NULL) {
12457
if (ctxt->sax != NULL)
12458
xmlFree(ctxt->sax);
12460
ctxt->userData = NULL;
12463
xmlParseExtParsedEnt(ctxt);
12465
if (ctxt->wellFormed)
12469
xmlFreeDoc(ctxt->myDoc);
12470
ctxt->myDoc = NULL;
12474
xmlFreeParserCtxt(ctxt);
12481
* @filename: the filename
12483
* parse an XML external entity out of context and build a tree.
12485
* [78] extParsedEnt ::= TextDecl? content
12487
* This corresponds to a "Well Balanced" chunk
12489
* Returns the resulting document tree
12493
/*
 * Convenience front end: calls xmlSAXParseEntity() with a NULL SAX handler
 * and returns its result as is.
 */
xmlParseEntity(const char *filename) {
12494
return(xmlSAXParseEntity(NULL, filename));
12496
#endif /* LIBXML_SAX1_ENABLED */
12499
* xmlCreateEntityParserCtxt:
12500
* @URL: the entity URL
12501
* @ID: the entity PUBLIC ID
12502
* @base: a possible base for the target URI
12504
* Create a parser context for an external entity
12505
* Automatic support for ZLIB/Compress compressed document is provided
12506
* by default if found at compile-time.
12508
* Returns the new parser context or NULL
12511
/*
 * Implementation notes (derived from the statements visible below):
 *  - A fresh context is allocated with xmlNewParserCtxt().
 *  - uri is computed with xmlBuildURI(URL, base).
 *  - Two parallel load sequences are visible, one using URL and one using
 *    uri (the condition choosing between them is not visible here). Each
 *    one loads the input with xmlLoadExternalEntity(), frees the context
 *    when loading fails, pushes the input on the context and, when the
 *    context has no directory yet, derives one with xmlParserGetDirectory().
 * NOTE(review): no release of uri is visible in this view; confirm it is
 * freed on every path.
 */
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12512
const xmlChar *base) {
12513
xmlParserCtxtPtr ctxt;
12514
xmlParserInputPtr inputStream;
12515
char *directory = NULL;
12518
ctxt = xmlNewParserCtxt();
12519
if (ctxt == NULL) {
12523
uri = xmlBuildURI(URL, base);
12526
inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12527
if (inputStream == NULL) {
12528
xmlFreeParserCtxt(ctxt);
12532
inputPush(ctxt, inputStream);
12534
if ((ctxt->directory == NULL) && (directory == NULL))
12535
directory = xmlParserGetDirectory((char *)URL);
12536
if ((ctxt->directory == NULL) && (directory != NULL))
12537
ctxt->directory = directory;
12539
inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12540
if (inputStream == NULL) {
12542
xmlFreeParserCtxt(ctxt);
12546
inputPush(ctxt, inputStream);
12548
if ((ctxt->directory == NULL) && (directory == NULL))
12549
directory = xmlParserGetDirectory((char *)uri);
12550
if ((ctxt->directory == NULL) && (directory != NULL))
12551
ctxt->directory = directory;
12557
/************************************************************************
12559
* Front ends when parsing from a file *
12561
************************************************************************/
12564
* xmlCreateURLParserCtxt:
12565
* @filename: the filename or URL
12566
* @options: a combination of xmlParserOption
12568
* Create a parser context for a file or URL content.
12569
* Automatic support for ZLIB/Compress compressed document is provided
12570
* by default if found at compile-time and for file accesses
12572
* Returns the new parser context or NULL
12575
/*
 * Implementation notes (derived from the statements visible below):
 *  - A fresh context is allocated with xmlNewParserCtxt(); an allocation
 *    failure is reported through xmlErrMemory().
 *  - The caller options are applied with xmlCtxtUseOptions() and line
 *    numbering is switched on (linenumbers set to 1).
 *  - The input is loaded with xmlLoadExternalEntity(filename, NULL, ctxt);
 *    the context is freed when loading fails, otherwise the input is pushed.
 *  - When the context has no directory yet, one is derived from filename
 *    with xmlParserGetDirectory().
 */
xmlCreateURLParserCtxt(const char *filename, int options)
12577
xmlParserCtxtPtr ctxt;
12578
xmlParserInputPtr inputStream;
12579
char *directory = NULL;
12581
ctxt = xmlNewParserCtxt();
12582
if (ctxt == NULL) {
12583
xmlErrMemory(NULL, "cannot allocate parser context");
12588
xmlCtxtUseOptions(ctxt, options);
12589
ctxt->linenumbers = 1;
12591
inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
12592
if (inputStream == NULL) {
12593
xmlFreeParserCtxt(ctxt);
12597
inputPush(ctxt, inputStream);
12598
if ((ctxt->directory == NULL) && (directory == NULL))
12599
directory = xmlParserGetDirectory(filename);
12600
if ((ctxt->directory == NULL) && (directory != NULL))
12601
ctxt->directory = directory;
12607
* xmlCreateFileParserCtxt:
12608
* @filename: the filename
12610
* Create a parser context for a file content.
12611
* Automatic support for ZLIB/Compress compressed document is provided
12612
* by default if found at compile-time.
12614
* Returns the new parser context or NULL
12617
/*
 * Thin wrapper: calls xmlCreateURLParserCtxt() with the file name and an
 * options value of 0, and returns its result.
 */
xmlCreateFileParserCtxt(const char *filename)
12619
return(xmlCreateURLParserCtxt(filename, 0));
12622
#ifdef LIBXML_SAX1_ENABLED
12624
* xmlSAXParseFileWithData:
12625
* @sax: the SAX handler block
12626
* @filename: the filename
12627
* @recovery: work in recovery mode, i.e. tries to read non Well Formed
12629
* @data: the userdata
12631
* parse an XML file and build a tree. Automatic support for ZLIB/Compress
12632
* compressed document is provided by default if found at compile-time.
12633
* It uses the given SAX function block to handle the parsing callbacks.
12634
* If sax is NULL, fallback to the default DOM tree building routines.
12636
* User data (void *) is stored within the parser context in the
12637
* context's _private member, so it is available nearly everywhere in libxml
12639
* Returns the resulting document tree
12643
/*
 * Implementation notes (derived from the statements visible below):
 *  - The parser context is created from the file name with
 *    xmlCreateFileParserCtxt().
 *  - The SAX handler owned by the context is released with xmlFree() when
 *    it is not NULL (the enclosing condition, if any, is not visible here).
 *  - data is stored in the _private field of the context.
 *  - When the context has no directory, one is derived from filename with
 *    xmlParserGetDirectory().
 *  - The recovery flag is copied into the context before xmlParseDocument().
 *  - The document is kept when the input is well formed or recovery is set;
 *    its compression field is then set to 9 when the input buffer reports a
 *    positive compressed value, or to that value itself (presumably in the
 *    opposite case; the branch line is not visible here).
 *  - One visible path releases ctxt->myDoc with xmlFreeDoc() and clears the
 *    pointer; the context is always released with xmlFreeParserCtxt().
 */
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12644
int recovery, void *data) {
12646
xmlParserCtxtPtr ctxt;
12650
ctxt = xmlCreateFileParserCtxt(filename);
12651
if (ctxt == NULL) {
12655
if (ctxt->sax != NULL)
12656
xmlFree(ctxt->sax);
12659
xmlDetectSAX2(ctxt);
12661
ctxt->_private = data;
12664
if (ctxt->directory == NULL)
12665
ctxt->directory = xmlParserGetDirectory(filename);
12667
ctxt->recovery = recovery;
12669
xmlParseDocument(ctxt);
12671
if ((ctxt->wellFormed) || recovery) {
12674
if (ctxt->input->buf->compressed > 0)
12675
ret->compression = 9;
12677
ret->compression = ctxt->input->buf->compressed;
12682
xmlFreeDoc(ctxt->myDoc);
12683
ctxt->myDoc = NULL;
12687
xmlFreeParserCtxt(ctxt);
12694
* @sax: the SAX handler block
12695
* @filename: the filename
12696
* @recovery: work in recovery mode, i.e. tries to read non Well Formed
12699
* parse an XML file and build a tree. Automatic support for ZLIB/Compress
12700
* compressed document is provided by default if found at compile-time.
12701
* It uses the given SAX function block to handle the parsing callbacks.
12702
* If sax is NULL, fallback to the default DOM tree building routines.
12704
* Returns the resulting document tree
12708
/*
 * Thin wrapper: calls xmlSAXParseFileWithData() with sax, filename and
 * recovery, passing NULL as the user data, and returns its result.
 */
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12710
return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12715
* @cur: a pointer to an array of xmlChar
12717
* parse an XML in-memory document and build a tree.
12718
* In the case the document is not Well Formed, an attempt to build a
12719
* tree is tried anyway
12721
* Returns the resulting document tree or NULL in case of failure
12725
/*
 * Thin wrapper: calls xmlSAXParseDoc() with a NULL SAX handler, the buffer
 * cur and a third argument of 1, and returns its result.
 */
xmlRecoverDoc(xmlChar *cur) {
12726
return(xmlSAXParseDoc(NULL, cur, 1));
12731
* @filename: the filename
12733
* parse an XML file and build a tree. Automatic support for ZLIB/Compress
12734
* compressed document is provided by default if found at compile-time.
12736
* Returns the resulting document tree if the file was wellformed,
12741
/*
 * Thin wrapper: calls xmlSAXParseFile() with a NULL SAX handler and a
 * recovery value of 0, and returns its result.
 */
xmlParseFile(const char *filename) {
12742
return(xmlSAXParseFile(NULL, filename, 0));
12747
* @filename: the filename
12749
* parse an XML file and build a tree. Automatic support for ZLIB/Compress
12750
* compressed document is provided by default if found at compile-time.
12751
* In the case the document is not Well Formed, it attempts to build
12754
* Returns the resulting document tree or NULL in case of failure
12758
/*
 * Thin wrapper: calls xmlSAXParseFile() with a NULL SAX handler and a
 * recovery value of 1, and returns its result.
 */
xmlRecoverFile(const char *filename) {
12759
return(xmlSAXParseFile(NULL, filename, 1));
12764
* xmlSetupParserForBuffer:
12765
* @ctxt: an XML parser context
12766
* @buffer: a xmlChar * buffer
12767
* @filename: a file name
12769
* Setup the parser context to parse a new buffer; Clears any prior
12770
* contents from the parser context. The buffer parameter must not be
12771
* NULL, but the filename parameter can be
12774
/*
 * Implementation notes (derived from the statements visible below):
 *  - A NULL ctxt or buffer is tested for first (the action taken is not
 *    visible here).
 *  - A new input is allocated with xmlNewInputStream(); when that fails an
 *    out of memory error is reported and the context is cleared with
 *    xmlClearParserCtxt().
 *  - On the regular path the context is cleared as well, the input file
 *    name is set from xmlCanonicPath() when filename is not NULL, and the
 *    input is pushed on the context.
 *  - The input base, cur and end pointers refer directly to the caller
 *    buffer (end is computed with xmlStrlen); the buffer is not copied
 *    here, so it has to outlive the parse.
 */
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12775
const char* filename)
12777
xmlParserInputPtr input;
12779
if ((ctxt == NULL) || (buffer == NULL))
12782
input = xmlNewInputStream(ctxt);
12783
if (input == NULL) {
12784
xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
12785
xmlClearParserCtxt(ctxt);
12789
xmlClearParserCtxt(ctxt);
12790
if (filename != NULL)
12791
input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
12792
input->base = buffer;
12793
input->cur = buffer;
12794
input->end = &buffer[xmlStrlen(buffer)];
12795
inputPush(ctxt, input);
12799
* xmlSAXUserParseFile:
12800
* @sax: a SAX handler
12801
* @user_data: The user data returned on SAX callbacks
12802
* @filename: a file name
12804
* parse an XML file and call the given SAX handler routines.
12805
* Automatic support for ZLIB/Compress compressed document is provided
12807
* Returns 0 in case of success or an error number otherwise
12810
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12811
const char *filename) {
12813
xmlParserCtxtPtr ctxt;
12815
ctxt = xmlCreateFileParserCtxt(filename);
12816
if (ctxt == NULL) return -1;
12817
if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12818
xmlFree(ctxt->sax);
12820
xmlDetectSAX2(ctxt);
12822
if (user_data != NULL)
12823
ctxt->userData = user_data;
12825
xmlParseDocument(ctxt);
12827
if (ctxt->wellFormed)
12830
if (ctxt->errNo != 0)
12837
if (ctxt->myDoc != NULL) {
12838
xmlFreeDoc(ctxt->myDoc);
12839
ctxt->myDoc = NULL;
12841
xmlFreeParserCtxt(ctxt);
12845
#endif /* LIBXML_SAX1_ENABLED */
12847
/************************************************************************
12849
* Front ends when parsing from memory *
12851
************************************************************************/
12854
* xmlCreateMemoryParserCtxt:
12855
* @buffer: a pointer to a char array
12856
* @size: the size of the array
12858
* Create a parser context for an XML in-memory document.
12860
* Returns the new parser context or NULL
12863
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12864
xmlParserCtxtPtr ctxt;
12865
xmlParserInputPtr input;
12866
xmlParserInputBufferPtr buf;
12868
if (buffer == NULL)
12873
ctxt = xmlNewParserCtxt();
12877
/* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
12878
buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12880
xmlFreeParserCtxt(ctxt);
12884
input = xmlNewInputStream(ctxt);
12885
if (input == NULL) {
12886
xmlFreeParserInputBuffer(buf);
12887
xmlFreeParserCtxt(ctxt);
12891
input->filename = NULL;
12893
input->base = input->buf->buffer->content;
12894
input->cur = input->buf->buffer->content;
12895
input->end = &input->buf->buffer->content[input->buf->buffer->use];
12897
inputPush(ctxt, input);
12901
#ifdef LIBXML_SAX1_ENABLED
12903
* xmlSAXParseMemoryWithData:
12904
* @sax: the SAX handler block
12905
* @buffer: a pointer to a char array
12906
* @size: the size of the array
12907
* @recovery: work in recovery mode, i.e. tries to read not Well Formed
12909
* @data: the userdata
12911
* parse an XML in-memory block and use the given SAX function block
12912
* to handle the parsing callback. If sax is NULL, fallback to the default
12913
* DOM tree building routines.
12915
* User data (void *) is stored within the parser context in the
12916
* context's _private member, so it is available nearly everywhere in libxml
12918
* Returns the resulting document tree
12922
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12923
int size, int recovery, void *data) {
12925
xmlParserCtxtPtr ctxt;
12927
ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12928
if (ctxt == NULL) return(NULL);
12930
if (ctxt->sax != NULL)
12931
xmlFree(ctxt->sax);
12934
xmlDetectSAX2(ctxt);
12936
ctxt->_private=data;
12939
ctxt->recovery = recovery;
12941
xmlParseDocument(ctxt);
12943
if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12946
xmlFreeDoc(ctxt->myDoc);
12947
ctxt->myDoc = NULL;
12951
xmlFreeParserCtxt(ctxt);
12957
* xmlSAXParseMemory:
12958
* @sax: the SAX handler block
12959
* @buffer: a pointer to a char array
12960
* @size: the size of the array
12961
* @recovery: work in recovery mode, i.e. tries to read not Well Formed
12964
* parse an XML in-memory block and use the given SAX function block
12965
* to handle the parsing callback. If sax is NULL, fallback to the default
12966
* DOM tree building routines.
12968
* Returns the resulting document tree
12971
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12972
int size, int recovery) {
12973
return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
12978
* @buffer: a pointer to a char array
12979
* @size: the size of the array
12981
* parse an XML in-memory block and build a tree.
12983
* Returns the resulting document tree
12986
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
12987
return(xmlSAXParseMemory(NULL, buffer, size, 0));
12991
* xmlRecoverMemory:
12992
* @buffer: a pointer to a char array
12993
* @size: the size of the array
12995
* parse an XML in-memory block and build a tree.
12996
* In the case the document is not Well Formed, an attempt to
12997
* build a tree is tried anyway
12999
* Returns the resulting document tree or NULL in case of error
13002
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
13003
return(xmlSAXParseMemory(NULL, buffer, size, 1));
13007
* xmlSAXUserParseMemory:
13008
* @sax: a SAX handler
13009
* @user_data: The user data returned on SAX callbacks
13010
* @buffer: an in-memory XML document input
13011
* @size: the length of the XML document in bytes
13013
* A better SAX parsing routine.
13014
* parse an XML in-memory buffer and call the given SAX handler routines.
13016
* Returns 0 in case of success or an error number otherwise
13018
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
13019
const char *buffer, int size) {
13021
xmlParserCtxtPtr ctxt;
13023
ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13024
if (ctxt == NULL) return -1;
13025
if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13026
xmlFree(ctxt->sax);
13028
xmlDetectSAX2(ctxt);
13030
if (user_data != NULL)
13031
ctxt->userData = user_data;
13033
xmlParseDocument(ctxt);
13035
if (ctxt->wellFormed)
13038
if (ctxt->errNo != 0)
13045
if (ctxt->myDoc != NULL) {
13046
xmlFreeDoc(ctxt->myDoc);
13047
ctxt->myDoc = NULL;
13049
xmlFreeParserCtxt(ctxt);
13053
#endif /* LIBXML_SAX1_ENABLED */
13056
* xmlCreateDocParserCtxt:
13057
* @cur: a pointer to an array of xmlChar
13059
* Creates a parser context for an XML in-memory document.
13061
* Returns the new parser context or NULL
13064
xmlCreateDocParserCtxt(const xmlChar *cur) {
13069
len = xmlStrlen(cur);
13070
return(xmlCreateMemoryParserCtxt((const char *)cur, len));
13073
#ifdef LIBXML_SAX1_ENABLED
13076
* @sax: the SAX handler block
13077
* @cur: a pointer to an array of xmlChar
13078
* @recovery: work in recovery mode, i.e. tries to read not Well Formed
13081
* parse an XML in-memory document and build a tree.
13082
* It uses the given SAX function block to handle the parsing callback.
13083
* If sax is NULL, fallback to the default DOM tree building routines.
13085
* Returns the resulting document tree
13089
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
13091
xmlParserCtxtPtr ctxt;
13092
xmlSAXHandlerPtr oldsax = NULL;
13094
if (cur == NULL) return(NULL);
13097
ctxt = xmlCreateDocParserCtxt(cur);
13098
if (ctxt == NULL) return(NULL);
13100
oldsax = ctxt->sax;
13102
ctxt->userData = NULL;
13104
xmlDetectSAX2(ctxt);
13106
xmlParseDocument(ctxt);
13107
if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13110
xmlFreeDoc(ctxt->myDoc);
13111
ctxt->myDoc = NULL;
13114
ctxt->sax = oldsax;
13115
xmlFreeParserCtxt(ctxt);
13122
* @cur: a pointer to an array of xmlChar
13124
* parse an XML in-memory document and build a tree.
13126
* Returns the resulting document tree
13130
xmlParseDoc(const xmlChar *cur) {
13131
return(xmlSAXParseDoc(NULL, cur, 0));
13133
#endif /* LIBXML_SAX1_ENABLED */
13135
#ifdef LIBXML_LEGACY_ENABLED
13136
/************************************************************************
13138
* Specific function to keep track of entities references *
13139
* and used by the XSLT debugger *
13141
************************************************************************/
13143
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
13146
* xmlAddEntityReference:
13147
* @ent : A valid entity
13148
* @firstNode : A valid first node for children of entity
13149
* @lastNode : A valid last node for children of entity
13151
* Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
13154
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
13155
xmlNodePtr lastNode)
13157
if (xmlEntityRefFunc != NULL) {
13158
(*xmlEntityRefFunc) (ent, firstNode, lastNode);
13164
* xmlSetEntityReferenceFunc:
13165
* @func: A valid function
13167
* Set the function to call back when an XML reference has been made
13170
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
13172
xmlEntityRefFunc = func;
13174
#endif /* LIBXML_LEGACY_ENABLED */
13176
/************************************************************************
13180
************************************************************************/
13182
#ifdef LIBXML_XPATH_ENABLED
13183
#include <libxml/xpath.h>
13186
extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
13187
static int xmlParserInitialized = 0;
13192
* Initialization function for the XML parser.
13193
* This is not reentrant. Call once before processing in case of
13194
* use in multithreaded programs.
13198
xmlInitParser(void) {
13199
if (xmlParserInitialized != 0)
13202
#ifdef LIBXML_THREAD_ENABLED
13203
__xmlGlobalInitMutexLock();
13204
if (xmlParserInitialized == 0) {
13206
if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
13207
(xmlGenericError == NULL))
13208
initGenericErrorDefaultFunc(NULL);
13212
xmlInitCharEncodingHandlers();
13213
xmlDefaultSAXHandlerInit();
13214
xmlRegisterDefaultInputCallbacks();
13215
#ifdef LIBXML_OUTPUT_ENABLED
13216
xmlRegisterDefaultOutputCallbacks();
13217
#endif /* LIBXML_OUTPUT_ENABLED */
13218
#ifdef LIBXML_HTML_ENABLED
13219
htmlInitAutoClose();
13220
htmlDefaultSAXHandlerInit();
13222
#ifdef LIBXML_XPATH_ENABLED
13225
xmlParserInitialized = 1;
13226
#ifdef LIBXML_THREAD_ENABLED
13228
__xmlGlobalInitMutexUnlock();
13233
* xmlCleanupParser:
13235
* This function name is somewhat misleading. It does not clean up
13236
* parser state, it cleans up memory allocated by the library itself.
13237
* It is a cleanup function for the XML library. It tries to reclaim all
13238
* related global memory allocated for the library processing.
13239
* It doesn't deallocate any document related memory. One should
13240
* call xmlCleanupParser() only when the process has finished using
13241
* the library and all XML/HTML documents built with it.
13242
* See also xmlInitParser() which has the opposite function of preparing
13243
* the library for operations.
13247
xmlCleanupParser(void) {
13248
if (!xmlParserInitialized)
13251
xmlCleanupCharEncodingHandlers();
13252
#ifdef LIBXML_CATALOG_ENABLED
13253
xmlCatalogCleanup();
13256
xmlCleanupInputCallbacks();
13257
#ifdef LIBXML_OUTPUT_ENABLED
13258
xmlCleanupOutputCallbacks();
13260
#ifdef LIBXML_SCHEMAS_ENABLED
13261
xmlSchemaCleanupTypes();
13262
xmlRelaxNGCleanupTypes();
13264
xmlCleanupGlobals();
13265
xmlResetLastError();
13266
xmlCleanupThreads(); /* must be last if called not from the main thread */
13267
xmlCleanupMemory();
13268
xmlParserInitialized = 0;
13271
/************************************************************************
13273
* New set (2.6.0) of simpler and more flexible APIs *
13275
************************************************************************/
13281
* Free a string if it is not owned by the "dict" dictionary in the
13284
#define DICT_FREE(str) \
13285
if ((str) && ((!dict) || \
13286
(xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \
13287
xmlFree((char *)(str));
13291
* @ctxt: an XML parser context
13293
* Reset a parser context
13296
xmlCtxtReset(xmlParserCtxtPtr ctxt)
13298
xmlParserInputPtr input;
13306
while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13307
xmlFreeInputStream(input);
13310
ctxt->input = NULL;
13313
if (ctxt->spaceTab != NULL) {
13314
ctxt->spaceTab[0] = -1;
13315
ctxt->space = &ctxt->spaceTab[0];
13317
ctxt->space = NULL;
13327
DICT_FREE(ctxt->version);
13328
ctxt->version = NULL;
13329
DICT_FREE(ctxt->encoding);
13330
ctxt->encoding = NULL;
13331
DICT_FREE(ctxt->directory);
13332
ctxt->directory = NULL;
13333
DICT_FREE(ctxt->extSubURI);
13334
ctxt->extSubURI = NULL;
13335
DICT_FREE(ctxt->extSubSystem);
13336
ctxt->extSubSystem = NULL;
13337
if (ctxt->myDoc != NULL)
13338
xmlFreeDoc(ctxt->myDoc);
13339
ctxt->myDoc = NULL;
13341
ctxt->standalone = -1;
13342
ctxt->hasExternalSubset = 0;
13343
ctxt->hasPErefs = 0;
13345
ctxt->external = 0;
13346
ctxt->instate = XML_PARSER_START;
13349
ctxt->wellFormed = 1;
13350
ctxt->nsWellFormed = 1;
13351
ctxt->disableSAX = 0;
13354
ctxt->vctxt.userData = ctxt;
13355
ctxt->vctxt.error = xmlParserValidityError;
13356
ctxt->vctxt.warning = xmlParserValidityWarning;
13358
ctxt->record_info = 0;
13360
ctxt->checkIndex = 0;
13361
ctxt->inSubset = 0;
13362
ctxt->errNo = XML_ERR_OK;
13364
ctxt->charset = XML_CHAR_ENCODING_UTF8;
13365
ctxt->catalogs = NULL;
13366
xmlInitNodeInfoSeq(&ctxt->node_seq);
13368
if (ctxt->attsDefault != NULL) {
13369
xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
13370
ctxt->attsDefault = NULL;
13372
if (ctxt->attsSpecial != NULL) {
13373
xmlHashFree(ctxt->attsSpecial, NULL);
13374
ctxt->attsSpecial = NULL;
13377
#ifdef LIBXML_CATALOG_ENABLED
13378
if (ctxt->catalogs != NULL)
13379
xmlCatalogFreeLocal(ctxt->catalogs);
13381
if (ctxt->lastError.code != XML_ERR_OK)
13382
xmlResetError(&ctxt->lastError);
13386
* xmlCtxtResetPush:
13387
* @ctxt: an XML parser context
13388
* @chunk: a pointer to an array of chars
13389
* @size: number of chars in the array
13390
* @filename: an optional file name or URI
13391
* @encoding: the document encoding, or NULL
13393
* Reset a push parser context
13395
* Returns 0 in case of success and 1 in case of error
13398
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13399
int size, const char *filename, const char *encoding)
13401
xmlParserInputPtr inputStream;
13402
xmlParserInputBufferPtr buf;
13403
xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
13408
if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
13409
enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
13411
buf = xmlAllocParserInputBuffer(enc);
13415
if (ctxt == NULL) {
13416
xmlFreeParserInputBuffer(buf);
13420
xmlCtxtReset(ctxt);
13422
if (ctxt->pushTab == NULL) {
13423
ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
13424
sizeof(xmlChar *));
13425
if (ctxt->pushTab == NULL) {
13426
xmlErrMemory(ctxt, NULL);
13427
xmlFreeParserInputBuffer(buf);
13432
if (filename == NULL) {
13433
ctxt->directory = NULL;
13435
ctxt->directory = xmlParserGetDirectory(filename);
13438
inputStream = xmlNewInputStream(ctxt);
13439
if (inputStream == NULL) {
13440
xmlFreeParserInputBuffer(buf);
13444
if (filename == NULL)
13445
inputStream->filename = NULL;
13447
inputStream->filename = (char *)
13448
xmlCanonicPath((const xmlChar *) filename);
13449
inputStream->buf = buf;
13450
inputStream->base = inputStream->buf->buffer->content;
13451
inputStream->cur = inputStream->buf->buffer->content;
13453
&inputStream->buf->buffer->content[inputStream->buf->buffer->use];
13455
inputPush(ctxt, inputStream);
13457
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
13458
(ctxt->input->buf != NULL)) {
13459
int base = ctxt->input->base - ctxt->input->buf->buffer->content;
13460
int cur = ctxt->input->cur - ctxt->input->base;
13462
xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
13464
ctxt->input->base = ctxt->input->buf->buffer->content + base;
13465
ctxt->input->cur = ctxt->input->base + cur;
13467
&ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
13470
xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
13474
if (encoding != NULL) {
13475
xmlCharEncodingHandlerPtr hdlr;
13477
hdlr = xmlFindCharEncodingHandler(encoding);
13478
if (hdlr != NULL) {
13479
xmlSwitchToEncoding(ctxt, hdlr);
13481
xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
13482
"Unsupported encoding %s\n", BAD_CAST encoding);
13484
} else if (enc != XML_CHAR_ENCODING_NONE) {
13485
xmlSwitchEncoding(ctxt, enc);
13492
* xmlCtxtUseOptions:
13493
* @ctxt: an XML parser context
13494
* @options: a combination of xmlParserOption
13496
* Applies the options to the parser context
13498
* Returns 0 in case of success, the set of unknown or unimplemented options
13499
* in case of error.
13502
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13506
if (options & XML_PARSE_RECOVER) {
13507
ctxt->recovery = 1;
13508
options -= XML_PARSE_RECOVER;
13510
ctxt->recovery = 0;
13511
if (options & XML_PARSE_DTDLOAD) {
13512
ctxt->loadsubset = XML_DETECT_IDS;
13513
options -= XML_PARSE_DTDLOAD;
13515
ctxt->loadsubset = 0;
13516
if (options & XML_PARSE_DTDATTR) {
13517
ctxt->loadsubset |= XML_COMPLETE_ATTRS;
13518
options -= XML_PARSE_DTDATTR;
13520
if (options & XML_PARSE_NOENT) {
13521
ctxt->replaceEntities = 1;
13522
/* ctxt->loadsubset |= XML_DETECT_IDS; */
13523
options -= XML_PARSE_NOENT;
13525
ctxt->replaceEntities = 0;
13526
if (options & XML_PARSE_PEDANTIC) {
13527
ctxt->pedantic = 1;
13528
options -= XML_PARSE_PEDANTIC;
13530
ctxt->pedantic = 0;
13531
if (options & XML_PARSE_NOBLANKS) {
13532
ctxt->keepBlanks = 0;
13533
ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13534
options -= XML_PARSE_NOBLANKS;
13536
ctxt->keepBlanks = 1;
13537
if (options & XML_PARSE_DTDVALID) {
13538
ctxt->validate = 1;
13539
if (options & XML_PARSE_NOWARNING)
13540
ctxt->vctxt.warning = NULL;
13541
if (options & XML_PARSE_NOERROR)
13542
ctxt->vctxt.error = NULL;
13543
options -= XML_PARSE_DTDVALID;
13545
ctxt->validate = 0;
13546
if (options & XML_PARSE_NOWARNING) {
13547
ctxt->sax->warning = NULL;
13548
options -= XML_PARSE_NOWARNING;
13550
if (options & XML_PARSE_NOERROR) {
13551
ctxt->sax->error = NULL;
13552
ctxt->sax->fatalError = NULL;
13553
options -= XML_PARSE_NOERROR;
13555
#ifdef LIBXML_SAX1_ENABLED
13556
if (options & XML_PARSE_SAX1) {
13557
ctxt->sax->startElement = xmlSAX2StartElement;
13558
ctxt->sax->endElement = xmlSAX2EndElement;
13559
ctxt->sax->startElementNs = NULL;
13560
ctxt->sax->endElementNs = NULL;
13561
ctxt->sax->initialized = 1;
13562
options -= XML_PARSE_SAX1;
13564
#endif /* LIBXML_SAX1_ENABLED */
13565
if (options & XML_PARSE_NODICT) {
13566
ctxt->dictNames = 0;
13567
options -= XML_PARSE_NODICT;
13569
ctxt->dictNames = 1;
13571
if (options & XML_PARSE_NOCDATA) {
13572
ctxt->sax->cdataBlock = NULL;
13573
options -= XML_PARSE_NOCDATA;
13575
if (options & XML_PARSE_NSCLEAN) {
13576
ctxt->options |= XML_PARSE_NSCLEAN;
13577
options -= XML_PARSE_NSCLEAN;
13579
if (options & XML_PARSE_NONET) {
13580
ctxt->options |= XML_PARSE_NONET;
13581
options -= XML_PARSE_NONET;
13583
if (options & XML_PARSE_COMPACT) {
13584
ctxt->options |= XML_PARSE_COMPACT;
13585
options -= XML_PARSE_COMPACT;
13587
ctxt->linenumbers = 1;
13593
* @ctxt: an XML parser context
13594
* @URL: the base URL to use for the document
13595
* @encoding: the document encoding, or NULL
13596
* @options: a combination of xmlParserOption
13597
* @reuse: keep the context for reuse
13599
* Common front-end for the xmlRead functions
13601
* Returns the resulting document tree or NULL
13604
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
13605
int options, int reuse)
13609
xmlCtxtUseOptions(ctxt, options);
13610
if (encoding != NULL) {
13611
xmlCharEncodingHandlerPtr hdlr;
13613
hdlr = xmlFindCharEncodingHandler(encoding);
13615
xmlSwitchToEncoding(ctxt, hdlr);
13617
if ((URL != NULL) && (ctxt->input != NULL) &&
13618
(ctxt->input->filename == NULL))
13619
ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
13620
xmlParseDocument(ctxt);
13621
if ((ctxt->wellFormed) || ctxt->recovery)
13625
if (ctxt->myDoc != NULL) {
13626
xmlFreeDoc(ctxt->myDoc);
13629
ctxt->myDoc = NULL;
13631
xmlFreeParserCtxt(ctxt);
13639
* @cur: a pointer to a zero terminated string
13640
* @URL: the base URL to use for the document
13641
* @encoding: the document encoding, or NULL
13642
* @options: a combination of xmlParserOption
13644
* parse an XML in-memory document and build a tree.
13646
* Returns the resulting document tree
13649
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
13651
xmlParserCtxtPtr ctxt;
13656
ctxt = xmlCreateDocParserCtxt(cur);
13659
return (xmlDoRead(ctxt, URL, encoding, options, 0));
13664
* @filename: a file or URL
13665
* @encoding: the document encoding, or NULL
13666
* @options: a combination of xmlParserOption
13668
* parse an XML file from the filesystem or the network.
13670
* Returns the resulting document tree
13673
xmlReadFile(const char *filename, const char *encoding, int options)
13675
xmlParserCtxtPtr ctxt;
13677
ctxt = xmlCreateURLParserCtxt(filename, options);
13680
return (xmlDoRead(ctxt, NULL, encoding, options, 0));
13685
* @buffer: a pointer to a char array
13686
* @size: the size of the array
13687
* @URL: the base URL to use for the document
13688
* @encoding: the document encoding, or NULL
13689
* @options: a combination of xmlParserOption
13691
* parse an XML in-memory document and build a tree.
13693
* Returns the resulting document tree
13696
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
13698
xmlParserCtxtPtr ctxt;
13700
ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13703
return (xmlDoRead(ctxt, URL, encoding, options, 0));
13708
* @fd: an open file descriptor
13709
* @URL: the base URL to use for the document
13710
* @encoding: the document encoding, or NULL
13711
* @options: a combination of xmlParserOption
13713
* parse an XML document from a file descriptor and build a tree.
13714
* NOTE that the file descriptor will not be closed when the
13715
* reader is closed or reset.
13717
* Returns the resulting document tree
13720
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13722
xmlParserCtxtPtr ctxt;
13723
xmlParserInputBufferPtr input;
13724
xmlParserInputPtr stream;
13729
input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13732
input->closecallback = NULL;
13733
ctxt = xmlNewParserCtxt();
13734
if (ctxt == NULL) {
13735
xmlFreeParserInputBuffer(input);
13738
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13739
if (stream == NULL) {
13740
xmlFreeParserInputBuffer(input);
13741
xmlFreeParserCtxt(ctxt);
13744
inputPush(ctxt, stream);
13745
return (xmlDoRead(ctxt, URL, encoding, options, 0));
13750
* @ioread: an I/O read function
13751
* @ioclose: an I/O close function
13752
* @ioctx: an I/O handler
13753
* @URL: the base URL to use for the document
13754
* @encoding: the document encoding, or NULL
13755
* @options: a combination of xmlParserOption
13757
* parse an XML document from I/O functions and source and build a tree.
13759
* Returns the resulting document tree
13762
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
13763
void *ioctx, const char *URL, const char *encoding, int options)
13765
xmlParserCtxtPtr ctxt;
13766
xmlParserInputBufferPtr input;
13767
xmlParserInputPtr stream;
13769
if (ioread == NULL)
13772
input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13773
XML_CHAR_ENCODING_NONE);
13776
ctxt = xmlNewParserCtxt();
13777
if (ctxt == NULL) {
13778
xmlFreeParserInputBuffer(input);
13781
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13782
if (stream == NULL) {
13783
xmlFreeParserInputBuffer(input);
13784
xmlFreeParserCtxt(ctxt);
13787
inputPush(ctxt, stream);
13788
return (xmlDoRead(ctxt, URL, encoding, options, 0));
13793
* @ctxt: an XML parser context
13794
* @cur: a pointer to a zero terminated string
13795
* @URL: the base URL to use for the document
13796
* @encoding: the document encoding, or NULL
13797
* @options: a combination of xmlParserOption
13799
* parse an XML in-memory document and build a tree.
13800
* This reuses the existing @ctxt parser context
13802
* Returns the resulting document tree
13805
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
13806
const char *URL, const char *encoding, int options)
13808
xmlParserInputPtr stream;
13815
xmlCtxtReset(ctxt);
13817
stream = xmlNewStringInputStream(ctxt, cur);
13818
if (stream == NULL) {
13821
inputPush(ctxt, stream);
13822
return (xmlDoRead(ctxt, URL, encoding, options, 1));
13827
* @ctxt: an XML parser context
13828
* @filename: a file or URL
13829
* @encoding: the document encoding, or NULL
13830
* @options: a combination of xmlParserOption
13832
* parse an XML file from the filesystem or the network.
13833
* This reuses the existing @ctxt parser context
13835
* Returns the resulting document tree
13838
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13839
const char *encoding, int options)
13841
xmlParserInputPtr stream;
13843
if (filename == NULL)
13848
xmlCtxtReset(ctxt);
13850
stream = xmlLoadExternalEntity(filename, NULL, ctxt);
13851
if (stream == NULL) {
13854
inputPush(ctxt, stream);
13855
return (xmlDoRead(ctxt, NULL, encoding, options, 1));
13859
* xmlCtxtReadMemory:
13860
* @ctxt: an XML parser context
13861
* @buffer: a pointer to a char array
13862
* @size: the size of the array
13863
* @URL: the base URL to use for the document
13864
* @encoding: the document encoding, or NULL
13865
* @options: a combination of xmlParserOption
13867
* parse an XML in-memory document and build a tree.
13868
* This reuses the existing @ctxt parser context
13870
* Returns the resulting document tree
13873
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
13874
const char *URL, const char *encoding, int options)
13876
xmlParserInputBufferPtr input;
13877
xmlParserInputPtr stream;
13881
if (buffer == NULL)
13884
xmlCtxtReset(ctxt);
13886
input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13887
if (input == NULL) {
13891
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13892
if (stream == NULL) {
13893
xmlFreeParserInputBuffer(input);
13897
inputPush(ctxt, stream);
13898
return (xmlDoRead(ctxt, URL, encoding, options, 1));
13903
* @ctxt: an XML parser context
13904
* @fd: an open file descriptor
13905
* @URL: the base URL to use for the document
13906
* @encoding: the document encoding, or NULL
13907
* @options: a combination of xmlParserOption
13909
* parse an XML document from a file descriptor and build a tree.
13910
* This reuses the existing @ctxt parser context
13911
* NOTE that the file descriptor will not be closed when the
13912
* reader is closed or reset.
13914
* Returns the resulting document tree
13917
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13918
const char *URL, const char *encoding, int options)
13920
xmlParserInputBufferPtr input;
13921
xmlParserInputPtr stream;
13928
xmlCtxtReset(ctxt);
13931
input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13934
input->closecallback = NULL;
13935
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13936
if (stream == NULL) {
13937
xmlFreeParserInputBuffer(input);
13940
inputPush(ctxt, stream);
13941
return (xmlDoRead(ctxt, URL, encoding, options, 1));
13946
* @ctxt: an XML parser context
13947
* @ioread: an I/O read function
13948
* @ioclose: an I/O close function
13949
* @ioctx: an I/O handler
13950
* @URL: the base URL to use for the document
13951
* @encoding: the document encoding, or NULL
13952
* @options: a combination of xmlParserOption
13954
* parse an XML document from I/O functions and source and build a tree.
13955
* This reuses the existing @ctxt parser context
13957
* Returns the resulting document tree
13960
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13961
xmlInputCloseCallback ioclose, void *ioctx,
13963
const char *encoding, int options)
13965
xmlParserInputBufferPtr input;
13966
xmlParserInputPtr stream;
13968
if (ioread == NULL)
13973
xmlCtxtReset(ctxt);
13975
input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13976
XML_CHAR_ENCODING_NONE);
13979
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13980
if (stream == NULL) {
13981
xmlFreeParserInputBuffer(input);
13984
inputPush(ctxt, stream);
13985
return (xmlDoRead(ctxt, URL, encoding, options, 1));
13988
#define bottom_parser
13989
#include "elfgcchack.h"