2
* parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
* implemented on top of the SAX interfaces
6
* The XML specification:
7
* http://www.w3.org/TR/REC-xml
8
* Original 1.0 version:
9
* http://www.w3.org/TR/1998/REC-xml-19980210
10
* XML second edition working draft
11
* http://www.w3.org/TR/2000/WD-xml-2e-20000814
13
* Okay this is a big file, the parser core is around 7000 lines, then it
14
* is followed by the progressive parser top routines, then the various
15
* high level APIs to call the parser and a few miscellaneous functions.
16
* A number of helper functions and deprecated ones have been moved to
17
* parserInternals.c to reduce this file size.
18
* As much as possible the functions are associated with their relative
19
* production in the XML specification. A few productions defining the
20
* different ranges of characters are actually implemented either in
21
* parserInternals.h or parserInternals.c
22
* The DOM tree build is realized from the default SAX callbacks in
24
* The routines doing the validation checks are in valid.c and called either
25
* from the SAX callbacks or as standalone functions using a preparsed
28
* See Copyright for the status of this software.
36
#if defined(WIN32) && !defined (__CYGWIN__)
37
#define XML_DIR_SEP '\\'
39
#define XML_DIR_SEP '/'
46
#include <libxml/xmlmemory.h>
47
#include <libxml/threads.h>
48
#include <libxml/globals.h>
49
#include <libxml/tree.h>
50
#include <libxml/parser.h>
51
#include <libxml/parserInternals.h>
52
#include <libxml/valid.h>
53
#include <libxml/entities.h>
54
#include <libxml/xmlerror.h>
55
#include <libxml/encoding.h>
56
#include <libxml/xmlIO.h>
57
#include <libxml/uri.h>
58
#ifdef LIBXML_CATALOG_ENABLED
59
#include <libxml/catalog.h>
61
#ifdef LIBXML_SCHEMAS_ENABLED
62
#include <libxml/xmlschemastypes.h>
63
#include <libxml/relaxng.h>
71
#ifdef HAVE_SYS_STAT_H
88
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
90
static xmlParserCtxtPtr
91
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
92
const xmlChar *base, xmlParserCtxtPtr pctx);
94
/************************************************************************
96
* Arbitrary limits set in the parser. See XML_PARSE_HUGE *
98
************************************************************************/
100
#define XML_PARSER_BIG_ENTITY 1000
101
#define XML_PARSER_LOT_ENTITY 5000
104
* XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
105
* replacement over the size in byte of the input indicates that you have
106
* an exponential behaviour. A value of 10 corresponds to at least 3 entity
107
* replacement per byte of input.
109
#define XML_PARSER_NON_LINEAR 10
112
* xmlParserEntityCheck
114
* Function to check non-linear entity expansion behaviour
115
* This is here to detect and stop exponential (non-linear) entity expansion
116
* This is not a limitation of the parser but a safety
117
* boundary feature. It can be disabled with the XML_PARSE_HUGE
121
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
126
if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
128
if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
132
* Do the check based on the replacement size of the entity
134
if (size < XML_PARSER_BIG_ENTITY)
138
* A limit on the amount of text data reasonably used
140
if (ctxt->input != NULL) {
141
consumed = ctxt->input->consumed +
142
(ctxt->input->cur - ctxt->input->base);
144
consumed += ctxt->sizeentities;
146
if ((size < XML_PARSER_NON_LINEAR * consumed) &&
147
(ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
149
} else if (ent != NULL) {
151
* use the number of parsed entities in the replacement
156
* The amount of data parsed counting entities size only once
158
if (ctxt->input != NULL) {
159
consumed = ctxt->input->consumed +
160
(ctxt->input->cur - ctxt->input->base);
162
consumed += ctxt->sizeentities;
165
* Check the density of entities for the amount of data
166
* knowing an entity reference will take at least 3 bytes
168
if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
172
* strange we got no data for checking just return
177
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
184
* arbitrary depth limit for the XML documents that we allow to
185
* process. This is not a limitation of the parser but a safety
186
* boundary feature. It can be disabled with the XML_PARSE_HUGE
189
unsigned int xmlParserMaxDepth = 256;
194
#define XML_PARSER_BIG_BUFFER_SIZE 300
195
#define XML_PARSER_BUFFER_SIZE 100
196
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
199
* List of XML prefixed PI allowed by W3C specs
202
static const char *xmlW3CPIs[] = {
209
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
210
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
211
const xmlChar **str);
213
static xmlParserErrors
214
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
215
xmlSAXHandlerPtr sax,
216
void *user_data, int depth, const xmlChar *URL,
217
const xmlChar *ID, xmlNodePtr *list);
220
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
221
const char *encoding);
222
#ifdef LIBXML_LEGACY_ENABLED
224
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
225
xmlNodePtr lastNode);
226
#endif /* LIBXML_LEGACY_ENABLED */
228
static xmlParserErrors
229
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
230
const xmlChar *string, void *user_data, xmlNodePtr *lst);
233
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
235
/************************************************************************
237
* Some factorized error routines *
239
************************************************************************/
242
* xmlErrAttributeDup:
243
* @ctxt: an XML parser context
244
* @prefix: the attribute prefix
245
* @localname: the attribute localname
247
* Handle a redefinition of attribute error
250
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
251
const xmlChar * localname)
253
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
254
(ctxt->instate == XML_PARSER_EOF))
257
ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
260
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
261
XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
262
(const char *) localname, NULL, NULL, 0, 0,
263
"Attribute %s redefined\n", localname);
265
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
266
XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
267
(const char *) prefix, (const char *) localname,
268
NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
271
ctxt->wellFormed = 0;
272
if (ctxt->recovery == 0)
273
ctxt->disableSAX = 1;
279
* @ctxt: an XML parser context
280
* @error: the error number
281
* @info: extra information string
283
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
286
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
290
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
291
(ctxt->instate == XML_PARSER_EOF))
294
case XML_ERR_INVALID_HEX_CHARREF:
295
errmsg = "CharRef: invalid hexadecimal value\n";
297
case XML_ERR_INVALID_DEC_CHARREF:
298
errmsg = "CharRef: invalid decimal value\n";
300
case XML_ERR_INVALID_CHARREF:
301
errmsg = "CharRef: invalid value\n";
303
case XML_ERR_INTERNAL_ERROR:
304
errmsg = "internal error";
306
case XML_ERR_PEREF_AT_EOF:
307
errmsg = "PEReference at end of document\n";
309
case XML_ERR_PEREF_IN_PROLOG:
310
errmsg = "PEReference in prolog\n";
312
case XML_ERR_PEREF_IN_EPILOG:
313
errmsg = "PEReference in epilog\n";
315
case XML_ERR_PEREF_NO_NAME:
316
errmsg = "PEReference: no name\n";
318
case XML_ERR_PEREF_SEMICOL_MISSING:
319
errmsg = "PEReference: expecting ';'\n";
321
case XML_ERR_ENTITY_LOOP:
322
errmsg = "Detected an entity reference loop\n";
324
case XML_ERR_ENTITY_NOT_STARTED:
325
errmsg = "EntityValue: \" or ' expected\n";
327
case XML_ERR_ENTITY_PE_INTERNAL:
328
errmsg = "PEReferences forbidden in internal subset\n";
330
case XML_ERR_ENTITY_NOT_FINISHED:
331
errmsg = "EntityValue: \" or ' expected\n";
333
case XML_ERR_ATTRIBUTE_NOT_STARTED:
334
errmsg = "AttValue: \" or ' expected\n";
336
case XML_ERR_LT_IN_ATTRIBUTE:
337
errmsg = "Unescaped '<' not allowed in attributes values\n";
339
case XML_ERR_LITERAL_NOT_STARTED:
340
errmsg = "SystemLiteral \" or ' expected\n";
342
case XML_ERR_LITERAL_NOT_FINISHED:
343
errmsg = "Unfinished System or Public ID \" or ' expected\n";
345
case XML_ERR_MISPLACED_CDATA_END:
346
errmsg = "Sequence ']]>' not allowed in content\n";
348
case XML_ERR_URI_REQUIRED:
349
errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
351
case XML_ERR_PUBID_REQUIRED:
352
errmsg = "PUBLIC, the Public Identifier is missing\n";
354
case XML_ERR_HYPHEN_IN_COMMENT:
355
errmsg = "Comment must not contain '--' (double-hyphen)\n";
357
case XML_ERR_PI_NOT_STARTED:
358
errmsg = "xmlParsePI : no target name\n";
360
case XML_ERR_RESERVED_XML_NAME:
361
errmsg = "Invalid PI name\n";
363
case XML_ERR_NOTATION_NOT_STARTED:
364
errmsg = "NOTATION: Name expected here\n";
366
case XML_ERR_NOTATION_NOT_FINISHED:
367
errmsg = "'>' required to close NOTATION declaration\n";
369
case XML_ERR_VALUE_REQUIRED:
370
errmsg = "Entity value required\n";
372
case XML_ERR_URI_FRAGMENT:
373
errmsg = "Fragment not allowed";
375
case XML_ERR_ATTLIST_NOT_STARTED:
376
errmsg = "'(' required to start ATTLIST enumeration\n";
378
case XML_ERR_NMTOKEN_REQUIRED:
379
errmsg = "NmToken expected in ATTLIST enumeration\n";
381
case XML_ERR_ATTLIST_NOT_FINISHED:
382
errmsg = "')' required to finish ATTLIST enumeration\n";
384
case XML_ERR_MIXED_NOT_STARTED:
385
errmsg = "MixedContentDecl : '|' or ')*' expected\n";
387
case XML_ERR_PCDATA_REQUIRED:
388
errmsg = "MixedContentDecl : '#PCDATA' expected\n";
390
case XML_ERR_ELEMCONTENT_NOT_STARTED:
391
errmsg = "ContentDecl : Name or '(' expected\n";
393
case XML_ERR_ELEMCONTENT_NOT_FINISHED:
394
errmsg = "ContentDecl : ',' '|' or ')' expected\n";
396
case XML_ERR_PEREF_IN_INT_SUBSET:
398
"PEReference: forbidden within markup decl in internal subset\n";
400
case XML_ERR_GT_REQUIRED:
401
errmsg = "expected '>'\n";
403
case XML_ERR_CONDSEC_INVALID:
404
errmsg = "XML conditional section '[' expected\n";
406
case XML_ERR_EXT_SUBSET_NOT_FINISHED:
407
errmsg = "Content error in the external subset\n";
409
case XML_ERR_CONDSEC_INVALID_KEYWORD:
411
"conditional section INCLUDE or IGNORE keyword expected\n";
413
case XML_ERR_CONDSEC_NOT_FINISHED:
414
errmsg = "XML conditional section not closed\n";
416
case XML_ERR_XMLDECL_NOT_STARTED:
417
errmsg = "Text declaration '<?xml' required\n";
419
case XML_ERR_XMLDECL_NOT_FINISHED:
420
errmsg = "parsing XML declaration: '?>' expected\n";
422
case XML_ERR_EXT_ENTITY_STANDALONE:
423
errmsg = "external parsed entities cannot be standalone\n";
425
case XML_ERR_ENTITYREF_SEMICOL_MISSING:
426
errmsg = "EntityRef: expecting ';'\n";
428
case XML_ERR_DOCTYPE_NOT_FINISHED:
429
errmsg = "DOCTYPE improperly terminated\n";
431
case XML_ERR_LTSLASH_REQUIRED:
432
errmsg = "EndTag: '</' not found\n";
434
case XML_ERR_EQUAL_REQUIRED:
435
errmsg = "expected '='\n";
437
case XML_ERR_STRING_NOT_CLOSED:
438
errmsg = "String not closed expecting \" or '\n";
440
case XML_ERR_STRING_NOT_STARTED:
441
errmsg = "String not started expecting ' or \"\n";
443
case XML_ERR_ENCODING_NAME:
444
errmsg = "Invalid XML encoding name\n";
446
case XML_ERR_STANDALONE_VALUE:
447
errmsg = "standalone accepts only 'yes' or 'no'\n";
449
case XML_ERR_DOCUMENT_EMPTY:
450
errmsg = "Document is empty\n";
452
case XML_ERR_DOCUMENT_END:
453
errmsg = "Extra content at the end of the document\n";
455
case XML_ERR_NOT_WELL_BALANCED:
456
errmsg = "chunk is not well balanced\n";
458
case XML_ERR_EXTRA_CONTENT:
459
errmsg = "extra content at the end of well balanced chunk\n";
461
case XML_ERR_VERSION_MISSING:
462
errmsg = "Malformed declaration expecting version\n";
470
errmsg = "Unregistered error message\n";
474
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
475
XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
478
ctxt->wellFormed = 0;
479
if (ctxt->recovery == 0)
480
ctxt->disableSAX = 1;
486
* @ctxt: an XML parser context
487
* @error: the error number
488
* @msg: the error message
490
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
493
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
496
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
497
(ctxt->instate == XML_PARSER_EOF))
501
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
502
XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
504
ctxt->wellFormed = 0;
505
if (ctxt->recovery == 0)
506
ctxt->disableSAX = 1;
512
* @ctxt: an XML parser context
513
* @error: the error number
514
* @msg: the error message
521
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
522
const char *msg, const xmlChar *str1, const xmlChar *str2)
524
xmlStructuredErrorFunc schannel = NULL;
526
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
527
(ctxt->instate == XML_PARSER_EOF))
529
if ((ctxt != NULL) && (ctxt->sax != NULL) &&
530
(ctxt->sax->initialized == XML_SAX2_MAGIC))
531
schannel = ctxt->sax->serror;
533
__xmlRaiseError(schannel,
534
(ctxt->sax) ? ctxt->sax->warning : NULL,
536
ctxt, NULL, XML_FROM_PARSER, error,
537
XML_ERR_WARNING, NULL, 0,
538
(const char *) str1, (const char *) str2, NULL, 0, 0,
539
msg, (const char *) str1, (const char *) str2);
541
__xmlRaiseError(schannel, NULL, NULL,
542
ctxt, NULL, XML_FROM_PARSER, error,
543
XML_ERR_WARNING, NULL, 0,
544
(const char *) str1, (const char *) str2, NULL, 0, 0,
545
msg, (const char *) str1, (const char *) str2);
551
* @ctxt: an XML parser context
552
* @error: the error number
553
* @msg: the error message
556
* Handle a validity error.
559
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
560
const char *msg, const xmlChar *str1, const xmlChar *str2)
562
xmlStructuredErrorFunc schannel = NULL;
564
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
565
(ctxt->instate == XML_PARSER_EOF))
569
if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
570
schannel = ctxt->sax->serror;
573
__xmlRaiseError(schannel,
574
ctxt->vctxt.error, ctxt->vctxt.userData,
575
ctxt, NULL, XML_FROM_DTD, error,
576
XML_ERR_ERROR, NULL, 0, (const char *) str1,
577
(const char *) str2, NULL, 0, 0,
578
msg, (const char *) str1, (const char *) str2);
581
__xmlRaiseError(schannel, NULL, NULL,
582
ctxt, NULL, XML_FROM_DTD, error,
583
XML_ERR_ERROR, NULL, 0, (const char *) str1,
584
(const char *) str2, NULL, 0, 0,
585
msg, (const char *) str1, (const char *) str2);
591
* @ctxt: an XML parser context
592
* @error: the error number
593
* @msg: the error message
594
* @val: an integer value
596
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
599
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
600
const char *msg, int val)
602
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
603
(ctxt->instate == XML_PARSER_EOF))
607
__xmlRaiseError(NULL, NULL, NULL,
608
ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
609
NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
611
ctxt->wellFormed = 0;
612
if (ctxt->recovery == 0)
613
ctxt->disableSAX = 1;
618
* xmlFatalErrMsgStrIntStr:
619
* @ctxt: an XML parser context
620
* @error: the error number
621
* @msg: the error message
622
* @str1: a string info
623
* @val: an integer value
624
* @str2: a string info
626
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
629
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
630
const char *msg, const xmlChar *str1, int val,
633
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
634
(ctxt->instate == XML_PARSER_EOF))
638
__xmlRaiseError(NULL, NULL, NULL,
639
ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
640
NULL, 0, (const char *) str1, (const char *) str2,
641
NULL, val, 0, msg, str1, val, str2);
643
ctxt->wellFormed = 0;
644
if (ctxt->recovery == 0)
645
ctxt->disableSAX = 1;
651
* @ctxt: an XML parser context
652
* @error: the error number
653
* @msg: the error message
654
* @val: a string value
656
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
659
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
660
const char *msg, const xmlChar * val)
662
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
663
(ctxt->instate == XML_PARSER_EOF))
667
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
668
XML_FROM_PARSER, error, XML_ERR_FATAL,
669
NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
672
ctxt->wellFormed = 0;
673
if (ctxt->recovery == 0)
674
ctxt->disableSAX = 1;
680
* @ctxt: an XML parser context
681
* @error: the error number
682
* @msg: the error message
683
* @val: a string value
685
* Handle a non fatal parser error
688
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
689
const char *msg, const xmlChar * val)
691
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
692
(ctxt->instate == XML_PARSER_EOF))
696
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
697
XML_FROM_PARSER, error, XML_ERR_ERROR,
698
NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
704
* @ctxt: an XML parser context
705
* @error: the error number
707
* @info1: extra information string
708
* @info2: extra information string
710
* Handle a namespace error, i.e. violating namespace well-formedness constraints
713
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
715
const xmlChar * info1, const xmlChar * info2,
716
const xmlChar * info3)
718
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
719
(ctxt->instate == XML_PARSER_EOF))
723
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
724
XML_ERR_ERROR, NULL, 0, (const char *) info1,
725
(const char *) info2, (const char *) info3, 0, 0, msg,
726
info1, info2, info3);
728
ctxt->nsWellFormed = 0;
733
* @ctxt: an XML parser context
734
* @error: the error number
736
* @info1: extra information string
737
* @info2: extra information string
739
* Handle a namespace warning error
742
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
744
const xmlChar * info1, const xmlChar * info2,
745
const xmlChar * info3)
747
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
748
(ctxt->instate == XML_PARSER_EOF))
750
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
751
XML_ERR_WARNING, NULL, 0, (const char *) info1,
752
(const char *) info2, (const char *) info3, 0, 0, msg,
753
info1, info2, info3);
756
/************************************************************************
758
* Library wide options *
760
************************************************************************/
764
* @feature: the feature to be examined
766
* Examines if the library has been compiled with a given feature.
768
* Returns a non-zero value if the feature exists, otherwise zero.
769
* Returns zero (0) if the feature does not exist or an unknown
770
* feature is requested, non-zero otherwise.
773
xmlHasFeature(xmlFeature feature)
776
case XML_WITH_THREAD:
777
#ifdef LIBXML_THREAD_ENABLED
783
#ifdef LIBXML_TREE_ENABLED
788
case XML_WITH_OUTPUT:
789
#ifdef LIBXML_OUTPUT_ENABLED
795
#ifdef LIBXML_PUSH_ENABLED
800
case XML_WITH_READER:
801
#ifdef LIBXML_READER_ENABLED
806
case XML_WITH_PATTERN:
807
#ifdef LIBXML_PATTERN_ENABLED
812
case XML_WITH_WRITER:
813
#ifdef LIBXML_WRITER_ENABLED
819
#ifdef LIBXML_SAX1_ENABLED
825
#ifdef LIBXML_FTP_ENABLED
831
#ifdef LIBXML_HTTP_ENABLED
837
#ifdef LIBXML_VALID_ENABLED
843
#ifdef LIBXML_HTML_ENABLED
848
case XML_WITH_LEGACY:
849
#ifdef LIBXML_LEGACY_ENABLED
855
#ifdef LIBXML_C14N_ENABLED
860
case XML_WITH_CATALOG:
861
#ifdef LIBXML_CATALOG_ENABLED
867
#ifdef LIBXML_XPATH_ENABLED
873
#ifdef LIBXML_XPTR_ENABLED
878
case XML_WITH_XINCLUDE:
879
#ifdef LIBXML_XINCLUDE_ENABLED
885
#ifdef LIBXML_ICONV_ENABLED
890
case XML_WITH_ISO8859X:
891
#ifdef LIBXML_ISO8859X_ENABLED
896
case XML_WITH_UNICODE:
897
#ifdef LIBXML_UNICODE_ENABLED
902
case XML_WITH_REGEXP:
903
#ifdef LIBXML_REGEXP_ENABLED
908
case XML_WITH_AUTOMATA:
909
#ifdef LIBXML_AUTOMATA_ENABLED
915
#ifdef LIBXML_EXPR_ENABLED
920
case XML_WITH_SCHEMAS:
921
#ifdef LIBXML_SCHEMAS_ENABLED
926
case XML_WITH_SCHEMATRON:
927
#ifdef LIBXML_SCHEMATRON_ENABLED
932
case XML_WITH_MODULES:
933
#ifdef LIBXML_MODULES_ENABLED
939
#ifdef LIBXML_DEBUG_ENABLED
944
case XML_WITH_DEBUG_MEM:
945
#ifdef DEBUG_MEMORY_LOCATION
950
case XML_WITH_DEBUG_RUN:
951
#ifdef LIBXML_DEBUG_RUNTIME
957
#ifdef LIBXML_ZLIB_ENABLED
963
#ifdef LIBXML_LZMA_ENABLED
969
#ifdef LIBXML_ICU_ENABLED
980
/************************************************************************
982
* SAX2 defaulted attributes handling *
984
************************************************************************/
988
* @ctxt: an XML parser context
990
* Do the SAX2 detection and specific initialization
993
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
994
if (ctxt == NULL) return;
995
#ifdef LIBXML_SAX1_ENABLED
996
if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
997
((ctxt->sax->startElementNs != NULL) ||
998
(ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1001
#endif /* LIBXML_SAX1_ENABLED */
1003
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1004
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1005
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1006
if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1007
(ctxt->str_xml_ns == NULL)) {
1008
xmlErrMemory(ctxt, NULL);
1012
typedef struct _xmlDefAttrs xmlDefAttrs;
1013
typedef xmlDefAttrs *xmlDefAttrsPtr;
1014
struct _xmlDefAttrs {
1015
int nbAttrs; /* number of defaulted attributes on that element */
1016
int maxAttrs; /* the size of the array */
1017
const xmlChar *values[5]; /* array of localname/prefix/values/external */
1021
* xmlAttrNormalizeSpace:
1022
* @src: the source string
1023
* @dst: the target string
1025
* Normalize the space in non CDATA attribute values:
1026
* If the attribute type is not CDATA, then the XML processor MUST further
1027
* process the normalized attribute value by discarding any leading and
1028
* trailing space (#x20) characters, and by replacing sequences of space
1029
* (#x20) characters by a single space (#x20) character.
1030
* Note that the size of dst needs to be at least that of src, and if one doesn't need
1031
* to preserve dst (and it doesn't come from a dictionary or read-only) then
1032
* passing src as dst is just fine.
1034
* Returns a pointer to the normalized value (dst) or NULL if no conversion
1038
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1040
if ((src == NULL) || (dst == NULL))
1043
while (*src == 0x20) src++;
1046
while (*src == 0x20) src++;
1060
* xmlAttrNormalizeSpace2:
1061
* @src: the source string
1063
* Normalize the space in non CDATA attribute values, a slightly more complex
1064
* front end to avoid allocation problems when running on attribute values
1065
* coming from the input.
1067
* Returns a pointer to the normalized value (dst) or NULL if no conversion
1070
static const xmlChar *
1071
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1074
int remove_head = 0;
1075
int need_realloc = 0;
1078
if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1085
while (*cur == 0x20) {
1092
if ((*cur == 0x20) || (*cur == 0)) {
1102
ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1104
xmlErrMemory(ctxt, NULL);
1107
xmlAttrNormalizeSpace(ret, ret);
1108
*len = (int) strlen((const char *)ret);
1110
} else if (remove_head) {
1111
*len -= remove_head;
1112
memmove(src, src + remove_head, 1 + *len);
1120
* @ctxt: an XML parser context
1121
* @fullname: the element fullname
1122
* @fullattr: the attribute fullname
1123
* @value: the attribute value
1125
* Add a defaulted attribute for an element
1128
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1129
const xmlChar *fullname,
1130
const xmlChar *fullattr,
1131
const xmlChar *value) {
1132
xmlDefAttrsPtr defaults;
1134
const xmlChar *name;
1135
const xmlChar *prefix;
1138
* Allows to detect attribute redefinitions
1140
if (ctxt->attsSpecial != NULL) {
1141
if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1145
if (ctxt->attsDefault == NULL) {
1146
ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1147
if (ctxt->attsDefault == NULL)
1152
* split the element name into prefix:localname , the string found
1153
* are within the DTD and then not associated to namespace names.
1155
name = xmlSplitQName3(fullname, &len);
1157
name = xmlDictLookup(ctxt->dict, fullname, -1);
1160
name = xmlDictLookup(ctxt->dict, name, -1);
1161
prefix = xmlDictLookup(ctxt->dict, fullname, len);
1165
* make sure there is some storage
1167
defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1168
if (defaults == NULL) {
1169
defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1170
(4 * 5) * sizeof(const xmlChar *));
1171
if (defaults == NULL)
1173
defaults->nbAttrs = 0;
1174
defaults->maxAttrs = 4;
1175
if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1176
defaults, NULL) < 0) {
1180
} else if (defaults->nbAttrs >= defaults->maxAttrs) {
1181
xmlDefAttrsPtr temp;
1183
temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1184
(2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1188
defaults->maxAttrs *= 2;
1189
if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1190
defaults, NULL) < 0) {
1197
* Split the attribute name into prefix:localname, the strings found
1198
* are within the DTD and then not associated to namespace names.
1200
name = xmlSplitQName3(fullattr, &len);
1202
name = xmlDictLookup(ctxt->dict, fullattr, -1);
1205
name = xmlDictLookup(ctxt->dict, name, -1);
1206
prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1209
defaults->values[5 * defaults->nbAttrs] = name;
1210
defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1211
/* intern the string and precompute the end */
1212
len = xmlStrlen(value);
1213
value = xmlDictLookup(ctxt->dict, value, len);
1214
defaults->values[5 * defaults->nbAttrs + 2] = value;
1215
defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1217
defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1219
defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1220
defaults->nbAttrs++;
1225
xmlErrMemory(ctxt, NULL);
1230
* xmlAddSpecialAttr:
1231
* @ctxt: an XML parser context
1232
* @fullname: the element fullname
1233
* @fullattr: the attribute fullname
1234
* @type: the attribute type
1236
* Register this attribute type
1239
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1240
const xmlChar *fullname,
1241
const xmlChar *fullattr,
1244
if (ctxt->attsSpecial == NULL) {
1245
ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1246
if (ctxt->attsSpecial == NULL)
1250
if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1253
xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1254
(void *) (long) type);
1258
xmlErrMemory(ctxt, NULL);
1263
* xmlCleanSpecialAttrCallback:
1265
* Removes CDATA attributes from the special attribute table
1268
xmlCleanSpecialAttrCallback(void *payload, void *data,
1269
const xmlChar *fullname, const xmlChar *fullattr,
1270
const xmlChar *unused ATTRIBUTE_UNUSED) {
1271
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1273
if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1274
xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1279
* xmlCleanSpecialAttr:
1280
* @ctxt: an XML parser context
1282
* Trim the list of attributes defined to remove all those of type
1283
* CDATA as they are not special. This call should be done when finishing
1284
* to parse the DTD and before starting to parse the document root.
1287
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1289
if (ctxt->attsSpecial == NULL)
1292
xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1294
if (xmlHashSize(ctxt->attsSpecial) == 0) {
1295
xmlHashFree(ctxt->attsSpecial, NULL);
1296
ctxt->attsSpecial = NULL;
1302
* xmlCheckLanguageID:
1303
* @lang: pointer to the string value
1305
* Checks that the value conforms to the LanguageID production:
1307
* NOTE: this is somewhat deprecated, those productions were removed from
1308
* the XML Second edition.
1310
* [33] LanguageID ::= Langcode ('-' Subcode)*
1311
* [34] Langcode ::= ISO639Code | IanaCode | UserCode
1312
* [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1313
* [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1314
* [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1315
* [38] Subcode ::= ([a-z] | [A-Z])+
1317
* The current REC references the successors of RFC 1766, currently RFC 5646
1319
* http://www.rfc-editor.org/rfc/rfc5646.txt
1320
* langtag = language
1326
* language = 2*3ALPHA ; shortest ISO 639 code
1327
* ["-" extlang] ; sometimes followed by
1328
* ; extended language subtags
1329
* / 4ALPHA ; or reserved for future use
1330
* / 5*8ALPHA ; or registered language subtag
1332
* extlang = 3ALPHA ; selected ISO 639 codes
1333
* *2("-" 3ALPHA) ; permanently reserved
1335
* script = 4ALPHA ; ISO 15924 code
1337
* region = 2ALPHA ; ISO 3166-1 code
1338
* / 3DIGIT ; UN M.49 code
1340
* variant = 5*8alphanum ; registered variants
1341
* / (DIGIT 3alphanum)
1343
* extension = singleton 1*("-" (2*8alphanum))
1345
* ; Single alphanumerics
1346
* ; "x" reserved for private use
1347
* singleton = DIGIT ; 0 - 9
1353
* it sounds right to still allow Irregular i-xxx IANA and user codes too
1354
* The parser below doesn't try to cope with extension or privateuse
1355
* that could be added but that's not interoperable anyway
1357
* Returns 1 if correct 0 otherwise
1360
xmlCheckLanguageID(const xmlChar * lang)
1362
const xmlChar *cur = lang, *nxt;
1366
if (((cur[0] == 'i') && (cur[1] == '-')) ||
1367
((cur[0] == 'I') && (cur[1] == '-')) ||
1368
((cur[0] == 'x') && (cur[1] == '-')) ||
1369
((cur[0] == 'X') && (cur[1] == '-'))) {
1371
* Still allow IANA code and user code which were coming
1372
* from the previous version of the XML-1.0 specification
1373
* it's deprecated but we should not fail
1376
while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1377
((cur[0] >= 'a') && (cur[0] <= 'z')))
1379
return(cur[0] == 0);
1382
while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1383
((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1385
if (nxt - cur >= 4) {
1389
if ((nxt - cur > 8) || (nxt[0] != 0))
1395
/* we got an ISO 639 code */
1403
/* now we can have extlang or script or region or variant */
1404
if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1407
while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1408
((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1414
if ((nxt - cur >= 5) && (nxt - cur <= 8))
1418
/* we parsed an extlang */
1426
/* now we can have script or region or variant */
1427
if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1430
while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1431
((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1435
if ((nxt - cur >= 5) && (nxt - cur <= 8))
1439
/* we parsed a script */
1448
/* now we can have region or variant */
1449
if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1452
while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1453
((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1456
if ((nxt - cur >= 5) && (nxt - cur <= 8))
1460
/* we parsed a region */
1469
/* now we can just have a variant */
1470
while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1471
((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1474
if ((nxt - cur < 5) || (nxt - cur > 8))
1477
/* we parsed a variant */
1483
/* extensions and private use subtags not checked */
1487
if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1488
((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1495
/************************************************************************
1497
* Parser stacks related functions and macros *
1499
************************************************************************/
1501
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1502
const xmlChar ** str);
1507
* @ctxt: an XML parser context
1508
* @prefix: the namespace prefix or NULL
1509
* @URL: the namespace name
1511
* Pushes a new parser namespace on top of the ns stack
1513
* Returns -1 in case of error, -2 if the namespace should be discarded
1514
* and the index in the stack otherwise.
1517
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1519
if (ctxt->options & XML_PARSE_NSCLEAN) {
1521
for (i = 0;i < ctxt->nsNr;i += 2) {
1522
if (ctxt->nsTab[i] == prefix) {
1524
if (ctxt->nsTab[i + 1] == URL)
1526
/* out of scope keep it */
1531
if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1534
ctxt->nsTab = (const xmlChar **)
1535
xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1536
if (ctxt->nsTab == NULL) {
1537
xmlErrMemory(ctxt, NULL);
1541
} else if (ctxt->nsNr >= ctxt->nsMax) {
1542
const xmlChar ** tmp;
1544
tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1545
ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1547
xmlErrMemory(ctxt, NULL);
1553
ctxt->nsTab[ctxt->nsNr++] = prefix;
1554
ctxt->nsTab[ctxt->nsNr++] = URL;
1555
return (ctxt->nsNr);
1559
* @ctxt: an XML parser context
1560
* @nr: the number to pop
1562
* Pops the top @nr parser prefix/namespace from the ns stack
1564
* Returns the number of namespaces removed
1567
nsPop(xmlParserCtxtPtr ctxt, int nr)
1571
if (ctxt->nsTab == NULL) return(0);
1572
if (ctxt->nsNr < nr) {
1573
xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1576
if (ctxt->nsNr <= 0)
1579
for (i = 0;i < nr;i++) {
1581
ctxt->nsTab[ctxt->nsNr] = NULL;
1588
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1589
const xmlChar **atts;
1593
if (ctxt->atts == NULL) {
1594
maxatts = 55; /* allow for 10 attrs by default */
1595
atts = (const xmlChar **)
1596
xmlMalloc(maxatts * sizeof(xmlChar *));
1597
if (atts == NULL) goto mem_error;
1599
attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1600
if (attallocs == NULL) goto mem_error;
1601
ctxt->attallocs = attallocs;
1602
ctxt->maxatts = maxatts;
1603
} else if (nr + 5 > ctxt->maxatts) {
1604
maxatts = (nr + 5) * 2;
1605
atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1606
maxatts * sizeof(const xmlChar *));
1607
if (atts == NULL) goto mem_error;
1609
attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1610
(maxatts / 5) * sizeof(int));
1611
if (attallocs == NULL) goto mem_error;
1612
ctxt->attallocs = attallocs;
1613
ctxt->maxatts = maxatts;
1615
return(ctxt->maxatts);
1617
xmlErrMemory(ctxt, NULL);
1623
* @ctxt: an XML parser context
1624
* @value: the parser input
1626
* Pushes a new parser input on top of the input stack
1628
* Returns -1 in case of error, the index in the stack otherwise
1631
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1633
if ((ctxt == NULL) || (value == NULL))
1635
if (ctxt->inputNr >= ctxt->inputMax) {
1636
ctxt->inputMax *= 2;
1638
(xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1640
sizeof(ctxt->inputTab[0]));
1641
if (ctxt->inputTab == NULL) {
1642
xmlErrMemory(ctxt, NULL);
1643
xmlFreeInputStream(value);
1644
ctxt->inputMax /= 2;
1649
ctxt->inputTab[ctxt->inputNr] = value;
1650
ctxt->input = value;
1651
return (ctxt->inputNr++);
1655
* @ctxt: an XML parser context
1657
* Pops the top parser input from the input stack
1659
* Returns the input just removed
1662
inputPop(xmlParserCtxtPtr ctxt)
1664
xmlParserInputPtr ret;
1668
if (ctxt->inputNr <= 0)
1671
if (ctxt->inputNr > 0)
1672
ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1675
ret = ctxt->inputTab[ctxt->inputNr];
1676
ctxt->inputTab[ctxt->inputNr] = NULL;
1681
* @ctxt: an XML parser context
1682
* @value: the element node
1684
* Pushes a new element node on top of the node stack
1686
* Returns -1 in case of error, the index in the stack otherwise
1689
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1691
if (ctxt == NULL) return(0);
1692
if (ctxt->nodeNr >= ctxt->nodeMax) {
1695
tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1697
sizeof(ctxt->nodeTab[0]));
1699
xmlErrMemory(ctxt, NULL);
1702
ctxt->nodeTab = tmp;
1705
if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1706
((ctxt->options & XML_PARSE_HUGE) == 0)) {
1707
xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1708
"Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1710
ctxt->instate = XML_PARSER_EOF;
1713
ctxt->nodeTab[ctxt->nodeNr] = value;
1715
return (ctxt->nodeNr++);
1720
* @ctxt: an XML parser context
1722
* Pops the top element node from the node stack
1724
* Returns the node just removed
1727
nodePop(xmlParserCtxtPtr ctxt)
1731
if (ctxt == NULL) return(NULL);
1732
if (ctxt->nodeNr <= 0)
1735
if (ctxt->nodeNr > 0)
1736
ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1739
ret = ctxt->nodeTab[ctxt->nodeNr];
1740
ctxt->nodeTab[ctxt->nodeNr] = NULL;
1744
#ifdef LIBXML_PUSH_ENABLED
1747
* @ctxt: an XML parser context
1748
* @value: the element name
1749
* @prefix: the element prefix
1750
* @URI: the element namespace name
1752
* Pushes a new element name/prefix/URL on top of the name stack
1754
* Returns -1 in case of error, the index in the stack otherwise
1757
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1758
const xmlChar *prefix, const xmlChar *URI, int nsNr)
1760
if (ctxt->nameNr >= ctxt->nameMax) {
1761
const xmlChar * *tmp;
1764
tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1766
sizeof(ctxt->nameTab[0]));
1771
ctxt->nameTab = tmp;
1772
tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1774
sizeof(ctxt->pushTab[0]));
1779
ctxt->pushTab = tmp2;
1781
ctxt->nameTab[ctxt->nameNr] = value;
1783
ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1784
ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1785
ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1786
return (ctxt->nameNr++);
1788
xmlErrMemory(ctxt, NULL);
1793
* @ctxt: an XML parser context
1795
* Pops the top element/prefix/URI name from the name stack
1797
* Returns the name just removed
1799
static const xmlChar *
1800
nameNsPop(xmlParserCtxtPtr ctxt)
1804
if (ctxt->nameNr <= 0)
1807
if (ctxt->nameNr > 0)
1808
ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1811
ret = ctxt->nameTab[ctxt->nameNr];
1812
ctxt->nameTab[ctxt->nameNr] = NULL;
1815
#endif /* LIBXML_PUSH_ENABLED */
1819
* @ctxt: an XML parser context
1820
* @value: the element name
1822
* Pushes a new element name on top of the name stack
1824
* Returns -1 in case of error, the index in the stack otherwise
1827
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1829
if (ctxt == NULL) return (-1);
1831
if (ctxt->nameNr >= ctxt->nameMax) {
1832
const xmlChar * *tmp;
1833
tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1835
sizeof(ctxt->nameTab[0]));
1839
ctxt->nameTab = tmp;
1842
ctxt->nameTab[ctxt->nameNr] = value;
1844
return (ctxt->nameNr++);
1846
xmlErrMemory(ctxt, NULL);
1851
* @ctxt: an XML parser context
1853
* Pops the top element name from the name stack
1855
* Returns the name just removed
1858
namePop(xmlParserCtxtPtr ctxt)
1862
if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1865
if (ctxt->nameNr > 0)
1866
ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1869
ret = ctxt->nameTab[ctxt->nameNr];
1870
ctxt->nameTab[ctxt->nameNr] = NULL;
1874
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1875
if (ctxt->spaceNr >= ctxt->spaceMax) {
1878
ctxt->spaceMax *= 2;
1879
tmp = (int *) xmlRealloc(ctxt->spaceTab,
1880
ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1882
xmlErrMemory(ctxt, NULL);
1886
ctxt->spaceTab = tmp;
1888
ctxt->spaceTab[ctxt->spaceNr] = val;
1889
ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1890
return(ctxt->spaceNr++);
1893
static int spacePop(xmlParserCtxtPtr ctxt) {
1895
if (ctxt->spaceNr <= 0) return(0);
1897
if (ctxt->spaceNr > 0)
1898
ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1900
ctxt->space = &ctxt->spaceTab[0];
1901
ret = ctxt->spaceTab[ctxt->spaceNr];
1902
ctxt->spaceTab[ctxt->spaceNr] = -1;
1907
* Macros for accessing the content. Those should be used only by the parser,
1910
* Dirty macros, i.e. one often needs to make assumptions about the context to
1913
* CUR_PTR return the current pointer to the xmlChar to be parsed.
1914
* To be used with extreme caution since operations consuming
1915
* characters may move the input buffer to a different location !
1916
* CUR returns the current xmlChar value, i.e. an 8 bit value if compiled
1917
* This should be used internally by the parser
1918
* only to compare to ASCII values otherwise it would break when
1919
* running with UTF-8 encoding.
1920
* RAW same as CUR but in the input buffer, bypass any token
1921
* extraction that may have been done
1922
* NXT(n) returns the n'th next xmlChar. Same as CUR it should be used only
1923
* to compare on ASCII based substring.
1924
* SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1925
* strings without newlines within the parser.
1926
* NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1927
* defined char within the parser.
1928
* Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
1930
* NEXT Skip to the next character, this does the proper decoding
1931
* in UTF-8 mode. It also pops up unfinished entities on the fly.
1932
* NEXTL(l) Skip the current unicode character of l xmlChars long.
1933
* CUR_CHAR(l) returns the current unicode character (int), set l
1934
* to the number of xmlChars used for the encoding [0-5].
1935
* CUR_SCHAR same but operate on a string instead of the context
1936
* COPY_BUF copy the current unicode char to the target buffer, increment
1938
* GROW, SHRINK handling of input buffers
1941
/* RAW: the xmlChar under the current input cursor; needs `ctxt` in scope. */
#define RAW (ctxt->input->cur[0])
1942
/* CUR: the xmlChar under the current input cursor; needs `ctxt` in scope. */
#define CUR (ctxt->input->cur[0])
1943
/*
 * NXT(val): the xmlChar located val positions after the current input
 * cursor; needs `ctxt` in scope. The whole expansion is parenthesized so
 * the macro behaves as a single primary expression wherever it is used.
 */
#define NXT(val) (ctxt->input->cur[(val)])
1944
/*
 * CUR_PTR: the current input cursor itself (an lvalue); needs `ctxt` in
 * scope. Parenthesized so it composes safely with surrounding operators
 * while remaining assignable.
 */
#define CUR_PTR (ctxt->input->cur)
1946
#define CMP4( s, c1, c2, c3, c4 ) \
1947
( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
1948
((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
1949
#define CMP5( s, c1, c2, c3, c4, c5 ) \
1950
( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
1951
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
1952
( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
1953
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
1954
( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
1955
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
1956
( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
1957
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
1958
( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
1959
((unsigned char *) s)[ 8 ] == c9 )
1960
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
1961
( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
1962
((unsigned char *) s)[ 9 ] == c10 )
1964
#define SKIP(val) do { \
1965
ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
1966
if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
1967
if ((*ctxt->input->cur == 0) && \
1968
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
1969
xmlPopInput(ctxt); \
1972
#define SKIPL(val) do { \
1974
for(skipl=0; skipl<val; skipl++) { \
1975
if (*(ctxt->input->cur) == '\n') { \
1976
ctxt->input->line++; ctxt->input->col = 1; \
1977
} else ctxt->input->col++; \
1979
ctxt->input->cur++; \
1981
if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
1982
if ((*ctxt->input->cur == 0) && \
1983
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
1984
xmlPopInput(ctxt); \
1987
#define SHRINK if ((ctxt->progressive == 0) && \
1988
(ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1989
(ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
1992
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1993
xmlParserInputShrink(ctxt->input);
1994
if ((*ctxt->input->cur == 0) &&
1995
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1999
#define GROW if ((ctxt->progressive == 0) && \
2000
(ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2003
static void xmlGROW (xmlParserCtxtPtr ctxt) {
2004
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2005
if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
2006
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2010
/* SKIP_BLANKS: shorthand for xmlSkipBlankChars(ctxt); needs `ctxt` in scope. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
2012
/* NEXT: shorthand for xmlNextChar(ctxt); needs `ctxt` in scope. */
#define NEXT xmlNextChar(ctxt)
2015
ctxt->input->col++; \
2016
ctxt->input->cur++; \
2018
if (*ctxt->input->cur == 0) \
2019
xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
2022
#define NEXTL(l) do { \
2023
if (*(ctxt->input->cur) == '\n') { \
2024
ctxt->input->line++; ctxt->input->col = 1; \
2025
} else ctxt->input->col++; \
2026
ctxt->input->cur += l; \
2027
if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
2030
/*
 * CUR_CHAR(l): calls xmlCurrentChar(ctxt, &(l)); l must be an lvalue that
 * receives the length reported by that call. Needs `ctxt` in scope.
 * The parameter is parenthesized so any lvalue expression can be passed.
 */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
2031
/*
 * CUR_SCHAR(s, l): calls xmlStringCurrentChar(ctxt, (s), &(l)); l must be an
 * lvalue that receives the length reported by that call. Needs `ctxt` in
 * scope. Both parameters are parenthesized so arbitrary argument
 * expressions expand safely.
 */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))
2033
#define COPY_BUF(l,b,i,v) \
2034
if (l == 1) b[i++] = (xmlChar) v; \
2035
else i += xmlCopyCharMultiByte(&b[i],v)
2038
* xmlSkipBlankChars:
2039
* @ctxt: the XML parser context
2041
* skip all blank characters found at that point in the input stream.
2042
* It pops up finished entities in the process if allowable at that point.
2044
* Returns the number of space chars skipped
2048
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2052
* It's Okay to use CUR/NEXT here since all the blanks are on
2055
if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2058
* if we are in the document content, go really fast
2060
cur = ctxt->input->cur;
2061
while (IS_BLANK_CH(*cur)) {
2063
ctxt->input->line++; ctxt->input->col = 1;
2068
ctxt->input->cur = cur;
2069
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2070
cur = ctxt->input->cur;
2073
ctxt->input->cur = cur;
2078
while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
2083
while ((cur == 0) && (ctxt->inputNr > 1) &&
2084
(ctxt->instate != XML_PARSER_COMMENT)) {
2089
* Need to handle support of entities branching here
2091
if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2092
} while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2097
/************************************************************************
2099
* Commodity functions to handle entities *
2101
************************************************************************/
2105
* @ctxt: an XML parser context
2107
* xmlPopInput: the current input pointed by ctxt->input came to an end
2108
* pop it and return the next char.
2110
* Returns the current xmlChar in the parser context
2113
xmlPopInput(xmlParserCtxtPtr ctxt) {
2114
if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2115
if (xmlParserDebugEntities)
2116
xmlGenericError(xmlGenericErrorContext,
2117
"Popping input %d\n", ctxt->inputNr);
2118
xmlFreeInputStream(inputPop(ctxt));
2119
if ((*ctxt->input->cur == 0) &&
2120
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2121
return(xmlPopInput(ctxt));
2127
* @ctxt: an XML parser context
2128
* @input: an XML parser input fragment (entity, XML fragment ...).
2130
* xmlPushInput: switch to a new input stream which is stacked on top
2131
* of the previous one(s).
2132
* Returns -1 in case of error or the index in the input stack
2135
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2137
if (input == NULL) return(-1);
2139
if (xmlParserDebugEntities) {
2140
if ((ctxt->input != NULL) && (ctxt->input->filename))
2141
xmlGenericError(xmlGenericErrorContext,
2142
"%s(%d): ", ctxt->input->filename,
2144
xmlGenericError(xmlGenericErrorContext,
2145
"Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2147
ret = inputPush(ctxt, input);
2154
* @ctxt: an XML parser context
2156
* parse Reference declarations
2158
* [66] CharRef ::= '&#' [0-9]+ ';' |
2159
* '&#x' [0-9a-fA-F]+ ';'
2161
* [ WFC: Legal Character ]
2162
* Characters referred to using character references must match the
2163
* production for Char.
2165
* Returns the value parsed (as an int), 0 in case of error
2168
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2169
unsigned int val = 0;
2171
unsigned int outofrange = 0;
2174
* Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2176
if ((RAW == '&') && (NXT(1) == '#') &&
2180
while (RAW != ';') { /* loop blocked by count */
2185
if ((RAW >= '0') && (RAW <= '9'))
2186
val = val * 16 + (CUR - '0');
2187
else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2188
val = val * 16 + (CUR - 'a') + 10;
2189
else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2190
val = val * 16 + (CUR - 'A') + 10;
2192
xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2203
/* on purpose to avoid reentrancy problems with NEXT and SKIP */
2208
} else if ((RAW == '&') && (NXT(1) == '#')) {
2211
while (RAW != ';') { /* loop blocked by count */
2216
if ((RAW >= '0') && (RAW <= '9'))
2217
val = val * 10 + (CUR - '0');
2219
xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2230
/* on purpose to avoid reentrancy problems with NEXT and SKIP */
2236
xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2240
* [ WFC: Legal Character ]
2241
* Characters referred to using character references must match the
2242
* production for Char.
2244
if ((IS_CHAR(val) && (outofrange == 0))) {
2247
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2248
"xmlParseCharRef: invalid xmlChar value %d\n",
2255
* xmlParseStringCharRef:
2256
* @ctxt: an XML parser context
2257
* @str: a pointer to an index in the string
2259
* parse Reference declarations, variant parsing from a string rather
2260
* than an input flow.
2262
* [66] CharRef ::= '&#' [0-9]+ ';' |
2263
* '&#x' [0-9a-fA-F]+ ';'
2265
* [ WFC: Legal Character ]
2266
* Characters referred to using character references must match the
2267
* production for Char.
2269
* Returns the value parsed (as an int), 0 in case of error, str will be
2270
* updated to the current value of the index
2273
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2276
unsigned int val = 0;
2277
unsigned int outofrange = 0;
2279
if ((str == NULL) || (*str == NULL)) return(0);
2282
if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2285
while (cur != ';') { /* Non input consuming loop */
2286
if ((cur >= '0') && (cur <= '9'))
2287
val = val * 16 + (cur - '0');
2288
else if ((cur >= 'a') && (cur <= 'f'))
2289
val = val * 16 + (cur - 'a') + 10;
2290
else if ((cur >= 'A') && (cur <= 'F'))
2291
val = val * 16 + (cur - 'A') + 10;
2293
xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2305
} else if ((cur == '&') && (ptr[1] == '#')){
2308
while (cur != ';') { /* Non input consuming loops */
2309
if ((cur >= '0') && (cur <= '9'))
2310
val = val * 10 + (cur - '0');
2312
xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2325
xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2331
* [ WFC: Legal Character ]
2332
* Characters referred to using character references must match the
2333
* production for Char.
2335
if ((IS_CHAR(val) && (outofrange == 0))) {
2338
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2339
"xmlParseStringCharRef: invalid xmlChar value %d\n",
2346
* xmlNewBlanksWrapperInputStream:
2347
* @ctxt: an XML parser context
2348
* @entity: an Entity pointer
2350
* Create a new input stream for wrapping
2351
* blanks around a PEReference
2353
* Returns the new input stream or NULL
2356
/*
 * deallocblankswrapper:
 * @str: the buffer to release
 *
 * Release hook installed as input->free on blanks-wrapper input streams;
 * it simply hands @str to xmlFree(). Its address is also compared against
 * input->free elsewhere in this file to recognise such wrapper inputs.
 */
static void
deallocblankswrapper(xmlChar *str)
{
    xmlFree(str);
}
2358
static xmlParserInputPtr
2359
xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2360
xmlParserInputPtr input;
2363
if (entity == NULL) {
2364
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2365
"xmlNewBlanksWrapperInputStream entity\n");
2368
if (xmlParserDebugEntities)
2369
xmlGenericError(xmlGenericErrorContext,
2370
"new blanks wrapper for entity: %s\n", entity->name);
2371
input = xmlNewInputStream(ctxt);
2372
if (input == NULL) {
2375
length = xmlStrlen(entity->name) + 5;
2376
buffer = xmlMallocAtomic(length);
2377
if (buffer == NULL) {
2378
xmlErrMemory(ctxt, NULL);
2384
buffer [length-3] = ';';
2385
buffer [length-2] = ' ';
2386
buffer [length-1] = 0;
2387
memcpy(buffer + 2, entity->name, length - 5);
2388
input->free = deallocblankswrapper;
2389
input->base = buffer;
2390
input->cur = buffer;
2391
input->length = length;
2392
input->end = &buffer[length];
2397
* xmlParserHandlePEReference:
2398
* @ctxt: the parser context
2400
* [69] PEReference ::= '%' Name ';'
2402
* [ WFC: No Recursion ]
2403
* A parsed entity must not contain a recursive
2404
* reference to itself, either directly or indirectly.
2406
* [ WFC: Entity Declared ]
2407
* In a document without any DTD, a document with only an internal DTD
2408
* subset which contains no parameter entity references, or a document
2409
* with "standalone='yes'", ... ... The declaration of a parameter
2410
* entity must precede any reference to it...
2412
* [ VC: Entity Declared ]
2413
* In a document with an external subset or external parameter entities
2414
* with "standalone='no'", ... ... The declaration of a parameter entity
2415
* must precede any reference to it...
2418
* Parameter-entity references may only appear in the DTD.
2419
* NOTE: misleading but this is handled.
2421
* A PEReference may have been detected in the current input stream
2422
* the handling is done according to
2423
* http://www.w3.org/TR/REC-xml#entproc
2425
* - Included in literal in entity values
2426
* - Included as Parameter Entity reference within DTDs
2429
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2430
const xmlChar *name;
2431
xmlEntityPtr entity = NULL;
2432
xmlParserInputPtr input;
2434
if (RAW != '%') return;
2435
switch(ctxt->instate) {
2436
case XML_PARSER_CDATA_SECTION:
2438
case XML_PARSER_COMMENT:
2440
case XML_PARSER_START_TAG:
2442
case XML_PARSER_END_TAG:
2444
case XML_PARSER_EOF:
2445
xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2447
case XML_PARSER_PROLOG:
2448
case XML_PARSER_START:
2449
case XML_PARSER_MISC:
2450
xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2452
case XML_PARSER_ENTITY_DECL:
2453
case XML_PARSER_CONTENT:
2454
case XML_PARSER_ATTRIBUTE_VALUE:
2456
case XML_PARSER_SYSTEM_LITERAL:
2457
case XML_PARSER_PUBLIC_LITERAL:
2458
/* we just ignore it there */
2460
case XML_PARSER_EPILOG:
2461
xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2463
case XML_PARSER_ENTITY_VALUE:
2465
* NOTE: in the case of entity values, we don't do the
2466
* substitution here since we need the literal
2467
* entity value to be able to save the internal
2468
* subset of the document.
2469
* This will be handled by xmlStringDecodeEntities
2472
case XML_PARSER_DTD:
2474
* [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2475
* In the internal DTD subset, parameter-entity references
2476
* can occur only where markup declarations can occur, not
2477
* within markup declarations.
2478
* In that case this is handled in xmlParseMarkupDecl
2480
if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2482
if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2485
case XML_PARSER_IGNORE:
2490
name = xmlParseName(ctxt);
2491
if (xmlParserDebugEntities)
2492
xmlGenericError(xmlGenericErrorContext,
2493
"PEReference: %s\n", name);
2495
xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2499
if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2500
entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2501
if (entity == NULL) {
2504
* [ WFC: Entity Declared ]
2505
* In a document without any DTD, a document with only an
2506
* internal DTD subset which contains no parameter entity
2507
* references, or a document with "standalone='yes'", ...
2508
* ... The declaration of a parameter entity must precede
2509
* any reference to it...
2511
if ((ctxt->standalone == 1) ||
2512
((ctxt->hasExternalSubset == 0) &&
2513
(ctxt->hasPErefs == 0))) {
2514
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2515
"PEReference: %%%s; not found\n", name);
2518
* [ VC: Entity Declared ]
2519
* In a document with an external subset or external
2520
* parameter entities with "standalone='no'", ...
2521
* ... The declaration of a parameter entity must precede
2522
* any reference to it...
2524
if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2525
xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2526
"PEReference: %%%s; not found\n",
2529
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2530
"PEReference: %%%s; not found\n",
2534
} else if (ctxt->input->free != deallocblankswrapper) {
2535
input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2536
if (xmlPushInput(ctxt, input) < 0)
2539
if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2540
(entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2542
xmlCharEncoding enc;
2545
* handle the extra spaces added before and after
2546
* c.f. http://www.w3.org/TR/REC-xml#as-PE
2547
* this is done independently.
2549
input = xmlNewEntityInputStream(ctxt, entity);
2550
if (xmlPushInput(ctxt, input) < 0)
2554
* Get the 4 first bytes and decode the charset
2555
* if enc != XML_CHAR_ENCODING_NONE
2556
* plug some encoding conversion routines.
2557
* Note that, since we may have some non-UTF8
2558
* encoding (like UTF16, bug 135229), the 'length'
2559
* is not known, but we can calculate based upon
2560
* the amount of data in the buffer.
2563
if ((ctxt->input->end - ctxt->input->cur)>=4) {
2568
enc = xmlDetectCharEncoding(start, 4);
2569
if (enc != XML_CHAR_ENCODING_NONE) {
2570
xmlSwitchEncoding(ctxt, enc);
2574
if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2575
(CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2576
(IS_BLANK_CH(NXT(5)))) {
2577
xmlParseTextDecl(ctxt);
2580
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2581
"PEReference: %s is not a parameter entity\n",
2586
xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2592
* Macro used to grow the current buffer.
2593
* buffer##_size is expected to be a size_t
2594
* mem_error: is expected to handle memory allocation failures
2596
#define growBuffer(buffer, n) { \
2598
size_t new_size = buffer##_size * 2 + n; \
2599
if (new_size < buffer##_size) goto mem_error; \
2600
tmp = (xmlChar *) xmlRealloc(buffer, new_size); \
2601
if (tmp == NULL) goto mem_error; \
2603
buffer##_size = new_size; \
2607
* xmlStringLenDecodeEntities:
2608
* @ctxt: the parser context
2609
* @str: the input string
2610
* @len: the string length
2611
* @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2612
* @end: an end marker xmlChar, 0 if none
2613
* @end2: an end marker xmlChar, 0 if none
2614
* @end3: an end marker xmlChar, 0 if none
2616
* Takes an entity string content and processes it to do the adequate substitutions.
2618
* [67] Reference ::= EntityRef | CharRef
2620
* [69] PEReference ::= '%' Name ';'
2622
* Returns A newly allocated string with the substitution done. The caller
2623
* must deallocate it !
2626
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2627
int what, xmlChar end, xmlChar end2, xmlChar end3) {
2628
xmlChar *buffer = NULL;
2629
size_t buffer_size = 0;
2632
xmlChar *current = NULL;
2633
xmlChar *rep = NULL;
2634
const xmlChar *last;
2638
if ((ctxt == NULL) || (str == NULL) || (len < 0))
2642
if (((ctxt->depth > 40) &&
2643
((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2644
(ctxt->depth > 1024)) {
2645
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2650
* allocate a translation buffer.
2652
buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2653
buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2654
if (buffer == NULL) goto mem_error;
2657
* OK loop until we reach one of the ending char or a size limit.
2658
* we are operating on already parsed values.
2661
c = CUR_SCHAR(str, l);
2664
while ((c != 0) && (c != end) && /* non input consuming loop */
2665
(c != end2) && (c != end3)) {
2668
if ((c == '&') && (str[1] == '#')) {
2669
int val = xmlParseStringCharRef(ctxt, &str);
2671
COPY_BUF(0,buffer,nbchars,val);
2673
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2674
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2676
} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2677
if (xmlParserDebugEntities)
2678
xmlGenericError(xmlGenericErrorContext,
2679
"String decoding Entity Reference: %.30s\n",
2681
ent = xmlParseStringEntityRef(ctxt, &str);
2682
if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2683
(ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2686
ctxt->nbentities += ent->checked;
2687
if ((ent != NULL) &&
2688
(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2689
if (ent->content != NULL) {
2690
COPY_BUF(0,buffer,nbchars,ent->content[0]);
2691
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2692
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2695
xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2696
"predefined entity has no content\n");
2698
} else if ((ent != NULL) && (ent->content != NULL)) {
2700
rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2706
while (*current != 0) { /* non input consuming loop */
2707
buffer[nbchars++] = *current++;
2708
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2709
if (xmlParserEntityCheck(ctxt, nbchars, ent))
2711
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2717
} else if (ent != NULL) {
2718
int i = xmlStrlen(ent->name);
2719
const xmlChar *cur = ent->name;
2721
buffer[nbchars++] = '&';
2722
if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2723
growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2726
buffer[nbchars++] = *cur++;
2727
buffer[nbchars++] = ';';
2729
} else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2730
if (xmlParserDebugEntities)
2731
xmlGenericError(xmlGenericErrorContext,
2732
"String decoding PE Reference: %.30s\n", str);
2733
ent = xmlParseStringPEReference(ctxt, &str);
2734
if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2737
ctxt->nbentities += ent->checked;
2739
if (ent->content == NULL) {
2740
xmlLoadEntityContent(ctxt, ent);
2743
rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2748
while (*current != 0) { /* non input consuming loop */
2749
buffer[nbchars++] = *current++;
2750
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2751
if (xmlParserEntityCheck(ctxt, nbchars, ent))
2753
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2761
COPY_BUF(l,buffer,nbchars,c);
2763
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2764
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2768
c = CUR_SCHAR(str, l);
2772
buffer[nbchars] = 0;
2776
xmlErrMemory(ctxt, NULL);
2786
* xmlStringDecodeEntities:
2787
* @ctxt: the parser context
2788
* @str: the input string
2789
* @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2790
* @end: an end marker xmlChar, 0 if none
2791
* @end2: an end marker xmlChar, 0 if none
2792
* @end3: an end marker xmlChar, 0 if none
2794
* Takes an entity string content and processes it to do the adequate substitutions.
2796
* [67] Reference ::= EntityRef | CharRef
2798
* [69] PEReference ::= '%' Name ';'
2800
* Returns A newly allocated string with the substitution done. The caller
2801
* must deallocate it !
2804
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2805
xmlChar end, xmlChar end2, xmlChar end3) {
2806
if ((ctxt == NULL) || (str == NULL)) return(NULL);
2807
return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2811
/************************************************************************
2813
* Commodity functions, cleanup needed ? *
2815
************************************************************************/
2819
* @ctxt: an XML parser context
2821
* @len: the size of @str
2822
* @blank_chars: we know the chars are blanks
2824
* Is this a sequence of blank chars that one can ignore ?
2826
* Returns 1 if ignorable 0 otherwise.
2829
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2832
xmlNodePtr lastChild;
2835
* Don't spend time trying to differentiate them, the same callback is
2838
if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2842
* Check for xml:space value.
2844
if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2845
(*(ctxt->space) == -2))
2849
* Check that the string is made of blanks
2851
if (blank_chars == 0) {
2852
for (i = 0;i < len;i++)
2853
if (!(IS_BLANK_CH(str[i]))) return(0);
2857
* Look if the element is mixed content in the DTD if available
2859
if (ctxt->node == NULL) return(0);
2860
if (ctxt->myDoc != NULL) {
2861
ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2862
if (ret == 0) return(1);
2863
if (ret == 1) return(0);
2867
* Otherwise, heuristic :-\
2869
if ((RAW != '<') && (RAW != 0xD)) return(0);
2870
if ((ctxt->node->children == NULL) &&
2871
(RAW == '<') && (NXT(1) == '/')) return(0);
2873
lastChild = xmlGetLastChild(ctxt->node);
2874
if (lastChild == NULL) {
2875
if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2876
(ctxt->node->content != NULL)) return(0);
2877
} else if (xmlNodeIsText(lastChild))
2879
else if ((ctxt->node->children != NULL) &&
2880
(xmlNodeIsText(ctxt->node->children)))
2885
/************************************************************************
2887
* Extra stuff for namespace support *
2888
* Relates to http://www.w3.org/TR/WD-xml-names *
2890
************************************************************************/
2894
* @ctxt: an XML parser context
2895
* @name: the UTF8 encoded XML qualified name string to split
2896
* @prefix: a xmlChar **
2898
* parse an UTF8 encoded XML qualified name string
2900
* [NS 5] QName ::= (Prefix ':')? LocalPart
2902
* [NS 6] Prefix ::= NCName
2904
* [NS 7] LocalPart ::= NCName
2906
* Returns the local part, and prefix is updated
2907
* to get the Prefix if any.
2911
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2912
xmlChar buf[XML_MAX_NAMELEN + 5];
2913
xmlChar *buffer = NULL;
2915
int max = XML_MAX_NAMELEN;
2916
xmlChar *ret = NULL;
2917
const xmlChar *cur = name;
2920
if (prefix == NULL) return(NULL);
2923
if (cur == NULL) return(NULL);
2925
#ifndef XML_XML_NAMESPACE
2926
/* xml: prefix is not really a namespace */
2927
if ((cur[0] == 'x') && (cur[1] == 'm') &&
2928
(cur[2] == 'l') && (cur[3] == ':'))
2929
return(xmlStrdup(name));
2932
/* nasty but well-formed */
2934
return(xmlStrdup(name));
2937
while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2943
* Okay someone managed to make a huge name, so he's ready to pay
2944
* for the processing speed.
2948
buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2949
if (buffer == NULL) {
2950
xmlErrMemory(ctxt, NULL);
2953
memcpy(buffer, buf, len);
2954
while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2955
if (len + 10 > max) {
2959
tmp = (xmlChar *) xmlRealloc(buffer,
2960
max * sizeof(xmlChar));
2963
xmlErrMemory(ctxt, NULL);
2974
if ((c == ':') && (*cur == 0)) {
2978
return(xmlStrdup(name));
2982
ret = xmlStrndup(buf, len);
2986
max = XML_MAX_NAMELEN;
2994
return(xmlStrndup(BAD_CAST "", 0));
2999
* Check that the first character is proper to start
3002
if (!(((c >= 0x61) && (c <= 0x7A)) ||
3003
((c >= 0x41) && (c <= 0x5A)) ||
3004
(c == '_') || (c == ':'))) {
3006
int first = CUR_SCHAR(cur, l);
3008
if (!IS_LETTER(first) && (first != '_')) {
3009
xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3010
"Name %s is not XML Namespace compliant\n",
3016
while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3022
* Okay someone managed to make a huge name, so he's ready to pay
3023
* for the processing speed.
3027
buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3028
if (buffer == NULL) {
3029
xmlErrMemory(ctxt, NULL);
3032
memcpy(buffer, buf, len);
3033
while (c != 0) { /* tested bigname2.xml */
3034
if (len + 10 > max) {
3038
tmp = (xmlChar *) xmlRealloc(buffer,
3039
max * sizeof(xmlChar));
3041
xmlErrMemory(ctxt, NULL);
3054
ret = xmlStrndup(buf, len);
3063
/************************************************************************
3065
* The parser itself *
3066
* Relates to http://www.w3.org/TR/REC-xml *
3068
************************************************************************/
3070
/************************************************************************
3072
* Routines to parse Name, NCName and NmToken *
3074
************************************************************************/
3076
static unsigned long nbParseName = 0;
3077
static unsigned long nbParseNmToken = 0;
3078
static unsigned long nbParseNCName = 0;
3079
static unsigned long nbParseNCNameComplex = 0;
3080
static unsigned long nbParseNameComplex = 0;
3081
static unsigned long nbParseStringName = 0;
3085
* The two following functions are related to the change of accepted
3086
* characters for Name and NmToken in the Revision 5 of XML-1.0
3087
* They correspond to the modified production [4] and the new production [4a]
3088
* changes in that revision. Also note that the macros used for the
3089
* productions Letter, Digit, CombiningChar and Extender are not needed
3091
* We still keep compatibility to pre-revision5 parsing semantic if the
3092
* new XML_PARSE_OLD10 option is given to the parser.
3095
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3096
if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3098
* Use the new checks of production [4] [4a] and [5] of the
3099
* Update 5 of XML-1.0
3101
if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3102
(((c >= 'a') && (c <= 'z')) ||
3103
((c >= 'A') && (c <= 'Z')) ||
3104
(c == '_') || (c == ':') ||
3105
((c >= 0xC0) && (c <= 0xD6)) ||
3106
((c >= 0xD8) && (c <= 0xF6)) ||
3107
((c >= 0xF8) && (c <= 0x2FF)) ||
3108
((c >= 0x370) && (c <= 0x37D)) ||
3109
((c >= 0x37F) && (c <= 0x1FFF)) ||
3110
((c >= 0x200C) && (c <= 0x200D)) ||
3111
((c >= 0x2070) && (c <= 0x218F)) ||
3112
((c >= 0x2C00) && (c <= 0x2FEF)) ||
3113
((c >= 0x3001) && (c <= 0xD7FF)) ||
3114
((c >= 0xF900) && (c <= 0xFDCF)) ||
3115
((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3116
((c >= 0x10000) && (c <= 0xEFFFF))))
3119
if (IS_LETTER(c) || (c == '_') || (c == ':'))
3126
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3127
if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3129
* Use the new checks of production [4] [4a] and [5] of the
3130
* Update 5 of XML-1.0
3132
if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3133
(((c >= 'a') && (c <= 'z')) ||
3134
((c >= 'A') && (c <= 'Z')) ||
3135
((c >= '0') && (c <= '9')) || /* !start */
3136
(c == '_') || (c == ':') ||
3137
(c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3138
((c >= 0xC0) && (c <= 0xD6)) ||
3139
((c >= 0xD8) && (c <= 0xF6)) ||
3140
((c >= 0xF8) && (c <= 0x2FF)) ||
3141
((c >= 0x300) && (c <= 0x36F)) || /* !start */
3142
((c >= 0x370) && (c <= 0x37D)) ||
3143
((c >= 0x37F) && (c <= 0x1FFF)) ||
3144
((c >= 0x200C) && (c <= 0x200D)) ||
3145
((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3146
((c >= 0x2070) && (c <= 0x218F)) ||
3147
((c >= 0x2C00) && (c <= 0x2FEF)) ||
3148
((c >= 0x3001) && (c <= 0xD7FF)) ||
3149
((c >= 0xF900) && (c <= 0xFDCF)) ||
3150
((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3151
((c >= 0x10000) && (c <= 0xEFFFF))))
3154
if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3155
(c == '.') || (c == '-') ||
3156
(c == '_') || (c == ':') ||
3157
(IS_COMBINING(c)) ||
3164
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3165
int *len, int *alloc, int normalize);
3167
static const xmlChar *
3168
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3174
nbParseNameComplex++;
3178
* Handler for more complex cases
3182
if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3184
* Use the new checks of production [4] [4a] and [5] of the
3185
* Update 5 of XML-1.0
3187
if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3188
(!(((c >= 'a') && (c <= 'z')) ||
3189
((c >= 'A') && (c <= 'Z')) ||
3190
(c == '_') || (c == ':') ||
3191
((c >= 0xC0) && (c <= 0xD6)) ||
3192
((c >= 0xD8) && (c <= 0xF6)) ||
3193
((c >= 0xF8) && (c <= 0x2FF)) ||
3194
((c >= 0x370) && (c <= 0x37D)) ||
3195
((c >= 0x37F) && (c <= 0x1FFF)) ||
3196
((c >= 0x200C) && (c <= 0x200D)) ||
3197
((c >= 0x2070) && (c <= 0x218F)) ||
3198
((c >= 0x2C00) && (c <= 0x2FEF)) ||
3199
((c >= 0x3001) && (c <= 0xD7FF)) ||
3200
((c >= 0xF900) && (c <= 0xFDCF)) ||
3201
((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3202
((c >= 0x10000) && (c <= 0xEFFFF))))) {
3208
while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3209
(((c >= 'a') && (c <= 'z')) ||
3210
((c >= 'A') && (c <= 'Z')) ||
3211
((c >= '0') && (c <= '9')) || /* !start */
3212
(c == '_') || (c == ':') ||
3213
(c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3214
((c >= 0xC0) && (c <= 0xD6)) ||
3215
((c >= 0xD8) && (c <= 0xF6)) ||
3216
((c >= 0xF8) && (c <= 0x2FF)) ||
3217
((c >= 0x300) && (c <= 0x36F)) || /* !start */
3218
((c >= 0x370) && (c <= 0x37D)) ||
3219
((c >= 0x37F) && (c <= 0x1FFF)) ||
3220
((c >= 0x200C) && (c <= 0x200D)) ||
3221
((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3222
((c >= 0x2070) && (c <= 0x218F)) ||
3223
((c >= 0x2C00) && (c <= 0x2FEF)) ||
3224
((c >= 0x3001) && (c <= 0xD7FF)) ||
3225
((c >= 0xF900) && (c <= 0xFDCF)) ||
3226
((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3227
((c >= 0x10000) && (c <= 0xEFFFF))
3229
if (count++ > 100) {
3238
if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3239
(!IS_LETTER(c) && (c != '_') &&
3247
while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3248
((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3249
(c == '.') || (c == '-') ||
3250
(c == '_') || (c == ':') ||
3251
(IS_COMBINING(c)) ||
3252
(IS_EXTENDER(c)))) {
3253
if (count++ > 100) {
3262
if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3263
return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3264
return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3269
* @ctxt: an XML parser context
3271
* parse an XML name.
3273
* [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3274
* CombiningChar | Extender
3276
* [5] Name ::= (Letter | '_' | ':') (NameChar)*
3278
* [6] Names ::= Name (#x20 Name)*
3280
* Returns the Name parsed or NULL
3284
xmlParseName(xmlParserCtxtPtr ctxt) {
3296
* Accelerator for simple ASCII names
3298
in = ctxt->input->cur;
3299
if (((*in >= 0x61) && (*in <= 0x7A)) ||
3300
((*in >= 0x41) && (*in <= 0x5A)) ||
3301
(*in == '_') || (*in == ':')) {
3303
while (((*in >= 0x61) && (*in <= 0x7A)) ||
3304
((*in >= 0x41) && (*in <= 0x5A)) ||
3305
((*in >= 0x30) && (*in <= 0x39)) ||
3306
(*in == '_') || (*in == '-') ||
3307
(*in == ':') || (*in == '.'))
3309
if ((*in > 0) && (*in < 0x80)) {
3310
count = in - ctxt->input->cur;
3311
ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3312
ctxt->input->cur = in;
3313
ctxt->nbChars += count;
3314
ctxt->input->col += count;
3316
xmlErrMemory(ctxt, NULL);
3320
/* accelerator for special cases */
3321
return(xmlParseNameComplex(ctxt));
3324
static const xmlChar *
3325
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3331
nbParseNCNameComplex++;
3335
* Handler for more complex cases
3339
if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3340
(!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3344
while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3345
(xmlIsNameChar(ctxt, c) && (c != ':'))) {
3346
if (count++ > 100) {
3354
return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3359
* @ctxt: an XML parser context
3360
* @len: length of the string parsed
3362
* parse an XML name.
3364
* [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3365
* CombiningChar | Extender
3367
* [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3369
* Returns the Name parsed or NULL
3372
static const xmlChar *
3373
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3383
* Accelerator for simple ASCII names
3385
in = ctxt->input->cur;
3386
if (((*in >= 0x61) && (*in <= 0x7A)) ||
3387
((*in >= 0x41) && (*in <= 0x5A)) ||
3390
while (((*in >= 0x61) && (*in <= 0x7A)) ||
3391
((*in >= 0x41) && (*in <= 0x5A)) ||
3392
((*in >= 0x30) && (*in <= 0x39)) ||
3393
(*in == '_') || (*in == '-') ||
3396
if ((*in > 0) && (*in < 0x80)) {
3397
count = in - ctxt->input->cur;
3398
ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3399
ctxt->input->cur = in;
3400
ctxt->nbChars += count;
3401
ctxt->input->col += count;
3403
xmlErrMemory(ctxt, NULL);
3408
return(xmlParseNCNameComplex(ctxt));
3412
* xmlParseNameAndCompare:
3413
* @ctxt: an XML parser context
3415
* parse an XML name and compares for match
3416
* (specialized for endtag parsing)
3418
* Returns NULL for an illegal name, (xmlChar*) 1 for success
3419
* and the name for mismatch
3422
static const xmlChar *
3423
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3424
register const xmlChar *cmp = other;
3425
register const xmlChar *in;
3430
in = ctxt->input->cur;
3431
while (*in != 0 && *in == *cmp) {
3436
if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3438
ctxt->input->cur = in;
3439
return (const xmlChar*) 1;
3441
/* failure (or end of input buffer), check with full function */
3442
ret = xmlParseName (ctxt);
3443
/* strings coming from the dictionary, direct compare possible */
3445
return (const xmlChar*) 1;
3451
* xmlParseStringName:
3452
* @ctxt: an XML parser context
3453
* @str: a pointer to the string pointer (IN/OUT)
3455
* parse an XML name.
3457
* [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3458
* CombiningChar | Extender
3460
* [5] Name ::= (Letter | '_' | ':') (NameChar)*
3462
* [6] Names ::= Name (#x20 Name)*
3464
* Returns the Name parsed or NULL. The @str pointer
3465
* is updated to the current location in the string.
3469
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3470
xmlChar buf[XML_MAX_NAMELEN + 5];
3471
const xmlChar *cur = *str;
3476
nbParseStringName++;
3479
c = CUR_SCHAR(cur, l);
3480
if (!xmlIsNameStartChar(ctxt, c)) {
3484
COPY_BUF(l,buf,len,c);
3486
c = CUR_SCHAR(cur, l);
3487
while (xmlIsNameChar(ctxt, c)) {
3488
COPY_BUF(l,buf,len,c);
3490
c = CUR_SCHAR(cur, l);
3491
if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3493
* Okay someone managed to make a huge name, so he's ready to pay
3494
* for the processing speed.
3499
buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3500
if (buffer == NULL) {
3501
xmlErrMemory(ctxt, NULL);
3504
memcpy(buffer, buf, len);
3505
while (xmlIsNameChar(ctxt, c)) {
3506
if (len + 10 > max) {
3509
tmp = (xmlChar *) xmlRealloc(buffer,
3510
max * sizeof(xmlChar));
3512
xmlErrMemory(ctxt, NULL);
3518
COPY_BUF(l,buffer,len,c);
3520
c = CUR_SCHAR(cur, l);
3528
return(xmlStrndup(buf, len));
3533
* @ctxt: an XML parser context
3535
* parse an XML Nmtoken.
3537
* [7] Nmtoken ::= (NameChar)+
3539
* [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3541
* Returns the Nmtoken parsed or NULL
3545
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3546
xmlChar buf[XML_MAX_NAMELEN + 5];
3558
while (xmlIsNameChar(ctxt, c)) {
3559
if (count++ > 100) {
3563
COPY_BUF(l,buf,len,c);
3566
if (len >= XML_MAX_NAMELEN) {
3568
* Okay someone managed to make a huge token, so he's ready to pay
3569
* for the processing speed.
3574
buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3575
if (buffer == NULL) {
3576
xmlErrMemory(ctxt, NULL);
3579
memcpy(buffer, buf, len);
3580
while (xmlIsNameChar(ctxt, c)) {
3581
if (count++ > 100) {
3585
if (len + 10 > max) {
3589
tmp = (xmlChar *) xmlRealloc(buffer,
3590
max * sizeof(xmlChar));
3592
xmlErrMemory(ctxt, NULL);
3598
COPY_BUF(l,buffer,len,c);
3608
return(xmlStrndup(buf, len));
3612
* xmlParseEntityValue:
3613
* @ctxt: an XML parser context
3614
* @orig: if non-NULL store a copy of the original entity value
3616
* parse a value for ENTITY declarations
3618
* [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3619
* "'" ([^%&'] | PEReference | Reference)* "'"
3621
* Returns the EntityValue parsed with reference substituted or NULL
3625
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3626
xmlChar *buf = NULL;
3628
int size = XML_PARSER_BUFFER_SIZE;
3631
xmlChar *ret = NULL;
3632
const xmlChar *cur = NULL;
3633
xmlParserInputPtr input;
3635
if (RAW == '"') stop = '"';
3636
else if (RAW == '\'') stop = '\'';
3638
xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3641
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3643
xmlErrMemory(ctxt, NULL);
3648
* The content of the entity definition is copied in a buffer.
3651
ctxt->instate = XML_PARSER_ENTITY_VALUE;
3652
input = ctxt->input;
3657
* NOTE: 4.4.5 Included in Literal
3658
* When a parameter entity reference appears in a literal entity
3659
* value, ... a single or double quote character in the replacement
3660
* text is always treated as a normal data character and will not
3661
* terminate the literal.
3662
* In practice it means we stop the loop only when back at parsing
3663
* the initial entity and the quote is found
3665
while ((IS_CHAR(c)) && ((c != stop) || /* checked */
3666
(ctxt->input != input))) {
3667
if (len + 5 >= size) {
3671
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3673
xmlErrMemory(ctxt, NULL);
3679
COPY_BUF(l,buf,len,c);
3682
* Pop-up of finished entities.
3684
while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3697
* Raise problem w.r.t. '&' and '%' being used in non-entities
3698
* reference constructs. Note Charref will be handled in
3699
* xmlStringDecodeEntities()
3702
while (*cur != 0) { /* non input consuming */
3703
if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3708
name = xmlParseStringName(ctxt, &cur);
3709
if ((name == NULL) || (*cur != ';')) {
3710
xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3711
"EntityValue: '%c' forbidden except for entities references\n",
3714
if ((tmp == '%') && (ctxt->inSubset == 1) &&
3715
(ctxt->inputNr == 1)) {
3716
xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3727
* Then PEReference entities are substituted.
3730
xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3735
* NOTE: 4.4.7 Bypassed
3736
* When a general entity reference appears in the EntityValue in
3737
* an entity declaration, it is bypassed and left as is.
3738
* so XML_SUBSTITUTE_REF is not set here.
3740
ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3752
* xmlParseAttValueComplex:
3753
* @ctxt: an XML parser context
3754
* @attlen: if non-NULL, receives the resulting attribute length
3755
* @normalize: whether to apply the inner normalization
3757
* parse a value for an attribute, this is the fallback function
3758
* of xmlParseAttValue() when the attribute parsing requires handling
3759
* of non-ASCII characters, or normalization compaction.
3761
* Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3764
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3766
xmlChar *buf = NULL;
3767
xmlChar *rep = NULL;
3769
size_t buf_size = 0;
3770
int c, l, in_space = 0;
3771
xmlChar *current = NULL;
3774
if (NXT(0) == '"') {
3775
ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3778
} else if (NXT(0) == '\'') {
3780
ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3783
xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3788
* allocate a translation buffer.
3790
buf_size = XML_PARSER_BUFFER_SIZE;
3791
buf = (xmlChar *) xmlMallocAtomic(buf_size);
3792
if (buf == NULL) goto mem_error;
3795
* OK loop until we reach one of the ending char or a size limit.
3798
while ((NXT(0) != limit) && /* checked */
3799
(IS_CHAR(c)) && (c != '<')) {
3803
if (NXT(1) == '#') {
3804
int val = xmlParseCharRef(ctxt);
3807
if (ctxt->replaceEntities) {
3808
if (len + 10 > buf_size) {
3809
growBuffer(buf, 10);
3814
* The reparsing will be done in xmlStringGetNodeList()
3815
* called by the attribute() function in SAX.c
3817
if (len + 10 > buf_size) {
3818
growBuffer(buf, 10);
3826
} else if (val != 0) {
3827
if (len + 10 > buf_size) {
3828
growBuffer(buf, 10);
3830
len += xmlCopyChar(0, &buf[len], val);
3833
ent = xmlParseEntityRef(ctxt);
3836
ctxt->nbentities += ent->owner;
3837
if ((ent != NULL) &&
3838
(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3839
if (len + 10 > buf_size) {
3840
growBuffer(buf, 10);
3842
if ((ctxt->replaceEntities == 0) &&
3843
(ent->content[0] == '&')) {
3850
buf[len++] = ent->content[0];
3852
} else if ((ent != NULL) &&
3853
(ctxt->replaceEntities != 0)) {
3854
if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3855
rep = xmlStringDecodeEntities(ctxt, ent->content,
3860
while (*current != 0) { /* non input consuming */
3861
if ((*current == 0xD) || (*current == 0xA) ||
3862
(*current == 0x9)) {
3866
buf[len++] = *current++;
3867
if (len + 10 > buf_size) {
3868
growBuffer(buf, 10);
3875
if (len + 10 > buf_size) {
3876
growBuffer(buf, 10);
3878
if (ent->content != NULL)
3879
buf[len++] = ent->content[0];
3881
} else if (ent != NULL) {
3882
int i = xmlStrlen(ent->name);
3883
const xmlChar *cur = ent->name;
3886
* This may look absurd but is needed to detect
3889
if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3890
(ent->content != NULL)) {
3891
rep = xmlStringDecodeEntities(ctxt, ent->content,
3892
XML_SUBSTITUTE_REF, 0, 0, 0);
3900
* Just output the reference
3903
while (len + i + 10 > buf_size) {
3904
growBuffer(buf, i + 10);
3907
buf[len++] = *cur++;
3912
if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
3913
if ((len != 0) || (!normalize)) {
3914
if ((!normalize) || (!in_space)) {
3915
COPY_BUF(l,buf,len,0x20);
3916
while (len + 10 > buf_size) {
3917
growBuffer(buf, 10);
3924
COPY_BUF(l,buf,len,c);
3925
if (len + 10 > buf_size) {
3926
growBuffer(buf, 10);
3934
if ((in_space) && (normalize)) {
3935
while ((len > 0) && (buf[len - 1] == 0x20)) len--;
3939
xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
3940
} else if (RAW != limit) {
3941
if ((c != 0) && (!IS_CHAR(c))) {
3942
xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3943
"invalid character in attribute value\n");
3945
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3946
"AttValue: ' expected\n");
3952
* There we potentially risk an overflow, don't allow attribute value of
3953
* length more than INT_MAX, it is a very reasonable assumption !
3955
if (len >= INT_MAX) {
3956
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3957
"AttValue lenght too long\n");
3961
if (attlen != NULL) *attlen = (int) len;
3965
xmlErrMemory(ctxt, NULL);
3975
* @ctxt: an XML parser context
3977
* parse a value for an attribute
3978
* Note: the parser won't do substitution of entities here, this
3979
* will be handled later in xmlStringGetNodeList
3981
* [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3982
* "'" ([^<&'] | Reference)* "'"
3984
* 3.3.3 Attribute-Value Normalization:
3985
* Before the value of an attribute is passed to the application or
3986
* checked for validity, the XML processor must normalize it as follows:
3987
* - a character reference is processed by appending the referenced
3988
* character to the attribute value
3989
* - an entity reference is processed by recursively processing the
3990
* replacement text of the entity
3991
* - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3992
* appending #x20 to the normalized value, except that only a single
3993
* #x20 is appended for a "#xD#xA" sequence that is part of an external
3994
* parsed entity or the literal entity value of an internal parsed entity
3995
* - other characters are processed by appending them to the normalized value
3996
* If the declared value is not CDATA, then the XML processor must further
3997
* process the normalized attribute value by discarding any leading and
3998
* trailing space (#x20) characters, and by replacing sequences of space
3999
* (#x20) characters by a single space (#x20) character.
4000
* All attributes for which no declaration has been read should be treated
4001
* by a non-validating parser as if declared CDATA.
4003
* Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4008
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4009
if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4010
return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4014
* xmlParseSystemLiteral:
4015
* @ctxt: an XML parser context
4017
* parse an XML Literal
4019
* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4021
* Returns the SystemLiteral parsed or NULL
4025
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4026
xmlChar *buf = NULL;
4028
int size = XML_PARSER_BUFFER_SIZE;
4031
int state = ctxt->instate;
4038
} else if (RAW == '\'') {
4042
xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4046
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4048
xmlErrMemory(ctxt, NULL);
4051
ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4053
while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4054
if (len + 5 >= size) {
4058
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4061
xmlErrMemory(ctxt, NULL);
4062
ctxt->instate = (xmlParserInputState) state;
4072
COPY_BUF(l,buf,len,cur);
4082
ctxt->instate = (xmlParserInputState) state;
4083
if (!IS_CHAR(cur)) {
4084
xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4092
* xmlParsePubidLiteral:
4093
* @ctxt: an XML parser context
4095
* parse an XML public literal
4097
* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4099
* Returns the PubidLiteral parsed or NULL.
4103
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4104
xmlChar *buf = NULL;
4106
int size = XML_PARSER_BUFFER_SIZE;
4110
xmlParserInputState oldstate = ctxt->instate;
4116
} else if (RAW == '\'') {
4120
xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4123
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4125
xmlErrMemory(ctxt, NULL);
4128
ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4130
while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4131
if (len + 1 >= size) {
4135
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4137
xmlErrMemory(ctxt, NULL);
4159
xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4163
ctxt->instate = oldstate;
4167
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4170
* used for the test in the inner loop of the char data testing
4172
static const unsigned char test_char_data[256] = {
4173
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4174
0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4175
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4176
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4177
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4178
0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4179
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4180
0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4181
0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4182
0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4183
0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4184
0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4185
0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4186
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4187
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4188
0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4189
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4190
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4191
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4192
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4193
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4194
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4195
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4196
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4197
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4198
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4199
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4200
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4201
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4202
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4203
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4204
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4209
* @ctxt: an XML parser context
4210
* @cdata: int indicating whether we are within a CDATA section
4212
* parse a CharData section.
4213
* if we are within a CDATA section ']]>' marks an end of section.
4215
* The right angle bracket (>) may be represented using the string "&gt;",
4216
* and must, for compatibility, be escaped using "&gt;" or a character
4217
* reference when it appears in the string "]]>" in content, when that
4218
* string is not marking the end of a CDATA section.
4220
* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4224
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4227
int line = ctxt->input->line;
4228
int col = ctxt->input->col;
4234
* Accelerated common case where input doesn't need to be
4235
* modified before passing it to the handler.
4238
in = ctxt->input->cur;
4241
while (*in == 0x20) { in++; ctxt->input->col++; }
4244
ctxt->input->line++; ctxt->input->col = 1;
4246
} while (*in == 0xA);
4247
goto get_more_space;
4250
nbchar = in - ctxt->input->cur;
4252
const xmlChar *tmp = ctxt->input->cur;
4253
ctxt->input->cur = in;
4255
if ((ctxt->sax != NULL) &&
4256
(ctxt->sax->ignorableWhitespace !=
4257
ctxt->sax->characters)) {
4258
if (areBlanks(ctxt, tmp, nbchar, 1)) {
4259
if (ctxt->sax->ignorableWhitespace != NULL)
4260
ctxt->sax->ignorableWhitespace(ctxt->userData,
4263
if (ctxt->sax->characters != NULL)
4264
ctxt->sax->characters(ctxt->userData,
4266
if (*ctxt->space == -1)
4269
} else if ((ctxt->sax != NULL) &&
4270
(ctxt->sax->characters != NULL)) {
4271
ctxt->sax->characters(ctxt->userData,
4279
ccol = ctxt->input->col;
4280
while (test_char_data[*in]) {
4284
ctxt->input->col = ccol;
4287
ctxt->input->line++; ctxt->input->col = 1;
4289
} while (*in == 0xA);
4293
if ((in[1] == ']') && (in[2] == '>')) {
4294
xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4295
ctxt->input->cur = in;
4302
nbchar = in - ctxt->input->cur;
4304
if ((ctxt->sax != NULL) &&
4305
(ctxt->sax->ignorableWhitespace !=
4306
ctxt->sax->characters) &&
4307
(IS_BLANK_CH(*ctxt->input->cur))) {
4308
const xmlChar *tmp = ctxt->input->cur;
4309
ctxt->input->cur = in;
4311
if (areBlanks(ctxt, tmp, nbchar, 0)) {
4312
if (ctxt->sax->ignorableWhitespace != NULL)
4313
ctxt->sax->ignorableWhitespace(ctxt->userData,
4316
if (ctxt->sax->characters != NULL)
4317
ctxt->sax->characters(ctxt->userData,
4319
if (*ctxt->space == -1)
4322
line = ctxt->input->line;
4323
col = ctxt->input->col;
4324
} else if (ctxt->sax != NULL) {
4325
if (ctxt->sax->characters != NULL)
4326
ctxt->sax->characters(ctxt->userData,
4327
ctxt->input->cur, nbchar);
4328
line = ctxt->input->line;
4329
col = ctxt->input->col;
4331
/* something really bad happened in the SAX callback */
4332
if (ctxt->instate != XML_PARSER_CONTENT)
4335
ctxt->input->cur = in;
4339
ctxt->input->cur = in;
4341
ctxt->input->line++; ctxt->input->col = 1;
4342
continue; /* while */
4354
in = ctxt->input->cur;
4355
} while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4358
ctxt->input->line = line;
4359
ctxt->input->col = col;
4360
xmlParseCharDataComplex(ctxt, cdata);
4364
* xmlParseCharDataComplex:
4365
* @ctxt: an XML parser context
4366
* @cdata: int indicating whether we are within a CDATA section
4368
* parse a CharData section. This is the fallback function
4369
* of xmlParseCharData() when the parsing requires handling
4370
* of non-ASCII characters.
4373
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4374
xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4382
while ((cur != '<') && /* checked */
4384
(IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4385
if ((cur == ']') && (NXT(1) == ']') &&
4389
xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4392
COPY_BUF(l,buf,nbchar,cur);
4393
if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4397
* OK the segment is to be consumed as chars.
4399
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4400
if (areBlanks(ctxt, buf, nbchar, 0)) {
4401
if (ctxt->sax->ignorableWhitespace != NULL)
4402
ctxt->sax->ignorableWhitespace(ctxt->userData,
4405
if (ctxt->sax->characters != NULL)
4406
ctxt->sax->characters(ctxt->userData, buf, nbchar);
4407
if ((ctxt->sax->characters !=
4408
ctxt->sax->ignorableWhitespace) &&
4409
(*ctxt->space == -1))
4414
/* something really bad happened in the SAX callback */
4415
if (ctxt->instate != XML_PARSER_CONTENT)
4429
* OK the segment is to be consumed as chars.
4431
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4432
if (areBlanks(ctxt, buf, nbchar, 0)) {
4433
if (ctxt->sax->ignorableWhitespace != NULL)
4434
ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4436
if (ctxt->sax->characters != NULL)
4437
ctxt->sax->characters(ctxt->userData, buf, nbchar);
4438
if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4439
(*ctxt->space == -1))
4444
if ((cur != 0) && (!IS_CHAR(cur))) {
4445
/* Generate the error and skip the offending character */
4446
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4447
"PCDATA invalid Char value %d\n",
4454
* xmlParseExternalID:
4455
* @ctxt: an XML parser context
4456
* @publicID: a xmlChar** receiving PubidLiteral
4457
* @strict: indicate whether we should restrict parsing to only
4458
* production [75], see NOTE below
4460
* Parse an External ID or a Public ID
4462
* NOTE: Productions [75] and [83] interact badly since [75] can generate
4463
* 'PUBLIC' S PubidLiteral S SystemLiteral
4465
* [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4466
* | 'PUBLIC' S PubidLiteral S SystemLiteral
4468
* [83] PublicID ::= 'PUBLIC' S PubidLiteral
4470
* Returns the SystemLiteral, and in the second
4471
* case publicID receives PubidLiteral; if strict is off
4472
* it is possible to return NULL and have publicID set.
4476
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4477
xmlChar *URI = NULL;
4482
if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4484
if (!IS_BLANK_CH(CUR)) {
4485
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4486
"Space required after 'SYSTEM'\n");
4489
URI = xmlParseSystemLiteral(ctxt);
4491
xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4493
} else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4495
if (!IS_BLANK_CH(CUR)) {
4496
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4497
"Space required after 'PUBLIC'\n");
4500
*publicID = xmlParsePubidLiteral(ctxt);
4501
if (*publicID == NULL) {
4502
xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4506
* We don't handle [83] so "S SystemLiteral" is required.
4508
if (!IS_BLANK_CH(CUR)) {
4509
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4510
"Space required after the Public Identifier\n");
4514
* We handle [83] so we return immediately, if
4515
* "S SystemLiteral" is not detected. From a purely parsing
4516
* point of view that's a nice mess.
4522
if (!IS_BLANK_CH(*ptr)) return(NULL);
4524
while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
4525
if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4528
URI = xmlParseSystemLiteral(ctxt);
4530
xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4537
* xmlParseCommentComplex:
4538
* @ctxt: an XML parser context
4539
* @buf: the already parsed part of the buffer
4540
* @len: number of bytes filled in the buffer
4541
* @size: allocated size of the buffer
4543
* Skip an XML (SGML) comment <!-- .... -->
4544
* The spec says that "For compatibility, the string "--" (double-hyphen)
4545
* must not occur within comments. "
4546
* This is the slow routine in case the accelerator for ascii didn't work
4548
* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4551
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
4558
inputid = ctxt->input->id;
4562
size = XML_PARSER_BUFFER_SIZE;
4563
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4565
xmlErrMemory(ctxt, NULL);
4569
GROW; /* Assure there's enough input data */
4572
goto not_terminated;
4574
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4575
"xmlParseComment: invalid xmlChar value %d\n",
4583
goto not_terminated;
4585
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4586
"xmlParseComment: invalid xmlChar value %d\n",
4594
goto not_terminated;
4595
while (IS_CHAR(cur) && /* checked */
4597
(r != '-') || (q != '-'))) {
4598
if ((r == '-') && (q == '-')) {
4599
xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4601
if (len + 5 >= size) {
4604
new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4605
if (new_buf == NULL) {
4607
xmlErrMemory(ctxt, NULL);
4612
COPY_BUF(ql,buf,len,q);
4633
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4634
"Comment not terminated \n<!--%.50s\n", buf);
4635
} else if (!IS_CHAR(cur)) {
4636
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4637
"xmlParseComment: invalid xmlChar value %d\n",
4640
if (inputid != ctxt->input->id) {
4641
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4642
"Comment doesn't start and stop in the same entity\n");
4645
if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4646
(!ctxt->disableSAX))
4647
ctxt->sax->comment(ctxt->userData, buf);
4652
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4653
"Comment not terminated\n", NULL);
4660
* @ctxt: an XML parser context
4662
* Skip an XML (SGML) comment <!-- .... -->
4663
* The spec says that "For compatibility, the string "--" (double-hyphen)
4664
* must not occur within comments. "
4666
* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4669
xmlParseComment(xmlParserCtxtPtr ctxt) {
4670
xmlChar *buf = NULL;
4671
int size = XML_PARSER_BUFFER_SIZE;
4673
xmlParserInputState state;
4675
int nbchar = 0, ccol;
4679
* Check that there is a comment right here.
4681
if ((RAW != '<') || (NXT(1) != '!') ||
4682
(NXT(2) != '-') || (NXT(3) != '-')) return;
4683
state = ctxt->instate;
4684
ctxt->instate = XML_PARSER_COMMENT;
4685
inputid = ctxt->input->id;
4691
* Accelerated common case where input doesn't need to be
4692
* modified before passing it to the handler.
4694
in = ctxt->input->cur;
4698
ctxt->input->line++; ctxt->input->col = 1;
4700
} while (*in == 0xA);
4703
ccol = ctxt->input->col;
4704
while (((*in > '-') && (*in <= 0x7F)) ||
4705
((*in >= 0x20) && (*in < '-')) ||
4710
ctxt->input->col = ccol;
4713
ctxt->input->line++; ctxt->input->col = 1;
4715
} while (*in == 0xA);
4718
nbchar = in - ctxt->input->cur;
4720
* save current set of data
4723
if ((ctxt->sax != NULL) &&
4724
(ctxt->sax->comment != NULL)) {
4726
if ((*in == '-') && (in[1] == '-'))
4729
size = XML_PARSER_BUFFER_SIZE + nbchar;
4730
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4732
xmlErrMemory(ctxt, NULL);
4733
ctxt->instate = state;
4737
} else if (len + nbchar + 1 >= size) {
4739
size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4740
new_buf = (xmlChar *) xmlRealloc(buf,
4741
size * sizeof(xmlChar));
4742
if (new_buf == NULL) {
4744
xmlErrMemory(ctxt, NULL);
4745
ctxt->instate = state;
4750
memcpy(&buf[len], ctxt->input->cur, nbchar);
4755
ctxt->input->cur = in;
4758
ctxt->input->line++; ctxt->input->col = 1;
4763
ctxt->input->cur = in;
4765
ctxt->input->line++; ctxt->input->col = 1;
4766
continue; /* while */
4772
in = ctxt->input->cur;
4776
if (ctxt->input->id != inputid) {
4777
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4778
"comment doesn't start and stop in the same entity\n");
4781
if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4782
(!ctxt->disableSAX)) {
4784
ctxt->sax->comment(ctxt->userData, buf);
4786
ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4790
ctxt->instate = state;
4794
xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4795
"Double hyphen within comment: "
4799
xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4800
"Double hyphen within comment\n", NULL);
4808
} while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4809
xmlParseCommentComplex(ctxt, buf, len, size);
4810
ctxt->instate = state;
4817
* @ctxt: an XML parser context
4819
* parse the name of a PI
4821
* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4823
* Returns the PITarget name or NULL
4827
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
4828
const xmlChar *name;
4830
name = xmlParseName(ctxt);
4831
if ((name != NULL) &&
4832
((name[0] == 'x') || (name[0] == 'X')) &&
4833
((name[1] == 'm') || (name[1] == 'M')) &&
4834
((name[2] == 'l') || (name[2] == 'L'))) {
4836
if ((name[0] == 'x') && (name[1] == 'm') &&
4837
(name[2] == 'l') && (name[3] == 0)) {
4838
xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4839
"XML declaration allowed only at the start of the document\n");
4841
} else if (name[3] == 0) {
4842
xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
4846
if (xmlW3CPIs[i] == NULL) break;
4847
if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4850
xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4851
"xmlParsePITarget: invalid name prefix 'xml'\n",
4854
if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4855
xmlNsErr(ctxt, XML_NS_ERR_COLON,
4856
"colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4861
#ifdef LIBXML_CATALOG_ENABLED
4863
* xmlParseCatalogPI:
4864
* @ctxt: an XML parser context
4865
* @catalog: the PI value string
4867
* parse an XML Catalog Processing Instruction.
4869
* <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
4871
* Occurs only if allowed by the user and if happening in the Misc
4872
* part of the document before any doctype information
4873
* This will add the given catalog to the parsing context in order
4874
* to be used if there is a resolution need further down in the document
4878
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
4879
xmlChar *URL = NULL;
4880
const xmlChar *tmp, *base;
4884
while (IS_BLANK_CH(*tmp)) tmp++;
4885
if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
4888
while (IS_BLANK_CH(*tmp)) tmp++;
4893
while (IS_BLANK_CH(*tmp)) tmp++;
4895
if ((marker != '\'') && (marker != '"'))
4899
while ((*tmp != 0) && (*tmp != marker)) tmp++;
4902
URL = xmlStrndup(base, tmp - base);
4904
while (IS_BLANK_CH(*tmp)) tmp++;
4909
ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
4915
xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
4916
"Catalog PI syntax error: %s\n",
4925
* @ctxt: an XML parser context
4927
* parse an XML Processing Instruction.
4929
* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4931
* The processing is transferred to SAX once parsed.
4935
xmlParsePI(xmlParserCtxtPtr ctxt) {
4936
xmlChar *buf = NULL;
4938
int size = XML_PARSER_BUFFER_SIZE;
4940
const xmlChar *target;
4941
xmlParserInputState state;
4944
if ((RAW == '<') && (NXT(1) == '?')) {
4945
xmlParserInputPtr input = ctxt->input;
4946
state = ctxt->instate;
4947
ctxt->instate = XML_PARSER_PI;
4949
* this is a Processing Instruction.
4955
* Parse the target name and check for special support like
4958
target = xmlParsePITarget(ctxt);
4959
if (target != NULL) {
4960
if ((RAW == '?') && (NXT(1) == '>')) {
4961
if (input != ctxt->input) {
4962
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4963
"PI declaration doesn't start and stop in the same entity\n");
4970
if ((ctxt->sax) && (!ctxt->disableSAX) &&
4971
(ctxt->sax->processingInstruction != NULL))
4972
ctxt->sax->processingInstruction(ctxt->userData,
4974
if (ctxt->instate != XML_PARSER_EOF)
4975
ctxt->instate = state;
4978
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4980
xmlErrMemory(ctxt, NULL);
4981
ctxt->instate = state;
4985
if (!IS_BLANK(cur)) {
4986
xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4987
"ParsePI: PI %s space expected\n", target);
4991
while (IS_CHAR(cur) && /* checked */
4992
((cur != '?') || (NXT(1) != '>'))) {
4993
if (len + 5 >= size) {
4997
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4999
xmlErrMemory(ctxt, NULL);
5001
ctxt->instate = state;
5011
COPY_BUF(l,buf,len,cur);
5022
xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5023
"ParsePI: PI %s never end ...\n", target);
5025
if (input != ctxt->input) {
5026
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5027
"PI declaration doesn't start and stop in the same entity\n");
5031
#ifdef LIBXML_CATALOG_ENABLED
5032
if (((state == XML_PARSER_MISC) ||
5033
(state == XML_PARSER_START)) &&
5034
(xmlStrEqual(target, XML_CATALOG_PI))) {
5035
xmlCatalogAllow allow = xmlCatalogGetDefaults();
5036
if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5037
(allow == XML_CATA_ALLOW_ALL))
5038
xmlParseCatalogPI(ctxt, buf);
5046
if ((ctxt->sax) && (!ctxt->disableSAX) &&
5047
(ctxt->sax->processingInstruction != NULL))
5048
ctxt->sax->processingInstruction(ctxt->userData,
5053
xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5055
if (ctxt->instate != XML_PARSER_EOF)
5056
ctxt->instate = state;
5061
* xmlParseNotationDecl:
5062
* @ctxt: an XML parser context
5064
* parse a notation declaration
5066
* [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5068
* Hence there are actually 3 choices:
5069
* 'PUBLIC' S PubidLiteral
5070
* 'PUBLIC' S PubidLiteral S SystemLiteral
5071
* and 'SYSTEM' S SystemLiteral
5073
* See the NOTE on xmlParseExternalID().
5077
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5078
const xmlChar *name;
5082
if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5083
xmlParserInputPtr input = ctxt->input;
5086
if (!IS_BLANK_CH(CUR)) {
5087
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5088
"Space required after '<!NOTATION'\n");
5093
name = xmlParseName(ctxt);
5095
xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5098
if (!IS_BLANK_CH(CUR)) {
5099
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5100
"Space required after the NOTATION name'\n");
5103
if (xmlStrchr(name, ':') != NULL) {
5104
xmlNsErr(ctxt, XML_NS_ERR_COLON,
5105
"colon are forbidden from notation names '%s'\n",
5113
Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5117
if (input != ctxt->input) {
5118
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5119
"Notation declaration doesn't start and stop in the same entity\n");
5122
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5123
(ctxt->sax->notationDecl != NULL))
5124
ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5126
xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5128
if (Systemid != NULL) xmlFree(Systemid);
5129
if (Pubid != NULL) xmlFree(Pubid);
5134
* xmlParseEntityDecl:
5135
* @ctxt: an XML parser context
5137
* parse <!ENTITY declarations
5139
* [70] EntityDecl ::= GEDecl | PEDecl
5141
* [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5143
* [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5145
* [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5147
* [74] PEDef ::= EntityValue | ExternalID
5149
* [76] NDataDecl ::= S 'NDATA' S Name
5151
* [ VC: Notation Declared ]
5152
* The Name must match the declared name of a notation.
5156
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5157
const xmlChar *name = NULL;
5158
xmlChar *value = NULL;
5159
xmlChar *URI = NULL, *literal = NULL;
5160
const xmlChar *ndata = NULL;
5161
int isParameter = 0;
5162
xmlChar *orig = NULL;
5165
/* GROW; done in the caller */
5166
if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5167
xmlParserInputPtr input = ctxt->input;
5170
skipped = SKIP_BLANKS;
5172
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5173
"Space required after '<!ENTITY'\n");
5178
skipped = SKIP_BLANKS;
5180
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5181
"Space required after '%'\n");
5186
name = xmlParseName(ctxt);
5188
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5189
"xmlParseEntityDecl: no name\n");
5192
if (xmlStrchr(name, ':') != NULL) {
5193
xmlNsErr(ctxt, XML_NS_ERR_COLON,
5194
"colon are forbidden from entities names '%s'\n",
5197
skipped = SKIP_BLANKS;
5199
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5200
"Space required after the entity name\n");
5203
ctxt->instate = XML_PARSER_ENTITY_DECL;
5205
* handle the various case of definitions...
5208
if ((RAW == '"') || (RAW == '\'')) {
5209
value = xmlParseEntityValue(ctxt, &orig);
5211
if ((ctxt->sax != NULL) &&
5212
(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5213
ctxt->sax->entityDecl(ctxt->userData, name,
5214
XML_INTERNAL_PARAMETER_ENTITY,
5218
URI = xmlParseExternalID(ctxt, &literal, 1);
5219
if ((URI == NULL) && (literal == NULL)) {
5220
xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5225
uri = xmlParseURI((const char *) URI);
5227
xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5228
"Invalid URI: %s\n", URI);
5230
* This really ought to be a well formedness error
5231
* but the XML Core WG decided otherwise c.f. issue
5232
* E26 of the XML errata.
5235
if (uri->fragment != NULL) {
5237
* Okay this is foolish to block those but not
5240
xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5242
if ((ctxt->sax != NULL) &&
5243
(!ctxt->disableSAX) &&
5244
(ctxt->sax->entityDecl != NULL))
5245
ctxt->sax->entityDecl(ctxt->userData, name,
5246
XML_EXTERNAL_PARAMETER_ENTITY,
5247
literal, URI, NULL);
5254
if ((RAW == '"') || (RAW == '\'')) {
5255
value = xmlParseEntityValue(ctxt, &orig);
5256
if ((ctxt->sax != NULL) &&
5257
(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5258
ctxt->sax->entityDecl(ctxt->userData, name,
5259
XML_INTERNAL_GENERAL_ENTITY,
5262
* For expat compatibility in SAX mode.
5264
if ((ctxt->myDoc == NULL) ||
5265
(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5266
if (ctxt->myDoc == NULL) {
5267
ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5268
if (ctxt->myDoc == NULL) {
5269
xmlErrMemory(ctxt, "New Doc failed");
5272
ctxt->myDoc->properties = XML_DOC_INTERNAL;
5274
if (ctxt->myDoc->intSubset == NULL)
5275
ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5276
BAD_CAST "fake", NULL, NULL);
5278
xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5282
URI = xmlParseExternalID(ctxt, &literal, 1);
5283
if ((URI == NULL) && (literal == NULL)) {
5284
xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5289
uri = xmlParseURI((const char *)URI);
5291
xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5292
"Invalid URI: %s\n", URI);
5294
* This really ought to be a well formedness error
5295
* but the XML Core WG decided otherwise c.f. issue
5296
* E26 of the XML errata.
5299
if (uri->fragment != NULL) {
5301
* Okay this is foolish to block those but not
5304
xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5309
if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
5310
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5311
"Space required before 'NDATA'\n");
5314
if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5316
if (!IS_BLANK_CH(CUR)) {
5317
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5318
"Space required after 'NDATA'\n");
5321
ndata = xmlParseName(ctxt);
5322
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5323
(ctxt->sax->unparsedEntityDecl != NULL))
5324
ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5325
literal, URI, ndata);
5327
if ((ctxt->sax != NULL) &&
5328
(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5329
ctxt->sax->entityDecl(ctxt->userData, name,
5330
XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5331
literal, URI, NULL);
5333
* For expat compatibility in SAX mode.
5334
* assuming the entity replacement was asked for
5336
if ((ctxt->replaceEntities != 0) &&
5337
((ctxt->myDoc == NULL) ||
5338
(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5339
if (ctxt->myDoc == NULL) {
5340
ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5341
if (ctxt->myDoc == NULL) {
5342
xmlErrMemory(ctxt, "New Doc failed");
5345
ctxt->myDoc->properties = XML_DOC_INTERNAL;
5348
if (ctxt->myDoc->intSubset == NULL)
5349
ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5350
BAD_CAST "fake", NULL, NULL);
5351
xmlSAX2EntityDecl(ctxt, name,
5352
XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5353
literal, URI, NULL);
5360
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5361
"xmlParseEntityDecl: entity %s not terminated\n", name);
5363
if (input != ctxt->input) {
5364
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5365
"Entity declaration doesn't start and stop in the same entity\n");
5371
* Ugly mechanism to save the raw entity value.
5373
xmlEntityPtr cur = NULL;
5376
if ((ctxt->sax != NULL) &&
5377
(ctxt->sax->getParameterEntity != NULL))
5378
cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5380
if ((ctxt->sax != NULL) &&
5381
(ctxt->sax->getEntity != NULL))
5382
cur = ctxt->sax->getEntity(ctxt->userData, name);
5383
if ((cur == NULL) && (ctxt->userData==ctxt)) {
5384
cur = xmlSAX2GetEntity(ctxt, name);
5388
if (cur->orig != NULL)
5395
if (value != NULL) xmlFree(value);
5396
if (URI != NULL) xmlFree(URI);
5397
if (literal != NULL) xmlFree(literal);
5402
* xmlParseDefaultDecl:
5403
* @ctxt: an XML parser context
5404
* @value: Receive a possible fixed default value for the attribute
5406
* Parse an attribute default declaration
5408
* [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5410
* [ VC: Required Attribute ]
5411
* if the default declaration is the keyword #REQUIRED, then the
5412
* attribute must be specified for all elements of the type in the
5413
* attribute-list declaration.
5415
* [ VC: Attribute Default Legal ]
5416
* The declared default value must meet the lexical constraints of
5417
* the declared attribute type c.f. xmlValidateAttributeDecl()
5419
* [ VC: Fixed Attribute Default ]
5420
* if an attribute has a default value declared with the #FIXED
5421
* keyword, instances of that attribute must match the default value.
5423
* [ WFC: No < in Attribute Values ]
5424
* handled in xmlParseAttValue()
5426
* returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5427
* or XML_ATTRIBUTE_FIXED.
5431
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5436
if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5438
return(XML_ATTRIBUTE_REQUIRED);
5440
if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5442
return(XML_ATTRIBUTE_IMPLIED);
5444
val = XML_ATTRIBUTE_NONE;
5445
if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5447
val = XML_ATTRIBUTE_FIXED;
5448
if (!IS_BLANK_CH(CUR)) {
5449
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5450
"Space required after '#FIXED'\n");
5454
ret = xmlParseAttValue(ctxt);
5455
ctxt->instate = XML_PARSER_DTD;
5457
xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5458
"Attribute default value declaration error\n");
5465
* xmlParseNotationType:
5466
* @ctxt: an XML parser context
5468
* parse a Notation attribute type.
5470
* Note: the leading 'NOTATION' S part has already been parsed...
5472
* [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5474
* [ VC: Notation Attributes ]
5475
* Values of this type must match one of the notation names included
5476
* in the declaration; all notation names in the declaration must be declared.
5478
* Returns: the notation attribute tree built while parsing
5482
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5483
const xmlChar *name;
5484
xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5487
xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5494
name = xmlParseName(ctxt);
5496
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5497
"Name expected in NOTATION declaration\n");
5498
xmlFreeEnumeration(ret);
5502
while (tmp != NULL) {
5503
if (xmlStrEqual(name, tmp->name)) {
5504
xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5505
"standalone: attribute notation value token %s duplicated\n",
5507
if (!xmlDictOwns(ctxt->dict, name))
5508
xmlFree((xmlChar *) name);
5514
cur = xmlCreateEnumeration(name);
5516
xmlFreeEnumeration(ret);
5519
if (last == NULL) ret = last = cur;
5526
} while (RAW == '|');
5528
xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5529
xmlFreeEnumeration(ret);
5537
* xmlParseEnumerationType:
5538
* @ctxt: an XML parser context
5540
* parse an Enumeration attribute type.
5542
* [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5544
* [ VC: Enumeration ]
5545
* Values of this type must match one of the Nmtoken tokens in
5548
* Returns: the enumeration attribute tree built while parsing
5552
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5554
xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5557
xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5564
name = xmlParseNmtoken(ctxt);
5566
xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5570
while (tmp != NULL) {
5571
if (xmlStrEqual(name, tmp->name)) {
5572
xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5573
"standalone: attribute enumeration value token %s duplicated\n",
5575
if (!xmlDictOwns(ctxt->dict, name))
5582
cur = xmlCreateEnumeration(name);
5583
if (!xmlDictOwns(ctxt->dict, name))
5586
xmlFreeEnumeration(ret);
5589
if (last == NULL) ret = last = cur;
5596
} while (RAW == '|');
5598
xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5606
* xmlParseEnumeratedType:
5607
* @ctxt: an XML parser context
5608
* @tree: the enumeration tree built while parsing
5610
* parse an Enumerated attribute type.
5612
* [57] EnumeratedType ::= NotationType | Enumeration
5614
* [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5617
* Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5621
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5622
if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5624
if (!IS_BLANK_CH(CUR)) {
5625
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5626
"Space required after 'NOTATION'\n");
5630
*tree = xmlParseNotationType(ctxt);
5631
if (*tree == NULL) return(0);
5632
return(XML_ATTRIBUTE_NOTATION);
5634
*tree = xmlParseEnumerationType(ctxt);
5635
if (*tree == NULL) return(0);
5636
return(XML_ATTRIBUTE_ENUMERATION);
5640
* xmlParseAttributeType:
5641
* @ctxt: an XML parser context
5642
* @tree: the enumeration tree built while parsing
5644
* parse the Attribute list def for an element
5646
* [54] AttType ::= StringType | TokenizedType | EnumeratedType
5648
* [55] StringType ::= 'CDATA'
5650
* [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5651
* 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5653
* Validity constraints for attribute values syntax are checked in
5654
* xmlValidateAttributeValue()
5657
* Values of type ID must match the Name production. A name must not
5658
* appear more than once in an XML document as a value of this type;
5659
* i.e., ID values must uniquely identify the elements which bear them.
5661
* [ VC: One ID per Element Type ]
5662
* No element type may have more than one ID attribute specified.
5664
* [ VC: ID Attribute Default ]
5665
* An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5668
* Values of type IDREF must match the Name production, and values
5669
* of type IDREFS must match Names; each IDREF Name must match the value
5670
* of an ID attribute on some element in the XML document; i.e. IDREF
5671
* values must match the value of some ID attribute.
5673
* [ VC: Entity Name ]
5674
* Values of type ENTITY must match the Name production, values
5675
* of type ENTITIES must match Names; each Entity Name must match the
5676
* name of an unparsed entity declared in the DTD.
5678
* [ VC: Name Token ]
5679
* Values of type NMTOKEN must match the Nmtoken production; values
5680
* of type NMTOKENS must match Nmtokens.
5682
* Returns the attribute type
5685
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5687
if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5689
return(XML_ATTRIBUTE_CDATA);
5690
} else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5692
return(XML_ATTRIBUTE_IDREFS);
5693
} else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5695
return(XML_ATTRIBUTE_IDREF);
5696
} else if ((RAW == 'I') && (NXT(1) == 'D')) {
5698
return(XML_ATTRIBUTE_ID);
5699
} else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5701
return(XML_ATTRIBUTE_ENTITY);
5702
} else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5704
return(XML_ATTRIBUTE_ENTITIES);
5705
} else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5707
return(XML_ATTRIBUTE_NMTOKENS);
5708
} else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5710
return(XML_ATTRIBUTE_NMTOKEN);
5712
return(xmlParseEnumeratedType(ctxt, tree));
5716
* xmlParseAttributeListDecl:
5717
* @ctxt: an XML parser context
5719
* parse the Attribute list def for an element
5721
* [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5723
* [53] AttDef ::= S Name S AttType S DefaultDecl
5727
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5728
const xmlChar *elemName;
5729
const xmlChar *attrName;
5730
xmlEnumerationPtr tree;
5732
if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5733
xmlParserInputPtr input = ctxt->input;
5736
if (!IS_BLANK_CH(CUR)) {
5737
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5738
"Space required after '<!ATTLIST'\n");
5741
elemName = xmlParseName(ctxt);
5742
if (elemName == NULL) {
5743
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5744
"ATTLIST: no name for Element\n");
5749
while (RAW != '>') {
5750
const xmlChar *check = CUR_PTR;
5753
xmlChar *defaultValue = NULL;
5757
attrName = xmlParseName(ctxt);
5758
if (attrName == NULL) {
5759
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5760
"ATTLIST: no name for Attribute\n");
5764
if (!IS_BLANK_CH(CUR)) {
5765
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5766
"Space required after the attribute name\n");
5771
type = xmlParseAttributeType(ctxt, &tree);
5777
if (!IS_BLANK_CH(CUR)) {
5778
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5779
"Space required after the attribute type\n");
5781
xmlFreeEnumeration(tree);
5786
def = xmlParseDefaultDecl(ctxt, &defaultValue);
5788
if (defaultValue != NULL)
5789
xmlFree(defaultValue);
5791
xmlFreeEnumeration(tree);
5794
if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5795
xmlAttrNormalizeSpace(defaultValue, defaultValue);
5799
if (!IS_BLANK_CH(CUR)) {
5800
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5801
"Space required after the attribute default value\n");
5802
if (defaultValue != NULL)
5803
xmlFree(defaultValue);
5805
xmlFreeEnumeration(tree);
5810
if (check == CUR_PTR) {
5811
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5812
"in xmlParseAttributeListDecl\n");
5813
if (defaultValue != NULL)
5814
xmlFree(defaultValue);
5816
xmlFreeEnumeration(tree);
5819
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5820
(ctxt->sax->attributeDecl != NULL))
5821
ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5822
type, def, defaultValue, tree);
5823
else if (tree != NULL)
5824
xmlFreeEnumeration(tree);
5826
if ((ctxt->sax2) && (defaultValue != NULL) &&
5827
(def != XML_ATTRIBUTE_IMPLIED) &&
5828
(def != XML_ATTRIBUTE_REQUIRED)) {
5829
xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5832
xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5834
if (defaultValue != NULL)
5835
xmlFree(defaultValue);
5839
if (input != ctxt->input) {
5840
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5841
"Attribute list declaration doesn't start and stop in the same entity\n",
5850
* xmlParseElementMixedContentDecl:
5851
* @ctxt: an XML parser context
5852
* @inputchk: the input used for the current entity, needed for boundary checks
5854
* parse the declaration for a Mixed Element content
5855
* The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5857
* [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5858
* '(' S? '#PCDATA' S? ')'
5860
* [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5862
* [ VC: No Duplicate Types ]
5863
* The same name must not appear more than once in a single
5864
* mixed-content declaration.
5866
* returns: the list of the xmlElementContentPtr describing the element choices
5868
xmlElementContentPtr
5869
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
5870
xmlElementContentPtr ret = NULL, cur = NULL, n;
5871
const xmlChar *elem = NULL;
5874
if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5879
if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5880
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5881
"Element content declaration doesn't start and stop in the same entity\n",
5885
ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5889
ret->ocur = XML_ELEMENT_CONTENT_MULT;
5894
if ((RAW == '(') || (RAW == '|')) {
5895
ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5896
if (ret == NULL) return(NULL);
5898
while (RAW == '|') {
5901
ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5902
if (ret == NULL) return(NULL);
5908
n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5909
if (n == NULL) return(NULL);
5910
n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5919
elem = xmlParseName(ctxt);
5921
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5922
"xmlParseElementMixedContentDecl : Name expected\n");
5923
xmlFreeDocElementContent(ctxt->myDoc, cur);
5929
if ((RAW == ')') && (NXT(1) == '*')) {
5931
cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
5932
XML_ELEMENT_CONTENT_ELEMENT);
5933
if (cur->c2 != NULL)
5934
cur->c2->parent = cur;
5937
ret->ocur = XML_ELEMENT_CONTENT_MULT;
5938
if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5939
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5940
"Element content declaration doesn't start and stop in the same entity\n",
5945
xmlFreeDocElementContent(ctxt->myDoc, ret);
5946
xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
5951
xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
5957
* xmlParseElementChildrenContentDeclPriv:
5958
* @ctxt: an XML parser context
5959
* @inputchk: the input used for the current entity, needed for boundary checks
5960
* @depth: the level of recursion
5962
* parse the declaration for a Mixed Element content
5963
* The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5966
* [47] children ::= (choice | seq) ('?' | '*' | '+')?
5968
* [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5970
* [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5972
* [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5974
* [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5975
* TODO Parameter-entity replacement text must be properly nested
5976
* with parenthesized groups. That is to say, if either of the
5977
* opening or closing parentheses in a choice, seq, or Mixed
5978
* construct is contained in the replacement text for a parameter
5979
* entity, both must be contained in the same replacement text. For
5980
* interoperability, if a parameter-entity reference appears in a
5981
* choice, seq, or Mixed construct, its replacement text should not
5982
* be empty, and neither the first nor last non-blank character of
5983
* the replacement text should be a connector (| or ,).
5985
* Returns the tree of xmlElementContentPtr describing the element
5988
static xmlElementContentPtr
5989
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
5991
xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
5992
const xmlChar *elem;
5995
if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
5997
xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
5998
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6005
int inputid = ctxt->input->id;
6007
/* Recurse on first child */
6010
cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6015
elem = xmlParseName(ctxt);
6017
xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6020
cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6022
xmlErrMemory(ctxt, NULL);
6027
cur->ocur = XML_ELEMENT_CONTENT_OPT;
6029
} else if (RAW == '*') {
6030
cur->ocur = XML_ELEMENT_CONTENT_MULT;
6032
} else if (RAW == '+') {
6033
cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6036
cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6042
while (RAW != ')') {
6044
* Each loop we parse one separator and one element.
6047
if (type == 0) type = CUR;
6050
* Detect "Name | Name , Name" error
6052
else if (type != CUR) {
6053
xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6054
"xmlParseElementChildrenContentDecl : '%c' expected\n",
6056
if ((last != NULL) && (last != ret))
6057
xmlFreeDocElementContent(ctxt->myDoc, last);
6059
xmlFreeDocElementContent(ctxt->myDoc, ret);
6064
op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6066
if ((last != NULL) && (last != ret))
6067
xmlFreeDocElementContent(ctxt->myDoc, last);
6068
xmlFreeDocElementContent(ctxt->myDoc, ret);
6086
} else if (RAW == '|') {
6087
if (type == 0) type = CUR;
6090
* Detect "Name , Name | Name" error
6092
else if (type != CUR) {
6093
xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6094
"xmlParseElementChildrenContentDecl : '%c' expected\n",
6096
if ((last != NULL) && (last != ret))
6097
xmlFreeDocElementContent(ctxt->myDoc, last);
6099
xmlFreeDocElementContent(ctxt->myDoc, ret);
6104
op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6106
if ((last != NULL) && (last != ret))
6107
xmlFreeDocElementContent(ctxt->myDoc, last);
6109
xmlFreeDocElementContent(ctxt->myDoc, ret);
6128
xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6129
if ((last != NULL) && (last != ret))
6130
xmlFreeDocElementContent(ctxt->myDoc, last);
6132
xmlFreeDocElementContent(ctxt->myDoc, ret);
6139
int inputid = ctxt->input->id;
6140
/* Recurse on second child */
6143
last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6147
elem = xmlParseName(ctxt);
6149
xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6151
xmlFreeDocElementContent(ctxt->myDoc, ret);
6154
last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6157
xmlFreeDocElementContent(ctxt->myDoc, ret);
6161
last->ocur = XML_ELEMENT_CONTENT_OPT;
6163
} else if (RAW == '*') {
6164
last->ocur = XML_ELEMENT_CONTENT_MULT;
6166
} else if (RAW == '+') {
6167
last->ocur = XML_ELEMENT_CONTENT_PLUS;
6170
last->ocur = XML_ELEMENT_CONTENT_ONCE;
6176
if ((cur != NULL) && (last != NULL)) {
6181
if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6182
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6183
"Element content declaration doesn't start and stop in the same entity\n",
6189
if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6190
(ret->ocur == XML_ELEMENT_CONTENT_MULT))
6191
ret->ocur = XML_ELEMENT_CONTENT_MULT;
6193
ret->ocur = XML_ELEMENT_CONTENT_OPT;
6196
} else if (RAW == '*') {
6198
ret->ocur = XML_ELEMENT_CONTENT_MULT;
6201
* Some normalization:
6202
* (a | b* | c?)* == (a | b | c)*
6204
while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6205
if ((cur->c1 != NULL) &&
6206
((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6207
(cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6208
cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6209
if ((cur->c2 != NULL) &&
6210
((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6211
(cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6212
cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6217
} else if (RAW == '+') {
6221
if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6222
(ret->ocur == XML_ELEMENT_CONTENT_MULT))
6223
ret->ocur = XML_ELEMENT_CONTENT_MULT;
6225
ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6227
* Some normalization:
6228
* (a | b*)+ == (a | b)*
6229
* (a | b?)+ == (a | b)*
6231
while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6232
if ((cur->c1 != NULL) &&
6233
((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6234
(cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6235
cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6238
if ((cur->c2 != NULL) &&
6239
((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6240
(cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6241
cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6247
ret->ocur = XML_ELEMENT_CONTENT_MULT;
6255
* xmlParseElementChildrenContentDecl:
6256
* @ctxt: an XML parser context
6257
* @inputchk: the input used for the current entity, needed for boundary checks
6259
* parse the declaration for a Mixed Element content
6260
* The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6262
* [47] children ::= (choice | seq) ('?' | '*' | '+')?
6264
* [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6266
* [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6268
* [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6270
* [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6271
* TODO Parameter-entity replacement text must be properly nested
6272
* with parenthesized groups. That is to say, if either of the
6273
* opening or closing parentheses in a choice, seq, or Mixed
6274
* construct is contained in the replacement text for a parameter
6275
* entity, both must be contained in the same replacement text. For
6276
* interoperability, if a parameter-entity reference appears in a
6277
* choice, seq, or Mixed construct, its replacement text should not
6278
* be empty, and neither the first nor last non-blank character of
6279
* the replacement text should be a connector (| or ,).
6281
* Returns the tree of xmlElementContentPtr describing the element
6284
xmlElementContentPtr
6285
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6286
/* stub left for API/ABI compat */
6287
return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6291
* xmlParseElementContentDecl:
6292
* @ctxt: an XML parser context
6293
* @name: the name of the element being defined.
6294
* @result: the Element Content pointer will be stored here if any
6296
* parse the declaration for an Element content either Mixed or Children,
6297
* the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6299
* [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6301
* returns: the type of element content XML_ELEMENT_TYPE_xxx
6305
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6306
xmlElementContentPtr *result) {
6308
xmlElementContentPtr tree = NULL;
6309
int inputid = ctxt->input->id;
6315
xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6316
"xmlParseElementContentDecl : %s '(' expected\n", name);
6322
if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6323
tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6324
res = XML_ELEMENT_TYPE_MIXED;
6326
tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6327
res = XML_ELEMENT_TYPE_ELEMENT;
6335
* xmlParseElementDecl:
6336
* @ctxt: an XML parser context
6338
* parse an Element declaration.
6340
* [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6342
* [ VC: Unique Element Type Declaration ]
6343
* No element type may be declared more than once
6345
* Returns the type of the element, or -1 in case of error
6348
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6349
const xmlChar *name;
6351
xmlElementContentPtr content = NULL;
6353
/* GROW; done in the caller */
6354
if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6355
xmlParserInputPtr input = ctxt->input;
6358
if (!IS_BLANK_CH(CUR)) {
6359
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6360
"Space required after 'ELEMENT'\n");
6363
name = xmlParseName(ctxt);
6365
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6366
"xmlParseElementDecl: no name for Element\n");
6369
while ((RAW == 0) && (ctxt->inputNr > 1))
6371
if (!IS_BLANK_CH(CUR)) {
6372
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6373
"Space required after the element name\n");
6376
if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6379
* Element must always be empty.
6381
ret = XML_ELEMENT_TYPE_EMPTY;
6382
} else if ((RAW == 'A') && (NXT(1) == 'N') &&
6386
* Element is a generic container.
6388
ret = XML_ELEMENT_TYPE_ANY;
6389
} else if (RAW == '(') {
6390
ret = xmlParseElementContentDecl(ctxt, name, &content);
6393
* [ WFC: PEs in Internal Subset ] error handling.
6395
if ((RAW == '%') && (ctxt->external == 0) &&
6396
(ctxt->inputNr == 1)) {
6397
xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6398
"PEReference: forbidden within markup decl in internal subset\n");
6400
xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6401
"xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6408
* Pop-up of finished entities.
6410
while ((RAW == 0) && (ctxt->inputNr > 1))
6415
xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6416
if (content != NULL) {
6417
xmlFreeDocElementContent(ctxt->myDoc, content);
6420
if (input != ctxt->input) {
6421
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6422
"Element declaration doesn't start and stop in the same entity\n");
6426
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6427
(ctxt->sax->elementDecl != NULL)) {
6428
if (content != NULL)
6429
content->parent = NULL;
6430
ctxt->sax->elementDecl(ctxt->userData, name, ret,
6432
if ((content != NULL) && (content->parent == NULL)) {
6434
* this is a trick: if xmlAddElementDecl is called,
6435
* instead of copying the full tree it is plugged directly
6436
* if called from the parser. Avoid duplicating the
6437
* interfaces or change the API/ABI
6439
xmlFreeDocElementContent(ctxt->myDoc, content);
6441
} else if (content != NULL) {
6442
xmlFreeDocElementContent(ctxt->myDoc, content);
6450
* xmlParseConditionalSections
6451
* @ctxt: an XML parser context
6453
* [61] conditionalSect ::= includeSect | ignoreSect
6454
* [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6455
* [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6456
* [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6457
* [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6461
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6462
int id = ctxt->input->id;
6466
if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6470
xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6472
if (ctxt->input->id != id) {
6473
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6474
"All markup of the conditional section is not in the same entity\n",
6479
if (xmlParserDebugEntities) {
6480
if ((ctxt->input != NULL) && (ctxt->input->filename))
6481
xmlGenericError(xmlGenericErrorContext,
6482
"%s(%d): ", ctxt->input->filename,
6484
xmlGenericError(xmlGenericErrorContext,
6485
"Entering INCLUDE Conditional Section\n");
6488
while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6490
const xmlChar *check = CUR_PTR;
6491
unsigned int cons = ctxt->input->consumed;
6493
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6494
xmlParseConditionalSections(ctxt);
6495
} else if (IS_BLANK_CH(CUR)) {
6497
} else if (RAW == '%') {
6498
xmlParsePEReference(ctxt);
6500
xmlParseMarkupDecl(ctxt);
6503
* Pop-up of finished entities.
6505
while ((RAW == 0) && (ctxt->inputNr > 1))
6508
if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6509
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6513
if (xmlParserDebugEntities) {
6514
if ((ctxt->input != NULL) && (ctxt->input->filename))
6515
xmlGenericError(xmlGenericErrorContext,
6516
"%s(%d): ", ctxt->input->filename,
6518
xmlGenericError(xmlGenericErrorContext,
6519
"Leaving INCLUDE Conditional Section\n");
6522
} else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6524
xmlParserInputState instate;
6530
xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6532
if (ctxt->input->id != id) {
6533
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6534
"All markup of the conditional section is not in the same entity\n",
6539
if (xmlParserDebugEntities) {
6540
if ((ctxt->input != NULL) && (ctxt->input->filename))
6541
xmlGenericError(xmlGenericErrorContext,
6542
"%s(%d): ", ctxt->input->filename,
6544
xmlGenericError(xmlGenericErrorContext,
6545
"Entering IGNORE Conditional Section\n");
6549
* Parse up to the end of the conditional section
6550
* But disable SAX event generating DTD building in the meantime
6552
state = ctxt->disableSAX;
6553
instate = ctxt->instate;
6554
if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6555
ctxt->instate = XML_PARSER_IGNORE;
6557
while ((depth >= 0) && (RAW != 0)) {
6558
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6563
if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6564
if (--depth >= 0) SKIP(3);
6571
ctxt->disableSAX = state;
6572
ctxt->instate = instate;
6574
if (xmlParserDebugEntities) {
6575
if ((ctxt->input != NULL) && (ctxt->input->filename))
6576
xmlGenericError(xmlGenericErrorContext,
6577
"%s(%d): ", ctxt->input->filename,
6579
xmlGenericError(xmlGenericErrorContext,
6580
"Leaving IGNORE Conditional Section\n");
6584
xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6591
xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6593
if (ctxt->input->id != id) {
6594
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6595
"All markup of the conditional section is not in the same entity\n",
6603
* xmlParseMarkupDecl:
6604
* @ctxt: an XML parser context
6606
* parse Markup declarations
6608
* [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6609
* NotationDecl | PI | Comment
6611
* [ VC: Proper Declaration/PE Nesting ]
6612
* Parameter-entity replacement text must be properly nested with
6613
* markup declarations. That is to say, if either the first character
6614
* or the last character of a markup declaration (markupdecl above) is
6615
* contained in the replacement text for a parameter-entity reference,
6616
* both must be contained in the same replacement text.
6618
* [ WFC: PEs in Internal Subset ]
6619
* In the internal DTD subset, parameter-entity references can occur
6620
* only where markup declarations can occur, not within markup declarations.
6621
* (This does not apply to references that occur in external parameter
6622
* entities or to the external subset.)
6625
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6628
if (NXT(1) == '!') {
6632
xmlParseElementDecl(ctxt);
6633
else if (NXT(3) == 'N')
6634
xmlParseEntityDecl(ctxt);
6637
xmlParseAttributeListDecl(ctxt);
6640
xmlParseNotationDecl(ctxt);
6643
xmlParseComment(ctxt);
6646
/* there is an error but it will be detected later */
6649
} else if (NXT(1) == '?') {
6654
* This is only for internal subset. On external entities,
6655
* the replacement is done before parsing stage
6657
if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6658
xmlParsePEReference(ctxt);
6661
* Conditional sections are allowed from entities included
6662
* by PE References in the internal subset.
6664
if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6665
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6666
xmlParseConditionalSections(ctxt);
6670
ctxt->instate = XML_PARSER_DTD;
6675
* @ctxt: an XML parser context
6677
* parse an XML declaration header for external entities
6679
* [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6683
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6685
const xmlChar *encoding;
6688
* We know that '<?xml' is here.
6690
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6693
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6697
if (!IS_BLANK_CH(CUR)) {
6698
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6699
"Space needed after '<?xml'\n");
6704
* We may have the VersionInfo here.
6706
version = xmlParseVersionInfo(ctxt);
6707
if (version == NULL)
6708
version = xmlCharStrdup(XML_DEFAULT_VERSION);
6710
if (!IS_BLANK_CH(CUR)) {
6711
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6712
"Space needed here\n");
6715
ctxt->input->version = version;
6718
* We must have the encoding declaration
6720
encoding = xmlParseEncodingDecl(ctxt);
6721
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6723
* The XML REC instructs us to stop parsing right here
6727
if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6728
xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6729
"Missing encoding in text declaration\n");
6733
if ((RAW == '?') && (NXT(1) == '>')) {
6735
} else if (RAW == '>') {
6736
/* Deprecated old WD ... */
6737
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6740
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6741
MOVETO_ENDTAG(CUR_PTR);
6747
* xmlParseExternalSubset:
6748
* @ctxt: an XML parser context
6749
* @ExternalID: the external identifier
6750
* @SystemID: the system identifier (or URL)
6752
* parse Markup declarations from an external subset
6754
* [30] extSubset ::= textDecl? extSubsetDecl
6756
* [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6759
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6760
const xmlChar *SystemID) {
6761
xmlDetectSAX2(ctxt);
6764
if ((ctxt->encoding == NULL) &&
6765
(ctxt->input->end - ctxt->input->cur >= 4)) {
6767
xmlCharEncoding enc;
6773
enc = xmlDetectCharEncoding(start, 4);
6774
if (enc != XML_CHAR_ENCODING_NONE)
6775
xmlSwitchEncoding(ctxt, enc);
6778
if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
6779
xmlParseTextDecl(ctxt);
6780
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6782
* The XML REC instructs us to stop parsing right here
6784
ctxt->instate = XML_PARSER_EOF;
6788
if (ctxt->myDoc == NULL) {
6789
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6790
if (ctxt->myDoc == NULL) {
6791
xmlErrMemory(ctxt, "New Doc failed");
6794
ctxt->myDoc->properties = XML_DOC_INTERNAL;
6796
if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6797
xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6799
ctxt->instate = XML_PARSER_DTD;
6801
while (((RAW == '<') && (NXT(1) == '?')) ||
6802
((RAW == '<') && (NXT(1) == '!')) ||
6803
(RAW == '%') || IS_BLANK_CH(CUR)) {
6804
const xmlChar *check = CUR_PTR;
6805
unsigned int cons = ctxt->input->consumed;
6808
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6809
xmlParseConditionalSections(ctxt);
6810
} else if (IS_BLANK_CH(CUR)) {
6812
} else if (RAW == '%') {
6813
xmlParsePEReference(ctxt);
6815
xmlParseMarkupDecl(ctxt);
6818
* Pop-up of finished entities.
6820
while ((RAW == 0) && (ctxt->inputNr > 1))
6823
if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6824
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6830
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6836
* xmlParseReference:
6837
* @ctxt: an XML parser context
6839
* parse and handle entity references in content, depending on the SAX
6840
* interface, this may end-up in a call to character() if this is a
6841
* CharRef, a predefined entity, if there is no reference() callback.
6842
* or if the parser was asked to switch to that mode.
6844
* [67] Reference ::= EntityRef | CharRef
6847
xmlParseReference(xmlParserCtxtPtr ctxt) {
6851
xmlNodePtr list = NULL;
6852
xmlParserErrors ret = XML_ERR_OK;
6859
* Simple case of a CharRef
6861
if (NXT(1) == '#') {
6865
int value = xmlParseCharRef(ctxt);
6869
if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6871
* So we are using non-UTF-8 buffers
6872
* Check that the char fit on 8bits, if not
6873
* generate a CharRef.
6875
if (value <= 0xFF) {
6878
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6879
(!ctxt->disableSAX))
6880
ctxt->sax->characters(ctxt->userData, out, 1);
6882
if ((hex == 'x') || (hex == 'X'))
6883
snprintf((char *)out, sizeof(out), "#x%X", value);
6885
snprintf((char *)out, sizeof(out), "#%d", value);
6886
if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6887
(!ctxt->disableSAX))
6888
ctxt->sax->reference(ctxt->userData, out);
6892
* Just encode the value in UTF-8
6894
COPY_BUF(0 ,out, i, value);
6896
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6897
(!ctxt->disableSAX))
6898
ctxt->sax->characters(ctxt->userData, out, i);
6904
* We are seeing an entity reference
6906
ent = xmlParseEntityRef(ctxt);
6907
if (ent == NULL) return;
6908
if (!ctxt->wellFormed)
6910
was_checked = ent->checked;
6912
/* special case of predefined entities */
6913
if ((ent->name == NULL) ||
6914
(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6916
if (val == NULL) return;
6918
* inline the entity.
6920
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6921
(!ctxt->disableSAX))
6922
ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6927
* The first reference to the entity trigger a parsing phase
6928
* where the ent->children is filled with the result from
6931
if (ent->checked == 0) {
6932
unsigned long oldnbent = ctxt->nbentities;
6935
* This is a bit hackish but this seems the best
6936
* way to make sure both SAX and DOM entity support
6940
if (ctxt->userData == ctxt)
6943
user_data = ctxt->userData;
6946
* Check that this entity is well formed
6947
* 4.3.2: An internal general parsed entity is well-formed
6948
* if its replacement text matches the production labeled
6951
if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6953
ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6957
} else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6959
ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6960
user_data, ctxt->depth, ent->URI,
6961
ent->ExternalID, &list);
6964
ret = XML_ERR_ENTITY_PE_INTERNAL;
6965
xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6966
"invalid entity type found\n", NULL);
6970
* Store the number of entities needing parsing for this entity
6971
* content and do checkings
6973
ent->checked = ctxt->nbentities - oldnbent;
6974
if (ret == XML_ERR_ENTITY_LOOP) {
6975
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6976
xmlFreeNodeList(list);
6979
if (xmlParserEntityCheck(ctxt, 0, ent)) {
6980
xmlFreeNodeList(list);
6984
if ((ret == XML_ERR_OK) && (list != NULL)) {
6985
if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6986
(ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6987
(ent->children == NULL)) {
6988
ent->children = list;
6989
if (ctxt->replaceEntities) {
6991
* Prune it directly in the generated document
6992
* except for single text nodes.
6994
if (((list->type == XML_TEXT_NODE) &&
6995
(list->next == NULL)) ||
6996
(ctxt->parseMode == XML_PARSE_READER)) {
6997
list->parent = (xmlNodePtr) ent;
7002
while (list != NULL) {
7003
list->parent = (xmlNodePtr) ctxt->node;
7004
list->doc = ctxt->myDoc;
7005
if (list->next == NULL)
7009
list = ent->children;
7010
#ifdef LIBXML_LEGACY_ENABLED
7011
if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7012
xmlAddEntityReference(ent, list, NULL);
7013
#endif /* LIBXML_LEGACY_ENABLED */
7017
while (list != NULL) {
7018
list->parent = (xmlNodePtr) ent;
7019
xmlSetTreeDoc(list, ent->doc);
7020
if (list->next == NULL)
7026
xmlFreeNodeList(list);
7029
} else if ((ret != XML_ERR_OK) &&
7030
(ret != XML_WAR_UNDECLARED_ENTITY)) {
7031
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7032
"Entity '%s' failed to parse\n", ent->name);
7033
} else if (list != NULL) {
7034
xmlFreeNodeList(list);
7037
if (ent->checked == 0)
7039
} else if (ent->checked != 1) {
7040
ctxt->nbentities += ent->checked;
7044
* Now that the entity content has been gathered
7045
* provide it to the application, this can take different forms based
7046
* on the parsing modes.
7048
if (ent->children == NULL) {
7050
* Probably running in SAX mode and the callbacks don't
7051
* build the entity content. So unless we already went
7052
* though parsing for first checking go though the entity
7053
* content to generate callbacks associated to the entity
7055
if (was_checked != 0) {
7058
* This is a bit hackish but this seems the best
7059
* way to make sure both SAX and DOM entity support
7062
if (ctxt->userData == ctxt)
7065
user_data = ctxt->userData;
7067
if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7069
ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7070
ent->content, user_data, NULL);
7072
} else if (ent->etype ==
7073
XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7075
ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7076
ctxt->sax, user_data, ctxt->depth,
7077
ent->URI, ent->ExternalID, NULL);
7080
ret = XML_ERR_ENTITY_PE_INTERNAL;
7081
xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7082
"invalid entity type found\n", NULL);
7084
if (ret == XML_ERR_ENTITY_LOOP) {
7085
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7089
if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7090
(ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7092
* Entity reference callback comes second, it's somewhat
7093
* superfluous but a compatibility to historical behaviour
7095
ctxt->sax->reference(ctxt->userData, ent->name);
7101
* If we didn't get any children for the entity being built
7103
if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7104
(ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7108
ctxt->sax->reference(ctxt->userData, ent->name);
7112
if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7114
* There is a problem on the handling of _private for entities
7115
* (bug 155816): Should we copy the content of the field from
7116
* the entity (possibly overwriting some value set by the user
7117
* when a copy is created), should we leave it alone, or should
7118
* we try to take care of different situations? The problem
7119
* is exacerbated by the usage of this field by the xmlReader.
7120
* To fix this bug, we look at _private on the created node
7121
* and, if it's NULL, we copy in whatever was in the entity.
7122
* If it's not NULL we leave it alone. This is somewhat of a
7123
* hack - maybe we should have further tests to determine
7126
if ((ctxt->node != NULL) && (ent->children != NULL)) {
7128
* Seems we are generating the DOM content, do
7129
* a simple tree copy for all references except the first
7130
* In the first occurrence list contains the replacement.
7131
* progressive == 2 means we are operating on the Reader
7132
* and since nodes are discarded we must copy all the time.
7134
if (((list == NULL) && (ent->owner == 0)) ||
7135
(ctxt->parseMode == XML_PARSE_READER)) {
7136
xmlNodePtr nw = NULL, cur, firstChild = NULL;
7139
* when operating on a reader, the entities definitions
7140
* are always owning the entities subtree.
7141
if (ctxt->parseMode == XML_PARSE_READER)
7145
cur = ent->children;
7146
while (cur != NULL) {
7147
nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7149
if (nw->_private == NULL)
7150
nw->_private = cur->_private;
7151
if (firstChild == NULL){
7154
nw = xmlAddChild(ctxt->node, nw);
7156
if (cur == ent->last) {
7158
* needed to detect some strange empty
7159
* node cases in the reader tests
7161
if ((ctxt->parseMode == XML_PARSE_READER) &&
7163
(nw->type == XML_ELEMENT_NODE) &&
7164
(nw->children == NULL))
7171
#ifdef LIBXML_LEGACY_ENABLED
7172
if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7173
xmlAddEntityReference(ent, firstChild, nw);
7174
#endif /* LIBXML_LEGACY_ENABLED */
7175
} else if (list == NULL) {
7176
xmlNodePtr nw = NULL, cur, next, last,
7179
* Copy the entity child list and make it the new
7180
* entity child list. The goal is to make sure any
7181
* ID or REF referenced will be the one from the
7182
* document content and not the entity copy.
7184
cur = ent->children;
7185
ent->children = NULL;
7188
while (cur != NULL) {
7192
nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7194
if (nw->_private == NULL)
7195
nw->_private = cur->_private;
7196
if (firstChild == NULL){
7199
xmlAddChild((xmlNodePtr) ent, nw);
7200
xmlAddChild(ctxt->node, cur);
7206
if (ent->owner == 0)
7208
#ifdef LIBXML_LEGACY_ENABLED
7209
if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7210
xmlAddEntityReference(ent, firstChild, nw);
7211
#endif /* LIBXML_LEGACY_ENABLED */
7213
const xmlChar *nbktext;
7216
* the name change is to avoid coalescing of the
7217
* node with a possible previous text one which
7218
* would make ent->children a dangling pointer
7220
nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7222
if (ent->children->type == XML_TEXT_NODE)
7223
ent->children->name = nbktext;
7224
if ((ent->last != ent->children) &&
7225
(ent->last->type == XML_TEXT_NODE))
7226
ent->last->name = nbktext;
7227
xmlAddChildList(ctxt->node, ent->children);
7231
* This is to avoid a nasty side effect, see
7232
* characters() in SAX.c
7242
* xmlParseEntityRef:
7243
* @ctxt: an XML parser context
7245
* parse ENTITY references declarations
7247
* [68] EntityRef ::= '&' Name ';'
7249
* [ WFC: Entity Declared ]
7250
* In a document without any DTD, a document with only an internal DTD
7251
* subset which contains no parameter entity references, or a document
7252
* with "standalone='yes'", the Name given in the entity reference
7253
* must match that in an entity declaration, except that well-formed
7254
* documents need not declare any of the following entities: amp, lt,
7255
* gt, apos, quot. The declaration of a parameter entity must precede
7256
* any reference to it. Similarly, the declaration of a general entity
7257
* must precede any reference to it which appears in a default value in an
7258
* attribute-list declaration. Note that if entities are declared in the
7259
* external subset or in external parameter entities, a non-validating
7260
* processor is not obligated to read and process their declarations;
7261
* for such documents, the rule that an entity must be declared is a
7262
* well-formedness constraint only if standalone='yes'.
7264
* [ WFC: Parsed Entity ]
7265
* An entity reference must not contain the name of an unparsed entity
7267
* Returns the xmlEntityPtr if found, or NULL otherwise.
7270
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7271
const xmlChar *name;
7272
xmlEntityPtr ent = NULL;
7279
name = xmlParseName(ctxt);
7281
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7282
"xmlParseEntityRef: no name\n");
7286
xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7292
* Predefined entities override any extra definition
7294
if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7295
ent = xmlGetPredefinedEntity(name);
7301
* Increase the number of entity references parsed
7306
* Ask first SAX for entity resolution, otherwise try the
7307
* entities which may have been stored in the parser context.
7309
if (ctxt->sax != NULL) {
7310
if (ctxt->sax->getEntity != NULL)
7311
ent = ctxt->sax->getEntity(ctxt->userData, name);
7312
if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7313
(ctxt->options & XML_PARSE_OLDSAX))
7314
ent = xmlGetPredefinedEntity(name);
7315
if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7316
(ctxt->userData==ctxt)) {
7317
ent = xmlSAX2GetEntity(ctxt, name);
7321
* [ WFC: Entity Declared ]
7322
* In a document without any DTD, a document with only an
7323
* internal DTD subset which contains no parameter entity
7324
* references, or a document with "standalone='yes'", the
7325
* Name given in the entity reference must match that in an
7326
* entity declaration, except that well-formed documents
7327
* need not declare any of the following entities: amp, lt,
7329
* The declaration of a parameter entity must precede any
7331
* Similarly, the declaration of a general entity must
7332
* precede any reference to it which appears in a default
7333
* value in an attribute-list declaration. Note that if
7334
* entities are declared in the external subset or in
7335
* external parameter entities, a non-validating processor
7336
* is not obligated to read and process their declarations;
7337
* for such documents, the rule that an entity must be
7338
* declared is a well-formedness constraint only if
7342
if ((ctxt->standalone == 1) ||
7343
((ctxt->hasExternalSubset == 0) &&
7344
(ctxt->hasPErefs == 0))) {
7345
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7346
"Entity '%s' not defined\n", name);
7348
xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7349
"Entity '%s' not defined\n", name);
7350
if ((ctxt->inSubset == 0) &&
7351
(ctxt->sax != NULL) &&
7352
(ctxt->sax->reference != NULL)) {
7353
ctxt->sax->reference(ctxt->userData, name);
7360
* [ WFC: Parsed Entity ]
7361
* An entity reference must not contain the name of an
7364
else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7365
xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7366
"Entity reference to unparsed entity %s\n", name);
7370
* [ WFC: No External Entity References ]
7371
* Attribute values cannot contain direct or indirect
7372
* entity references to external entities.
7374
else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7375
(ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7376
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7377
"Attribute references external entity '%s'\n", name);
7380
* [ WFC: No < in Attribute Values ]
7381
* The replacement text of any entity referred to directly or
7382
* indirectly in an attribute value (other than "<") must
7385
else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7386
(ent != NULL) && (ent->content != NULL) &&
7387
(ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7388
(xmlStrchr(ent->content, '<'))) {
7389
xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7390
"'<' in entity '%s' is not allowed in attributes values\n", name);
7394
* Internal check, no parameter entities here ...
7397
switch (ent->etype) {
7398
case XML_INTERNAL_PARAMETER_ENTITY:
7399
case XML_EXTERNAL_PARAMETER_ENTITY:
7400
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7401
"Attempt to reference the parameter entity '%s'\n",
7410
* [ WFC: No Recursion ]
7411
* A parsed entity must not contain a recursive reference
7412
* to itself, either directly or indirectly.
7413
* Done somewhere else
7419
* xmlParseStringEntityRef:
7420
* @ctxt: an XML parser context
7421
* @str: a pointer to an index in the string
7423
* parse ENTITY references declarations, but this version parses it from
7426
* [68] EntityRef ::= '&' Name ';'
7428
* [ WFC: Entity Declared ]
7429
* In a document without any DTD, a document with only an internal DTD
7430
* subset which contains no parameter entity references, or a document
7431
* with "standalone='yes'", the Name given in the entity reference
7432
* must match that in an entity declaration, except that well-formed
7433
* documents need not declare any of the following entities: amp, lt,
7434
* gt, apos, quot. The declaration of a parameter entity must precede
7435
* any reference to it. Similarly, the declaration of a general entity
7436
* must precede any reference to it which appears in a default value in an
7437
* attribute-list declaration. Note that if entities are declared in the
7438
* external subset or in external parameter entities, a non-validating
7439
* processor is not obligated to read and process their declarations;
7440
* for such documents, the rule that an entity must be declared is a
7441
* well-formedness constraint only if standalone='yes'.
7443
* [ WFC: Parsed Entity ]
7444
* An entity reference must not contain the name of an unparsed entity
7446
* Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7447
* is updated to the current location in the string.
7450
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7454
xmlEntityPtr ent = NULL;
7456
if ((str == NULL) || (*str == NULL))
7464
name = xmlParseStringName(ctxt, &ptr);
7466
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7467
"xmlParseStringEntityRef: no name\n");
7472
xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7481
* Predefined entities override any extra definition
7483
if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7484
ent = xmlGetPredefinedEntity(name);
7493
* Increase the number of entity references parsed
7498
* Ask first SAX for entity resolution, otherwise try the
7499
* entities which may have been stored in the parser context.
7501
if (ctxt->sax != NULL) {
7502
if (ctxt->sax->getEntity != NULL)
7503
ent = ctxt->sax->getEntity(ctxt->userData, name);
7504
if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7505
ent = xmlGetPredefinedEntity(name);
7506
if ((ent == NULL) && (ctxt->userData==ctxt)) {
7507
ent = xmlSAX2GetEntity(ctxt, name);
7512
* [ WFC: Entity Declared ]
7513
* In a document without any DTD, a document with only an
7514
* internal DTD subset which contains no parameter entity
7515
* references, or a document with "standalone='yes'", the
7516
* Name given in the entity reference must match that in an
7517
* entity declaration, except that well-formed documents
7518
* need not declare any of the following entities: amp, lt,
7520
* The declaration of a parameter entity must precede any
7522
* Similarly, the declaration of a general entity must
7523
* precede any reference to it which appears in a default
7524
* value in an attribute-list declaration. Note that if
7525
* entities are declared in the external subset or in
7526
* external parameter entities, a non-validating processor
7527
* is not obligated to read and process their declarations;
7528
* for such documents, the rule that an entity must be
7529
* declared is a well-formedness constraint only if
7533
if ((ctxt->standalone == 1) ||
7534
((ctxt->hasExternalSubset == 0) &&
7535
(ctxt->hasPErefs == 0))) {
7536
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7537
"Entity '%s' not defined\n", name);
7539
xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7540
"Entity '%s' not defined\n",
7543
/* TODO ? check regressions ctxt->valid = 0; */
7547
* [ WFC: Parsed Entity ]
7548
* An entity reference must not contain the name of an
7551
else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7552
xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7553
"Entity reference to unparsed entity %s\n", name);
7557
* [ WFC: No External Entity References ]
7558
* Attribute values cannot contain direct or indirect
7559
* entity references to external entities.
7561
else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7562
(ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7563
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7564
"Attribute references external entity '%s'\n", name);
7567
* [ WFC: No < in Attribute Values ]
7568
* The replacement text of any entity referred to directly or
7569
* indirectly in an attribute value (other than "<") must
7572
else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7573
(ent != NULL) && (ent->content != NULL) &&
7574
(ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7575
(xmlStrchr(ent->content, '<'))) {
7576
xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7577
"'<' in entity '%s' is not allowed in attributes values\n",
7582
* Internal check, no parameter entities here ...
7585
switch (ent->etype) {
7586
case XML_INTERNAL_PARAMETER_ENTITY:
7587
case XML_EXTERNAL_PARAMETER_ENTITY:
7588
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7589
"Attempt to reference the parameter entity '%s'\n",
7598
* [ WFC: No Recursion ]
7599
* A parsed entity must not contain a recursive reference
7600
* to itself, either directly or indirectly.
7601
* Done somewhere else
7610
* xmlParsePEReference:
7611
* @ctxt: an XML parser context
7613
* parse PEReference declarations
7614
* The entity content is handled directly by pushing its content as
7615
* a new input stream.
7617
* [69] PEReference ::= '%' Name ';'
7619
* [ WFC: No Recursion ]
7620
* A parsed entity must not contain a recursive
7621
* reference to itself, either directly or indirectly.
7623
* [ WFC: Entity Declared ]
7624
* In a document without any DTD, a document with only an internal DTD
7625
* subset which contains no parameter entity references, or a document
7626
* with "standalone='yes'", ... ... The declaration of a parameter
7627
* entity must precede any reference to it...
7629
* [ VC: Entity Declared ]
7630
* In a document with an external subset or external parameter entities
7631
* with "standalone='no'", ... ... The declaration of a parameter entity
7632
* must precede any reference to it...
7635
* Parameter-entity references may only appear in the DTD.
7636
* NOTE: misleading but this is handled.
7639
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7641
const xmlChar *name;
7642
xmlEntityPtr entity = NULL;
7643
xmlParserInputPtr input;
7648
name = xmlParseName(ctxt);
7650
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7651
"xmlParsePEReference: no name\n");
7655
xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7662
* Increase the number of entity references parsed
7667
* Request the entity from SAX
7669
if ((ctxt->sax != NULL) &&
7670
(ctxt->sax->getParameterEntity != NULL))
7671
entity = ctxt->sax->getParameterEntity(ctxt->userData,
7673
if (entity == NULL) {
7675
* [ WFC: Entity Declared ]
7676
* In a document without any DTD, a document with only an
7677
* internal DTD subset which contains no parameter entity
7678
* references, or a document with "standalone='yes'", ...
7679
* ... The declaration of a parameter entity must precede
7680
* any reference to it...
7682
if ((ctxt->standalone == 1) ||
7683
((ctxt->hasExternalSubset == 0) &&
7684
(ctxt->hasPErefs == 0))) {
7685
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7686
"PEReference: %%%s; not found\n",
7690
* [ VC: Entity Declared ]
7691
* In a document with an external subset or external
7692
* parameter entities with "standalone='no'", ...
7693
* ... The declaration of a parameter entity must
7694
* precede any reference to it...
7696
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7697
"PEReference: %%%s; not found\n",
7703
* Internal checking in case the entity quest barfed
7705
if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7706
(entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7707
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7708
"Internal: %%%s; is not a parameter entity\n",
7710
} else if (ctxt->input->free != deallocblankswrapper) {
7711
input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7712
if (xmlPushInput(ctxt, input) < 0)
7717
* handle the extra spaces added before and after
7718
* c.f. http://www.w3.org/TR/REC-xml#as-PE
7720
input = xmlNewEntityInputStream(ctxt, entity);
7721
if (xmlPushInput(ctxt, input) < 0)
7723
if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7724
(CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7725
(IS_BLANK_CH(NXT(5)))) {
7726
xmlParseTextDecl(ctxt);
7728
XML_ERR_UNSUPPORTED_ENCODING) {
7730
* The XML REC instructs us to stop parsing
7733
ctxt->instate = XML_PARSER_EOF;
7739
ctxt->hasPErefs = 1;
7743
* xmlLoadEntityContent:
7744
* @ctxt: an XML parser context
7745
* @entity: an unloaded system entity
7747
* Load the original content of the given system entity from the
7748
* ExternalID/SystemID given. This is to be used for Included in Literal
7749
* http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7751
* Returns 0 in case of success and -1 in case of failure
7754
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7755
xmlParserInputPtr input;
7760
if ((ctxt == NULL) || (entity == NULL) ||
7761
((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7762
(entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7763
(entity->content != NULL)) {
7764
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7765
"xmlLoadEntityContent parameter error");
7769
if (xmlParserDebugEntities)
7770
xmlGenericError(xmlGenericErrorContext,
7771
"Reading %s entity content input\n", entity->name);
7773
buf = xmlBufferCreate();
7775
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7776
"xmlLoadEntityContent parameter error");
7780
input = xmlNewEntityInputStream(ctxt, entity);
7781
if (input == NULL) {
7782
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7783
"xmlLoadEntityContent input error");
7789
* Push the entity as the current input, read char by char
7790
* saving to the buffer until the end of the entity or an error
7792
if (xmlPushInput(ctxt, input) < 0) {
7799
while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7801
xmlBufferAdd(buf, ctxt->input->cur, l);
7802
if (count++ > 100) {
7810
if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7812
} else if (!IS_CHAR(c)) {
7813
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7814
"xmlLoadEntityContent: invalid char value %d\n",
7819
entity->content = buf->content;
7820
buf->content = NULL;
7827
* xmlParseStringPEReference:
7828
* @ctxt: an XML parser context
7829
* @str: a pointer to an index in the string
7831
* parse PEReference declarations
7833
* [69] PEReference ::= '%' Name ';'
7835
* [ WFC: No Recursion ]
7836
* A parsed entity must not contain a recursive
7837
* reference to itself, either directly or indirectly.
7839
* [ WFC: Entity Declared ]
7840
* In a document without any DTD, a document with only an internal DTD
7841
* subset which contains no parameter entity references, or a document
7842
* with "standalone='yes'", ... ... The declaration of a parameter
7843
* entity must precede any reference to it...
7845
* [ VC: Entity Declared ]
7846
* In a document with an external subset or external parameter entities
7847
* with "standalone='no'", ... ... The declaration of a parameter entity
7848
* must precede any reference to it...
7851
* Parameter-entity references may only appear in the DTD.
7852
* NOTE: misleading but this is handled.
7854
* Returns the string of the entity content.
7855
* str is updated to the current value of the index
7858
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7862
xmlEntityPtr entity = NULL;
7864
if ((str == NULL) || (*str == NULL)) return(NULL);
7870
name = xmlParseStringName(ctxt, &ptr);
7872
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7873
"xmlParseStringPEReference: no name\n");
7879
xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7887
* Increase the number of entity references parsed
7892
* Request the entity from SAX
7894
if ((ctxt->sax != NULL) &&
7895
(ctxt->sax->getParameterEntity != NULL))
7896
entity = ctxt->sax->getParameterEntity(ctxt->userData,
7898
if (entity == NULL) {
7900
* [ WFC: Entity Declared ]
7901
* In a document without any DTD, a document with only an
7902
* internal DTD subset which contains no parameter entity
7903
* references, or a document with "standalone='yes'", ...
7904
* ... The declaration of a parameter entity must precede
7905
* any reference to it...
7907
if ((ctxt->standalone == 1) ||
7908
((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7909
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7910
"PEReference: %%%s; not found\n", name);
7913
* [ VC: Entity Declared ]
7914
* In a document with an external subset or external
7915
* parameter entities with "standalone='no'", ...
7916
* ... The declaration of a parameter entity must
7917
* precede any reference to it...
7919
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7920
"PEReference: %%%s; not found\n",
7926
* Internal checking in case the entity quest barfed
7928
if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7929
(entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7930
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7931
"%%%s; is not a parameter entity\n",
7935
ctxt->hasPErefs = 1;
7942
* xmlParseDocTypeDecl:
7943
* @ctxt: an XML parser context
7945
* parse a DOCTYPE declaration
7947
* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7948
* ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7950
* [ VC: Root Element Type ]
7951
* The Name in the document type declaration must match the element
7952
* type of the root element.
7956
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
7957
const xmlChar *name = NULL;
7958
xmlChar *ExternalID = NULL;
7959
xmlChar *URI = NULL;
7962
* We know that '<!DOCTYPE' has been detected.
7969
* Parse the DOCTYPE name.
7971
name = xmlParseName(ctxt);
7973
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7974
"xmlParseDocTypeDecl : no DOCTYPE name !\n");
7976
ctxt->intSubName = name;
7981
* Check for SystemID and ExternalID
7983
URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7985
if ((URI != NULL) || (ExternalID != NULL)) {
7986
ctxt->hasExternalSubset = 1;
7988
ctxt->extSubURI = URI;
7989
ctxt->extSubSystem = ExternalID;
7994
* Create and update the internal subset.
7996
if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7997
(!ctxt->disableSAX))
7998
ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8001
* Is there any internal subset declarations ?
8002
* they are handled separately in xmlParseInternalSubset()
8008
* We should be at the end of the DOCTYPE declaration.
8011
xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8017
* xmlParseInternalSubset:
8018
* @ctxt: an XML parser context
8020
* parse the internal subset declaration
8022
* [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8026
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8028
* Is there any DTD definition ?
8031
ctxt->instate = XML_PARSER_DTD;
8034
* Parse the succession of Markup declarations and
8036
* Subsequence (markupdecl | PEReference | S)*
8038
while (RAW != ']') {
8039
const xmlChar *check = CUR_PTR;
8040
unsigned int cons = ctxt->input->consumed;
8043
xmlParseMarkupDecl(ctxt);
8044
xmlParsePEReference(ctxt);
8047
* Pop-up of finished entities.
8049
while ((RAW == 0) && (ctxt->inputNr > 1))
8052
if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8053
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8054
"xmlParseInternalSubset: error detected in Markup declaration\n");
8065
* We should be at the end of the DOCTYPE declaration.
8068
xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8073
#ifdef LIBXML_SAX1_ENABLED
8075
* xmlParseAttribute:
8076
* @ctxt: an XML parser context
8077
* @value: a xmlChar ** used to store the value of the attribute
8079
* parse an attribute
8081
* [41] Attribute ::= Name Eq AttValue
8083
* [ WFC: No External Entity References ]
8084
* Attribute values cannot contain direct or indirect entity references
8085
* to external entities.
8087
* [ WFC: No < in Attribute Values ]
8088
* The replacement text of any entity referred to directly or indirectly in
8089
* an attribute value (other than "<") must not contain a <.
8091
* [ VC: Attribute Value Type ]
8092
* The attribute must have been declared; the value must be of the type
8095
* [25] Eq ::= S? '=' S?
8099
* [NS 11] Attribute ::= QName Eq AttValue
8101
* Also the case QName == xmlns:??? is handled independently as a namespace
8104
* Returns the attribute name, and the value in *value.
8108
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8109
const xmlChar *name;
8114
name = xmlParseName(ctxt);
8116
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8117
"error parsing attribute name\n");
8128
val = xmlParseAttValue(ctxt);
8129
ctxt->instate = XML_PARSER_CONTENT;
8131
xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8132
"Specification mandate value for attribute %s\n", name);
8137
* Check that xml:lang conforms to the specification
8138
* No more registered as an error, just generate a warning now
8139
* since this was deprecated in XML second edition
8141
if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8142
if (!xmlCheckLanguageID(val)) {
8143
xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8144
"Malformed value for xml:lang : %s\n",
8150
* Check that xml:space conforms to the specification
8152
if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8153
if (xmlStrEqual(val, BAD_CAST "default"))
8155
else if (xmlStrEqual(val, BAD_CAST "preserve"))
8158
xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8159
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8170
* @ctxt: an XML parser context
8172
* parse a start of tag either for rule element or
8173
* EmptyElement. In both cases we don't parse the tag closing chars.
8175
* [40] STag ::= '<' Name (S Attribute)* S? '>'
8177
* [ WFC: Unique Att Spec ]
8178
* No attribute name may appear more than once in the same start-tag or
8179
* empty-element tag.
8181
* [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8183
* [ WFC: Unique Att Spec ]
8184
* No attribute name may appear more than once in the same start-tag or
8185
* empty-element tag.
8189
* [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8191
* [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8193
* Returns the element name parsed
8197
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8198
const xmlChar *name;
8199
const xmlChar *attname;
8201
const xmlChar **atts = ctxt->atts;
8203
int maxatts = ctxt->maxatts;
8206
if (RAW != '<') return(NULL);
8209
name = xmlParseName(ctxt);
8211
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8212
"xmlParseStartTag: invalid element name\n");
8217
* Now parse the attributes, it ends up with the ending
8224
while ((RAW != '>') &&
8225
((RAW != '/') || (NXT(1) != '>')) &&
8226
(IS_BYTE_CHAR(RAW))) {
8227
const xmlChar *q = CUR_PTR;
8228
unsigned int cons = ctxt->input->consumed;
8230
attname = xmlParseAttribute(ctxt, &attvalue);
8231
if ((attname != NULL) && (attvalue != NULL)) {
8233
* [ WFC: Unique Att Spec ]
8234
* No attribute name may appear more than once in the same
8235
* start-tag or empty-element tag.
8237
for (i = 0; i < nbatts;i += 2) {
8238
if (xmlStrEqual(atts[i], attname)) {
8239
xmlErrAttributeDup(ctxt, NULL, attname);
8245
* Add the pair to atts
8248
maxatts = 22; /* allow for 10 attrs by default */
8249
atts = (const xmlChar **)
8250
xmlMalloc(maxatts * sizeof(xmlChar *));
8252
xmlErrMemory(ctxt, NULL);
8253
if (attvalue != NULL)
8258
ctxt->maxatts = maxatts;
8259
} else if (nbatts + 4 > maxatts) {
8263
n = (const xmlChar **) xmlRealloc((void *) atts,
8264
maxatts * sizeof(const xmlChar *));
8266
xmlErrMemory(ctxt, NULL);
8267
if (attvalue != NULL)
8273
ctxt->maxatts = maxatts;
8275
atts[nbatts++] = attname;
8276
atts[nbatts++] = attvalue;
8277
atts[nbatts] = NULL;
8278
atts[nbatts + 1] = NULL;
8280
if (attvalue != NULL)
8287
if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8289
if (!IS_BLANK_CH(RAW)) {
8290
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8291
"attributes construct error\n");
8294
if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8295
(attname == NULL) && (attvalue == NULL)) {
8296
xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8297
"xmlParseStartTag: problem parsing attributes\n");
8305
* SAX: Start of Element !
8307
if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8308
(!ctxt->disableSAX)) {
8310
ctxt->sax->startElement(ctxt->userData, name, atts);
8312
ctxt->sax->startElement(ctxt->userData, name, NULL);
8316
/* Free only the content strings */
8317
for (i = 1;i < nbatts;i+=2)
8318
if (atts[i] != NULL)
8319
xmlFree((xmlChar *) atts[i]);
8326
* @ctxt: an XML parser context
8327
* @line: line of the start tag
8328
* @nsNr: number of namespaces on the start tag
8330
* parse an end of tag
8332
* [42] ETag ::= '</' Name S? '>'
8336
* [NS 9] ETag ::= '</' QName S? '>'
8340
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8341
const xmlChar *name;
8344
if ((RAW != '<') || (NXT(1) != '/')) {
8345
xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8346
"xmlParseEndTag: '</' not found\n");
8351
name = xmlParseNameAndCompare(ctxt,ctxt->name);
8354
* We should definitely be at the ending "S? '>'" part
8358
if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8359
xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8364
* [ WFC: Element Type Match ]
8365
* The Name in an element's end-tag must match the element type in the
8369
if (name != (xmlChar*)1) {
8370
if (name == NULL) name = BAD_CAST "unparseable";
8371
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8372
"Opening and ending tag mismatch: %s line %d and %s\n",
8373
ctxt->name, line, name);
8379
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8380
(!ctxt->disableSAX))
8381
ctxt->sax->endElement(ctxt->userData, ctxt->name);
8390
* @ctxt: an XML parser context
8392
* parse an end of tag
8394
* [42] ETag ::= '</' Name S? '>'
8398
* [NS 9] ETag ::= '</' QName S? '>'
8402
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8403
xmlParseEndTag1(ctxt, 0);
8405
#endif /* LIBXML_SAX1_ENABLED */
8407
/************************************************************************
8409
* SAX 2 specific operations *
8411
************************************************************************/
8415
* @ctxt: an XML parser context
8416
* @prefix: the prefix to lookup
8418
* Lookup the namespace name for the @prefix (which can be NULL)
8419
* The prefix must come from the @ctxt->dict dictionary
8421
* Returns the namespace name or NULL if not bound
8423
static const xmlChar *
8424
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8427
if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8428
for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8429
if (ctxt->nsTab[i] == prefix) {
8430
if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8432
return(ctxt->nsTab[i + 1]);
8439
* @ctxt: an XML parser context
8440
* @prefix: pointer to store the prefix part
8442
* parse an XML Namespace QName
8444
* [6] QName ::= (Prefix ':')? LocalPart
8445
* [7] Prefix ::= NCName
8446
* [8] LocalPart ::= NCName
8448
* Returns the Name parsed or NULL
8451
static const xmlChar *
8452
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8453
const xmlChar *l, *p;
8457
l = xmlParseNCName(ctxt);
8460
l = xmlParseName(ctxt);
8462
xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8463
"Failed to parse QName '%s'\n", l, NULL, NULL);
8473
l = xmlParseNCName(ctxt);
8477
xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8478
"Failed to parse QName '%s:'\n", p, NULL, NULL);
8479
l = xmlParseNmtoken(ctxt);
8481
tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8483
tmp = xmlBuildQName(l, p, NULL, 0);
8486
p = xmlDictLookup(ctxt->dict, tmp, -1);
8487
if (tmp != NULL) xmlFree(tmp);
8494
xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8495
"Failed to parse QName '%s:%s:'\n", p, l, NULL);
8497
tmp = (xmlChar *) xmlParseName(ctxt);
8499
tmp = xmlBuildQName(tmp, l, NULL, 0);
8500
l = xmlDictLookup(ctxt->dict, tmp, -1);
8501
if (tmp != NULL) xmlFree(tmp);
8505
tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8506
l = xmlDictLookup(ctxt->dict, tmp, -1);
8507
if (tmp != NULL) xmlFree(tmp);
8518
* xmlParseQNameAndCompare:
8519
* @ctxt: an XML parser context
8520
* @name: the localname
8521
* @prefix: the prefix, if any.
8523
* parse an XML name and compares for match
8524
* (specialized for endtag parsing)
8526
* Returns NULL for an illegal name, (xmlChar*) 1 for success
8527
* and the name for mismatch
8530
/*
 * xmlParseQNameAndCompare: compare the QName at the current input position
 * against an expected prefix and local name.
 *
 * ctxt:   the parser context
 * name:   the expected local name
 * prefix: the expected prefix; when NULL the work is delegated to
 *         xmlParseNameAndCompare
 *
 * Returns (const xmlChar *) 1 on a match. The fast path compares raw bytes
 * and only advances ctxt->input->cur on success; otherwise the QName is
 * parsed with xmlParseQName and compared by pointer.
 *
 * NOTE(review): the declarations of in, cmp and ret, the loop bodies and
 * the final return are not visible in this view.
 */
static const xmlChar *
8531
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8532
xmlChar const *prefix) {
8536
const xmlChar *prefix2;
8538
/* No prefix expected: use the plain name comparison helper. */
if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8541
/* Fast path: scan the input bytes directly without consuming them. */
in = ctxt->input->cur;
8544
/* First run: advance while input and comparison string agree. */
while (*in != 0 && *in == *cmp) {
8548
/* Comparison string exhausted and a colon follows in the input. */
if ((*cmp == 0) && (*in == ':')) {
8551
/* Second run: same byte-wise comparison for the next part. */
while (*in != 0 && *in == *cmp) {
8555
/* Full match only if the name is followed by the tag end or a blank. */
if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8557
/* Commit: consume the matched bytes and report success. */
ctxt->input->cur = in;
8558
return((const xmlChar*) 1);
8562
* all strings coms from the dictionary, equality can be done directly
8564
/* Slow path: parse the QName and compare the resulting pointers. */
ret = xmlParseQName (ctxt, &prefix2);
8565
if ((ret == name) && (prefix == prefix2))
8566
return((const xmlChar*) 1);
8571
* xmlParseAttValueInternal:
8572
* @ctxt: an XML parser context
8573
* @len: attribute len result
8574
* @alloc: whether the attribute was reallocated as a new string
8575
* @normalize: if 1 then further non-CDATA normalization must be done
8577
* parse a value for an attribute.
8578
* NOTE: if no normalization is needed, the routine will return pointers
8579
* directly from the data buffer.
8581
* 3.3.3 Attribute-Value Normalization:
8582
* Before the value of an attribute is passed to the application or
8583
* checked for validity, the XML processor must normalize it as follows:
8584
* - a character reference is processed by appending the referenced
8585
* character to the attribute value
8586
* - an entity reference is processed by recursively processing the
8587
* replacement text of the entity
8588
* - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8589
* appending #x20 to the normalized value, except that only a single
8590
* #x20 is appended for a "#xD#xA" sequence that is part of an external
8591
* parsed entity or the literal entity value of an internal parsed entity
8592
* - other characters are processed by appending them to the normalized value
8593
* If the declared value is not CDATA, then the XML processor must further
8594
* process the normalized attribute value by discarding any leading and
8595
* trailing space (#x20) characters, and by replacing sequences of space
8596
* (#x20) characters by a single space (#x20) character.
8597
* All attributes for which no declaration has been read should be treated
8598
* by a non-validating parser as if declared CDATA.
8600
* Returns the AttValue parsed or NULL. The value has to be freed by the
8601
* caller if it was copied, this can be detected by val[*len] == 0.
8605
/*
 * xmlParseAttValueInternal: parse a quoted attribute value.
 *
 * ctxt:  the parser context
 * len:   receives the length of the value (last - start on the fast path)
 * alloc: optional flag, written only when non-NULL; set to 1 on the paths
 *        that hand back a duplicated or complex-parsed string and to 0 on
 *        the path that does not
 * (a further normalize argument is forwarded to xmlParseAttValueComplex;
 *  its declaration line is not visible in this view)
 *
 * The visible code scans the input directly and jumps to need_complex as
 * soon as the scan stops on something other than the closing quote, in
 * which case xmlParseAttValueComplex does the work.
 *
 * NOTE(review): the return type line, the definition of limit, the label
 * lines and the statements between each oldbase capture and its check are
 * not visible in this view.
 */
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8609
const xmlChar *in = NULL, *start, *end, *last;
8610
xmlChar *ret = NULL;
8613
in = (xmlChar *) CUR_PTR;
8614
/* The value must start with a double or single quote. */
if (*in != '"' && *in != '\'') {
8615
xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8618
ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8621
* try to handle in this routine the most common case where no
8622
* allocation of a new string is required and where content is
8626
end = ctxt->input->end;
8629
/*
 * Recurring pattern: remember the buffer base, and if it differs afterwards
 * (presumably after a buffer refill in a line not visible here), shift the
 * saved pointers by the same delta and reload end.
 */
const xmlChar *oldbase = ctxt->input->base;
8631
if (oldbase != ctxt->input->base) {
8632
long delta = ctxt->input->base - oldbase;
8633
start = start + delta;
8636
end = ctxt->input->end;
8640
* Skip any leading spaces
8642
while ((in < end) && (*in != limit) &&
8643
((*in == 0x20) || (*in == 0x9) ||
8644
(*in == 0xA) || (*in == 0xD))) {
8648
const xmlChar *oldbase = ctxt->input->base;
8650
if (oldbase != ctxt->input->base) {
8651
long delta = ctxt->input->base - oldbase;
8652
start = start + delta;
8655
end = ctxt->input->end;
8658
/* Scan plain ASCII content: stop at the quote, control characters,
 * bytes above 0x7f, an ampersand or a less-than sign. */
while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8659
(*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8660
/* Two consecutive spaces end this scan. */
if ((*in++ == 0x20) && (*in == 0x20)) break;
8662
const xmlChar *oldbase = ctxt->input->base;
8664
if (oldbase != ctxt->input->base) {
8665
long delta = ctxt->input->base - oldbase;
8666
start = start + delta;
8669
end = ctxt->input->end;
8674
* skip the trailing blanks
8676
/* NOTE(review): last[-1] is read before the last > start bound is tested. */
while ((last[-1] == 0x20) && (last > start)) last--;
8677
while ((in < end) && (*in != limit) &&
8678
((*in == 0x20) || (*in == 0x9) ||
8679
(*in == 0xA) || (*in == 0xD))) {
8682
const xmlChar *oldbase = ctxt->input->base;
8684
if (oldbase != ctxt->input->base) {
8685
long delta = ctxt->input->base - oldbase;
8686
start = start + delta;
8688
last = last + delta;
8690
end = ctxt->input->end;
8693
/* Anything but the closing quote here needs the complex parser. */
if (*in != limit) goto need_complex;
8695
while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8696
(*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8699
const xmlChar *oldbase = ctxt->input->base;
8701
if (oldbase != ctxt->input->base) {
8702
long delta = ctxt->input->base - oldbase;
8703
start = start + delta;
8706
end = ctxt->input->end;
8710
if (*in != limit) goto need_complex;
8714
/* Fast path result: report the length and the span start .. last. */
*len = last - start;
8715
ret = (xmlChar *) start;
8717
/* Duplicated copy of the span; flagged as allocated. */
if (alloc) *alloc = 1;
8718
ret = xmlStrndup(start, last - start);
8721
if (alloc) *alloc = 0;
8724
/* Complex path: the result is flagged as allocated. */
if (alloc) *alloc = 1;
8725
return xmlParseAttValueComplex(ctxt, len, normalize);
8729
* xmlParseAttribute2:
8730
* @ctxt: an XML parser context
8731
* @pref: the element prefix
8732
* @elem: the element name
8733
* @prefix: a xmlChar ** used to store the value of the attribute prefix
8734
* @value: a xmlChar ** used to store the value of the attribute
8735
* @len: an int * to save the length of the attribute
8736
* @alloc: an int * to indicate if the attribute was allocated
8738
* parse an attribute in the new SAX2 framework.
8740
* Returns the attribute name, and the value in *value.
8743
/*
 * xmlParseAttribute2: parse one attribute (QName and value) of a start tag.
 *
 * ctxt:   the parser context
 * pref:   prefix of the owning element, used for the attsSpecial lookup
 * elem:   name of the owning element, used for the attsSpecial lookup
 * prefix: receives the attribute prefix from xmlParseQName
 * value:  receives the attribute value
 * len:    receives the value length from xmlParseAttValueInternal
 * alloc:  receives the allocation flag from xmlParseAttValueInternal
 *
 * After the value is read, attributes whose prefix is ctxt->str_xml get
 * extra checks: lang is validated with xmlCheckLanguageID (pedantic mode
 * only, warning only) and space is compared against default and preserve.
 *
 * NOTE(review): the declarations of type and normalize, most guarding
 * conditions, the branch bodies for the two accepted space values and the
 * return statements are not visible in this view.
 */
static const xmlChar *
8744
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8745
const xmlChar * pref, const xmlChar * elem,
8746
const xmlChar ** prefix, xmlChar ** value,
8747
int *len, int *alloc)
8749
const xmlChar *name;
8750
xmlChar *val, *internal_val = NULL;
8755
/* Attribute name, with its prefix stored through the out parameter. */
name = xmlParseQName(ctxt, prefix);
8757
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8758
"error parsing attribute name\n");
8763
* get the type if needed
8765
if (ctxt->attsSpecial != NULL) {
8768
/* The hash payload is a pointer-sized value reinterpreted as an int. */
type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
8769
pref, elem, *prefix, name);
8781
val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8784
* Sometimes a second normalisation pass for spaces is needed
8785
* but that only happens if charrefs or entities refernces
8786
* have been used in the attribute value, i.e. the attribute
8787
* value have been extracted in an allocated string already.
8790
const xmlChar *val2;
8792
/* Adopt the normalized string only if a distinct one was returned. */
val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
8793
if ((val2 != NULL) && (val2 != val)) {
8795
val = (xmlChar *) val2;
8799
ctxt->instate = XML_PARSER_CONTENT;
8801
xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8802
"Specification mandate value for attribute %s\n",
8807
/* Special handling for attributes carrying the reserved xml prefix. */
if (*prefix == ctxt->str_xml) {
8809
* Check that xml:lang conforms to the specification
8810
* No more registered as an error, just generate a warning now
8811
* since this was deprecated in XML second edition
8813
if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8814
/* Work on a terminated copy since val is delimited by *len. */
internal_val = xmlStrndup(val, *len);
8815
if (!xmlCheckLanguageID(internal_val)) {
8816
xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8817
"Malformed value for xml:lang : %s\n",
8818
internal_val, NULL);
8823
* Check that xml:space conforms to the specification
8825
if (xmlStrEqual(name, BAD_CAST "space")) {
8826
internal_val = xmlStrndup(val, *len);
8827
if (xmlStrEqual(internal_val, BAD_CAST "default"))
8829
else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8832
xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8833
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8834
internal_val, NULL);
8838
/* Release the temporary copy made for the checks above. */
xmlFree(internal_val);
8846
* xmlParseStartTag2:
8847
* @ctxt: an XML parser context
8849
* parse a start of tag either for rule element or
8850
* EmptyElement. In both cases we don't parse the tag closing chars.
8851
* This routine is called when running SAX2 parsing
8853
* [40] STag ::= '<' Name (S Attribute)* S? '>'
8855
* [ WFC: Unique Att Spec ]
8856
* No attribute name may appear more than once in the same start-tag or
8857
* empty-element tag.
8859
* [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8861
* [ WFC: Unique Att Spec ]
8862
* No attribute name may appear more than once in the same start-tag or
8863
* empty-element tag.
8867
* [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8869
* [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8871
* Returns the element name parsed
8874
/*
 * xmlParseStartTag2: parse a start tag (or empty-element tag) up to, but
 * not including, its closing characters, and fire the startElementNs SAX
 * callback.
 *
 * ctxt: the parser context
 * pref: out parameter for the element prefix
 * URI:  out parameter for the element namespace name
 * tlen: receives the number of input bytes between the saved start offset
 *       and the position reached after the element QName
 *
 * Visible phases, in order:
 *  1. Save the input base, the start offset and the line/col position.
 *  2. Parse the element QName.
 *  3. Loop over attributes with xmlParseAttribute2. Attributes named xmlns
 *     or prefixed xmlns are namespace declarations: their values are looked
 *     up in ctxt->dict, checked against the reserved xml and xmlns names
 *     and URIs, checked for duplicates among the nbNs namespaces pushed by
 *     this tag, then pushed with nsPush. Other attributes are stored in
 *     atts as groups of five slots (name, prefix, URI placeholder, and the
 *     value stored twice in the visible lines).
 *  4. Apply defaulted attributes from ctxt->attsDefault, including
 *     defaulted namespace declarations, skipping those already present.
 *  5. Resolve each attribute prefix with xmlGetNamespace and check for
 *     duplicate attributes, by name and prefix and by name and namespace.
 *  6. Resolve the element namespace and call ctxt->sax->startElementNs.
 *  7. Free the attribute values flagged in ctxt->attallocs.
 *
 * Whenever ctxt->input->base differs from the saved base the code jumps to
 * base_changed; the visible tail frees the flagged attribute values and
 * restores ctxt->input->cur, line and col from the saved values.
 *
 * NOTE(review): in the defaulted xmlns:prefix branch the comparison reads
 * defaults->values[2], whereas every neighbouring access (the default
 * namespace branch and the nsPush right below it) indexes with 5 * i + 2.
 * This looks like a missing 5 * i offset, which would compare against the
 * first defaulted attribute instead of the current one - confirm and fix.
 *
 * NOTE(review): many lines of this function (declarations of cur, attvalue
 * and uri, labels, several guards and closing braces, the return) are not
 * visible in this view, so the description above covers only visible code.
 */
static const xmlChar *
8875
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8876
const xmlChar **URI, int *tlen) {
8877
const xmlChar *localname;
8878
const xmlChar *prefix;
8879
const xmlChar *attname;
8880
const xmlChar *aprefix;
8881
const xmlChar *nsname;
8883
const xmlChar **atts = ctxt->atts;
8884
int maxatts = ctxt->maxatts;
8885
int nratts, nbatts, nbdef;
8886
int i, j, nbNs, attval, oldline, oldcol;
8887
const xmlChar *base;
8889
/* Namespace stack depth on entry. */
int nsNr = ctxt->nsNr;
8891
if (RAW != '<') return(NULL);
8895
* NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8896
* point since the attribute values may be stored as pointers to
8897
* the buffer and calling SHRINK would destroy them !
8898
* The Shrinking is only possible once the full set of attribute
8899
* callbacks have been done.
8903
/* Snapshot of the buffer base, start offset and position; the tail of
 * the function restores cur, line and col from these values. */
base = ctxt->input->base;
8904
cur = ctxt->input->cur - ctxt->input->base;
8905
oldline = ctxt->input->line;
8906
oldcol = ctxt->input->col;
8912
/* Forget any namespaces added during an earlier parse of this element. */
8915
localname = xmlParseQName(ctxt, &prefix);
8916
if (localname == NULL) {
8917
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8918
"StartTag: invalid element name\n");
8921
/* Bytes consumed since the saved start offset. */
*tlen = ctxt->input->cur - ctxt->input->base - cur;
8924
* Now parse the attributes, it ends up with the ending
8930
if (ctxt->input->base != base) goto base_changed;
8932
/* Attribute loop: runs until the tag end, an empty-element end, or a
 * byte that fails IS_BYTE_CHAR. */
while ((RAW != '>') &&
8933
((RAW != '/') || (NXT(1) != '>')) &&
8934
(IS_BYTE_CHAR(RAW))) {
8935
/* q and cons are compared again at the end of the iteration to detect
 * that no progress was made. */
const xmlChar *q = CUR_PTR;
8936
unsigned int cons = ctxt->input->consumed;
8937
int len = -1, alloc = 0;
8939
attname = xmlParseAttribute2(ctxt, prefix, localname,
8940
&aprefix, &attvalue, &len, &alloc);
8941
if (ctxt->input->base != base) {
8942
if ((attvalue != NULL) && (alloc != 0))
8947
if ((attname != NULL) && (attvalue != NULL)) {
8948
if (len < 0) len = xmlStrlen(attvalue);
8949
/* Unprefixed attribute named xmlns: default namespace declaration. */
if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8950
const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8954
uri = xmlParseURI((const char *) URL);
8956
xmlNsErr(ctxt, XML_WAR_NS_URI,
8957
"xmlns: '%s' is not a valid URI\n",
8960
if (uri->scheme == NULL) {
8961
xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8962
"xmlns: URI %s is not absolute\n",
8967
if (URL == ctxt->str_xml_ns) {
8968
if (attname != ctxt->str_xml) {
8969
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8970
"xml namespace URI cannot be the default namespace\n",
8973
goto skip_default_ns;
8977
BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8978
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8979
"reuse of the xmlns namespace name is forbidden\n",
8981
goto skip_default_ns;
8985
* check that it's not a defined namespace
8987
for (j = 1;j <= nbNs;j++)
8988
if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8991
xmlErrAttributeDup(ctxt, NULL, attname);
8993
if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
8995
if (alloc != 0) xmlFree(attvalue);
8999
if (aprefix == ctxt->str_xmlns) {
9000
const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9003
if (attname == ctxt->str_xml) {
9004
if (URL != ctxt->str_xml_ns) {
9005
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9006
"xml namespace prefix mapped to wrong URI\n",
9010
* Do not keep a namespace definition node
9014
if (URL == ctxt->str_xml_ns) {
9015
if (attname != ctxt->str_xml) {
9016
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9017
"xml namespace URI mapped to wrong prefix\n",
9022
if (attname == ctxt->str_xmlns) {
9023
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9024
"redefinition of the xmlns prefix is forbidden\n",
9030
BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9031
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9032
"reuse of the xmlns namespace name is forbidden\n",
9036
if ((URL == NULL) || (URL[0] == 0)) {
9037
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9038
"xmlns:%s: Empty XML namespace is not allowed\n",
9039
attname, NULL, NULL);
9042
uri = xmlParseURI((const char *) URL);
9044
xmlNsErr(ctxt, XML_WAR_NS_URI,
9045
"xmlns:%s: '%s' is not a valid URI\n",
9046
attname, URL, NULL);
9048
if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9049
xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9050
"xmlns:%s: URI %s is not absolute\n",
9051
attname, URL, NULL);
9058
* check that it's not a defined namespace
9060
for (j = 1;j <= nbNs;j++)
9061
if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9064
xmlErrAttributeDup(ctxt, aprefix, attname);
9066
if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9068
if (alloc != 0) xmlFree(attvalue);
9070
if (ctxt->input->base != base) goto base_changed;
9075
* Add the pair to atts
9077
if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9078
if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9079
if (attvalue[len] == 0)
9083
maxatts = ctxt->maxatts;
9086
ctxt->attallocs[nratts++] = alloc;
9087
atts[nbatts++] = attname;
9088
atts[nbatts++] = aprefix;
9089
atts[nbatts++] = NULL; /* the URI will be fetched later */
9090
atts[nbatts++] = attvalue;
9092
atts[nbatts++] = attvalue;
9094
* tag if some deallocation is needed
9096
if (alloc != 0) attval = 1;
9098
if ((attvalue != NULL) && (attvalue[len] == 0))
9105
if (ctxt->input->base != base) goto base_changed;
9106
if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9108
if (!IS_BLANK_CH(RAW)) {
9109
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9110
"attributes construct error\n");
9114
if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9115
(attname == NULL) && (attvalue == NULL)) {
9116
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9117
"xmlParseStartTag: problem parsing attributes\n");
9121
if (ctxt->input->base != base) goto base_changed;
9125
* The attributes defaulting
9127
if (ctxt->attsDefault != NULL) {
9128
xmlDefAttrsPtr defaults;
9130
defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9131
if (defaults != NULL) {
9132
for (i = 0;i < defaults->nbAttrs;i++) {
9133
attname = defaults->values[5 * i];
9134
aprefix = defaults->values[5 * i + 1];
9137
* special work for namespaces defaulted defs
9139
if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9141
* check that it's not a defined namespace
9143
for (j = 1;j <= nbNs;j++)
9144
if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9146
if (j <= nbNs) continue;
9148
nsname = xmlGetNamespace(ctxt, NULL);
9149
if (nsname != defaults->values[5 * i + 2]) {
9150
if (nsPush(ctxt, NULL,
9151
defaults->values[5 * i + 2]) > 0)
9154
} else if (aprefix == ctxt->str_xmlns) {
9156
* check that it's not a defined namespace
9158
for (j = 1;j <= nbNs;j++)
9159
if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9161
if (j <= nbNs) continue;
9163
nsname = xmlGetNamespace(ctxt, attname);
9164
if (nsname != defaults->values[2]) {
9165
if (nsPush(ctxt, attname,
9166
defaults->values[5 * i + 2]) > 0)
9171
* check that it's not a defined attribute
9173
for (j = 0;j < nbatts;j+=5) {
9174
if ((attname == atts[j]) && (aprefix == atts[j+1]))
9177
if (j < nbatts) continue;
9179
if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9180
if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9183
maxatts = ctxt->maxatts;
9186
atts[nbatts++] = attname;
9187
atts[nbatts++] = aprefix;
9188
if (aprefix == NULL)
9189
atts[nbatts++] = NULL;
9191
atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9192
atts[nbatts++] = defaults->values[5 * i + 2];
9193
atts[nbatts++] = defaults->values[5 * i + 3];
9194
if ((ctxt->standalone == 1) &&
9195
(defaults->values[5 * i + 4] != NULL)) {
9196
xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9197
"standalone: attribute %s on %s defaulted from external subset\n",
9198
attname, localname);
9207
* The attributes checkings
9209
for (i = 0; i < nbatts;i += 5) {
9211
* The default namespace does not apply to attribute names.
9213
if (atts[i + 1] != NULL) {
9214
nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9215
if (nsname == NULL) {
9216
xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9217
"Namespace prefix %s for %s on %s is not defined\n",
9218
atts[i + 1], atts[i], localname);
9220
atts[i + 2] = nsname;
9224
* [ WFC: Unique Att Spec ]
9225
* No attribute name may appear more than once in the same
9226
* start-tag or empty-element tag.
9227
* As extended by the Namespace in XML REC.
9229
for (j = 0; j < i;j += 5) {
9230
if (atts[i] == atts[j]) {
9231
if (atts[i+1] == atts[j+1]) {
9232
xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9235
if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9236
xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9237
"Namespaced Attribute %s in '%s' redefined\n",
9238
atts[i], nsname, NULL);
9245
nsname = xmlGetNamespace(ctxt, prefix);
9246
if ((prefix != NULL) && (nsname == NULL)) {
9247
xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9248
"Namespace prefix %s on %s is not defined\n",
9249
prefix, localname, NULL);
9255
* SAX: Start of Element !
9257
if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9258
(!ctxt->disableSAX)) {
9260
ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9261
nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9262
nbatts / 5, nbdef, atts);
9264
ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9265
nsname, 0, NULL, nbatts / 5, nbdef, atts);
9269
* Free up attribute allocated strings if needed
9272
for (i = 3,j = 0; j < nratts;i += 5,j++)
9273
if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9274
xmlFree((xmlChar *) atts[i]);
9281
* the attribute strings are valid iif the base didn't changed
9284
for (i = 3,j = 0; j < nratts;i += 5,j++)
9285
if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9286
xmlFree((xmlChar *) atts[i]);
9288
ctxt->input->cur = ctxt->input->base + cur;
9289
ctxt->input->line = oldline;
9290
ctxt->input->col = oldcol;
9291
if (ctxt->wellFormed == 1) {
9299
* @ctxt: an XML parser context
9300
* @line: line of the start tag
9301
* @nsNr: number of namespaces on the start tag
9303
* parse an end of tag
9305
* [42] ETag ::= '</' Name S? '>'
9309
* [NS 9] ETag ::= '</' QName S? '>'
9313
/*
 * xmlParseEndTag2: parse an end tag and fire the endElementNs SAX callback.
 *
 * ctxt:   the parser context; ctxt->name is the element expected to close
 * prefix: prefix of the element being closed
 * URI:    namespace name passed through to endElementNs
 * line:   line of the start tag, used in the mismatch error; when 0 and a
 *         current node exists, ctxt->node->line is used instead
 * nsNr:   number of namespaces on the start tag (not used in visible lines)
 * tlen:   byte length recorded for the start tag name; when positive it
 *         enables a direct byte comparison against ctxt->name
 *
 * NOTE(review): the return type line, the skip of the leading characters,
 * the guard choosing between the two compare helpers and the label and
 * cleanup lines are not visible in this view.
 */
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9314
const xmlChar *URI, int line, int nsNr, int tlen) {
9315
const xmlChar *name;
9318
/* An end tag must start with a less-than sign followed by a slash. */
if ((RAW != '<') || (NXT(1) != '/')) {
9319
xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9324
/* Fast path: the next tlen bytes equal the expected element name. */
if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9325
if (ctxt->input->cur[tlen] == '>') {
9326
/* Name directly followed by the tag end: consume both. */
ctxt->input->cur += tlen + 1;
9329
/* Otherwise consume only the name. */
ctxt->input->cur += tlen;
9333
/* Slow path: parse and compare the name, with or without a prefix. */
name = xmlParseNameAndCompare(ctxt, ctxt->name);
9335
name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9339
* We should definitely be at the ending "S? '>'" part
9343
if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9344
xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9349
* [ WFC: Element Type Match ]
9350
* The Name in an element's end-tag must match the element type in the
9354
if (name != (xmlChar*)1) {
9355
if (name == NULL) name = BAD_CAST "unparseable";
9356
if ((line == 0) && (ctxt->node != NULL))
9357
line = ctxt->node->line;
9358
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9359
"Opening and ending tag mismatch: %s line %d and %s\n",
9360
ctxt->name, line, name);
9367
if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9368
(!ctxt->disableSAX))
9369
ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9379
* @ctxt: an XML parser context
9381
* Parse escaped pure raw content.
9383
* [18] CDSect ::= CDStart CData CDEnd
9385
* [19] CDStart ::= '<![CDATA['
9387
* [20] CData ::= (Char* - (Char* ']]>' Char*))
9389
* [21] CDEnd ::= ']]>'
9392
/*
 * xmlParseCDSect: parse a CDATA section and deliver its content to SAX.
 *
 * ctxt: the parser context
 *
 * The content is accumulated into a heap buffer that starts at
 * XML_PARSER_BUFFER_SIZE bytes and is grown with xmlRealloc. On success the
 * buffer is passed to ctxt->sax->cdataBlock, or to ctxt->sax->characters
 * when no cdataBlock handler is set. Error paths reset ctxt->instate to
 * XML_PARSER_CONTENT.
 *
 * NOTE(review): the return type line, the declarations of len, r, s, cur,
 * rl and tmp, the size update before xmlRealloc, the loop tail and the
 * release of buf are not visible in this view.
 */
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9393
xmlChar *buf = NULL;
9395
int size = XML_PARSER_BUFFER_SIZE;
9401
/* Check 2.6.0 was NXT(0) not RAW */
9402
/* Proceed only when the nine-character CDATA opener is at the cursor. */
if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9407
ctxt->instate = XML_PARSER_CDATA_SECTION;
9410
/* Two early failure paths: report and return to content state. */
xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9411
ctxt->instate = XML_PARSER_CONTENT;
9417
xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9418
ctxt->instate = XML_PARSER_CONTENT;
9423
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9425
xmlErrMemory(ctxt, NULL);
9428
/* Sliding window of three characters r, s, cur: stop when they spell the
 * CDATA terminator or when cur is not a valid Char. */
while (IS_CHAR(cur) &&
9429
((r != ']') || (s != ']') || (cur != '>'))) {
9430
/* Grow the buffer while fewer than five spare bytes remain. */
if (len + 5 >= size) {
9434
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9437
xmlErrMemory(ctxt, NULL);
9442
/* Append the oldest character of the window to the buffer. */
COPY_BUF(rl,buf,len,r);
9456
ctxt->instate = XML_PARSER_CONTENT;
9458
xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9459
"CData section not finished\n%.50s\n", buf);
9466
* OK the buffer is to be consumed as cdata.
9468
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9469
if (ctxt->sax->cdataBlock != NULL)
9470
ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9471
else if (ctxt->sax->characters != NULL)
9472
ctxt->sax->characters(ctxt->userData, buf, len);
9479
* @ctxt: an XML parser context
9483
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9487
/*
 * xmlParseContent: parse element content by dispatching on the characters
 * at the current input position.
 *
 * ctxt: the parser context
 *
 * The loop runs until end of input, the start of an end tag, or until
 * ctxt->instate becomes XML_PARSER_EOF. Each iteration handles one item: a
 * processing instruction, a CDATA section, a comment, a sub-element, a
 * reference, or character data. If an iteration consumed nothing (same
 * consumed counter and same cursor), an internal error is raised and the
 * parser state is forced to XML_PARSER_EOF so the loop terminates.
 *
 * NOTE(review): the return type line, the body of the processing
 * instruction branch, the body of the entity pop loop and some closing
 * lines are not visible in this view.
 */
xmlParseContent(xmlParserCtxtPtr ctxt) {
9489
while ((RAW != 0) &&
9490
((RAW != '<') || (NXT(1) != '/')) &&
9491
(ctxt->instate != XML_PARSER_EOF)) {
9492
/* test and cons are the progress markers checked at the loop bottom. */
const xmlChar *test = CUR_PTR;
9493
unsigned int cons = ctxt->input->consumed;
9494
const xmlChar *cur = ctxt->input->cur;
9497
* First case : a Processing Instruction.
9499
if ((*cur == '<') && (cur[1] == '?')) {
9504
* Second case : a CDSection
9506
/* 2.6.0 test was *cur not RAW */
9507
else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9508
xmlParseCDSect(ctxt);
9512
* Third case : a comment
9514
else if ((*cur == '<') && (NXT(1) == '!') &&
9515
(NXT(2) == '-') && (NXT(3) == '-')) {
9516
xmlParseComment(ctxt);
9517
/* Back to content state after the comment. */
ctxt->instate = XML_PARSER_CONTENT;
9521
* Fourth case : a sub-element.
9523
else if (*cur == '<') {
9524
xmlParseElement(ctxt);
9528
* Fifth case : a reference. If if has not been resolved,
9529
* parsing returns it's Name, create the node
9532
else if (*cur == '&') {
9533
xmlParseReference(ctxt);
9537
* Last case, text. Note that References are handled directly.
9540
xmlParseCharData(ctxt, 0);
9545
* Pop-up of finished entities.
9547
while ((RAW == 0) && (ctxt->inputNr > 1))
9551
if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9552
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9553
"detected an error in element content\n");
9554
ctxt->instate = XML_PARSER_EOF;
9562
* @ctxt: an XML parser context
9564
* parse an XML element, this is highly recursive
9566
* [39] element ::= EmptyElemTag | STag content ETag
9568
* [ WFC: Element Type Match ]
9569
* The Name in an element's end-tag must match the element type in the
9575
/*
 * xmlParseElement: parse one complete element: start tag, optional content
 * and end tag. Nested elements are reached through xmlParseContent.
 *
 * ctxt: the parser context
 *
 * Visible steps:
 *  - refuse to go deeper than xmlParserMaxDepth unless XML_PARSE_HUGE is
 *    set, forcing the parser state to XML_PARSER_EOF;
 *  - when ctxt->record_info is set, record begin and end positions in a
 *    xmlParserNodeInfo and register it with xmlParserAddNodeInfo;
 *  - push an entry on the space stack: -1 when the stack is empty or its
 *    top is -2, otherwise a copy of the current top;
 *  - parse the start tag (xmlParseStartTag2, or xmlParseStartTag in the
 *    SAX1 build path) and push the element name;
 *  - with validation enabled, validate the root element with
 *    xmlValidateRoot when the current node is the first child of the
 *    document;
 *  - for an empty-element tag, fire endElementNs (or endElement for SAX1);
 *  - otherwise parse the content and then the end tag;
 *  - on each exit path, pop the namespaces pushed since entry, computed as
 *    ctxt->nsNr - nsNr.
 *
 * NOTE(review): the return type line, the declarations of line, tlen and
 * ret, the guards selecting the SAX2 or SAX1 path, and the pops of the
 * name, node and space stacks are not visible in this view.
 */
xmlParseElement(xmlParserCtxtPtr ctxt) {
9576
const xmlChar *name;
9577
const xmlChar *prefix = NULL;
9578
const xmlChar *URI = NULL;
9579
xmlParserNodeInfo node_info;
9582
/* Namespace stack depth on entry, used to pop what this element adds. */
int nsNr = ctxt->nsNr;
9584
/* Depth limit, bypassed only with the XML_PARSE_HUGE option. */
if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9585
((ctxt->options & XML_PARSE_HUGE) == 0)) {
9586
xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9587
"Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9589
ctxt->instate = XML_PARSER_EOF;
9593
/* Capture start position */
9594
if (ctxt->record_info) {
9595
node_info.begin_pos = ctxt->input->consumed +
9596
(CUR_PTR - ctxt->input->base);
9597
node_info.begin_line = ctxt->input->line;
9600
/* Space stack: start at -1, or replace a -2 top by -1, else copy the top. */
if (ctxt->spaceNr == 0)
9601
spacePush(ctxt, -1);
9602
else if (*ctxt->space == -2)
9603
spacePush(ctxt, -1);
9605
spacePush(ctxt, *ctxt->space);
9607
/* Line of the start tag, reported in later error messages. */
line = ctxt->input->line;
9608
#ifdef LIBXML_SAX1_ENABLED
9610
#endif /* LIBXML_SAX1_ENABLED */
9611
name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9612
#ifdef LIBXML_SAX1_ENABLED
9614
name = xmlParseStartTag(ctxt);
9615
#endif /* LIBXML_SAX1_ENABLED */
9616
if (ctxt->instate == XML_PARSER_EOF)
9622
namePush(ctxt, name);
9625
#ifdef LIBXML_VALID_ENABLED
9627
* [ VC: Root Element Type ]
9628
* The Name in the document type declaration must match the element
9629
* type of the root element.
9631
if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9632
ctxt->node && (ctxt->node == ctxt->myDoc->children))
9633
ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9634
#endif /* LIBXML_VALID_ENABLED */
9637
* Check for an Empty Element.
9639
if ((RAW == '/') && (NXT(1) == '>')) {
9642
if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9643
(!ctxt->disableSAX))
9644
ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9645
#ifdef LIBXML_SAX1_ENABLED
9647
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9648
(!ctxt->disableSAX))
9649
ctxt->sax->endElement(ctxt->userData, name);
9650
#endif /* LIBXML_SAX1_ENABLED */
9654
/* Drop the namespaces declared by this element. */
if (nsNr != ctxt->nsNr)
9655
nsPop(ctxt, ctxt->nsNr - nsNr);
9656
if ( ret != NULL && ctxt->record_info ) {
9657
node_info.end_pos = ctxt->input->consumed +
9658
(CUR_PTR - ctxt->input->base);
9659
node_info.end_line = ctxt->input->line;
9660
node_info.node = ret;
9661
xmlParserAddNodeInfo(ctxt, &node_info);
9668
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9669
"Couldn't find end of Start Tag %s line %d\n",
9673
* end of parsing of this node.
9678
if (nsNr != ctxt->nsNr)
9679
nsPop(ctxt, ctxt->nsNr - nsNr);
9682
* Capture end position and add node
9684
if ( ret != NULL && ctxt->record_info ) {
9685
node_info.end_pos = ctxt->input->consumed +
9686
(CUR_PTR - ctxt->input->base);
9687
node_info.end_line = ctxt->input->line;
9688
node_info.node = ret;
9689
xmlParserAddNodeInfo(ctxt, &node_info);
9695
* Parse the content of the element:
9697
xmlParseContent(ctxt);
9698
/* Content parsing stopped on a byte that fails IS_BYTE_CHAR. */
if (!IS_BYTE_CHAR(RAW)) {
9699
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9700
"Premature end of data in tag %s line %d\n",
9704
* end of parsing of this node.
9709
if (nsNr != ctxt->nsNr)
9710
nsPop(ctxt, ctxt->nsNr - nsNr);
9715
* parse the end of tag: '</' should be here.
9718
xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
9721
#ifdef LIBXML_SAX1_ENABLED
9723
xmlParseEndTag1(ctxt, line);
9724
#endif /* LIBXML_SAX1_ENABLED */
9727
* Capture end position and add node
9729
if ( ret != NULL && ctxt->record_info ) {
9730
node_info.end_pos = ctxt->input->consumed +
9731
(CUR_PTR - ctxt->input->base);
9732
node_info.end_line = ctxt->input->line;
9733
node_info.node = ret;
9734
xmlParserAddNodeInfo(ctxt, &node_info);
9739
* xmlParseVersionNum:
9740
* @ctxt: an XML parser context
9742
* parse the XML version value.
9744
* [26] VersionNum ::= '1.' [0-9]+
9746
* In practice allow [0-9].[0-9]+ at that level
9748
* Returns the string giving the XML version number, or NULL
9751
/*
 * xmlParseVersionNum: read the version number of the XML declaration into
 * a newly allocated buffer.
 *
 * ctxt: the parser context
 *
 * The buffer is allocated with xmlMallocAtomic and grown with xmlRealloc
 * while digits are read; allocation failures are reported with
 * xmlErrMemory. The value must begin with a digit.
 *
 * NOTE(review): the return type line, the declarations of len, size, cur
 * and tmp, the handling of the dot, the copy into buf and the return
 * statements are not visible in this view.
 */
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9752
xmlChar *buf = NULL;
9757
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9759
xmlErrMemory(ctxt, NULL);
9763
/* Reject a value that does not start with a decimal digit. */
if (!((cur >= '0') && (cur <= '9'))) {
9777
/* Digit run; the buffer is grown when one more byte would not fit. */
while ((cur >= '0') && (cur <= '9')) {
9778
if (len + 1 >= size) {
9782
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9785
xmlErrMemory(ctxt, NULL);
9799
* xmlParseVersionInfo:
9800
* @ctxt: an XML parser context
9802
* parse the XML version.
9804
* [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9806
* [25] Eq ::= S? '=' S?
9808
* Returns the version string, e.g. "1.0"
9812
/*
 * xmlParseVersionInfo: parse the version pseudo-attribute of the XML
 * declaration.
 *
 * ctxt: the parser context
 *
 * When the keyword version is at the cursor, the value is read with
 * xmlParseVersionNum inside either double or single quotes. Errors are
 * raised for a missing equal sign (XML_ERR_EQUAL_REQUIRED), a missing
 * opening quote (XML_ERR_STRING_NOT_STARTED) and a missing closing quote
 * (XML_ERR_STRING_NOT_CLOSED).
 *
 * NOTE(review): the return type line, the skipping statements, the guards
 * around each error call and the return are not visible in this view.
 */
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9813
xmlChar *version = NULL;
9815
if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
9819
xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9826
/* First quoting branch. */
version = xmlParseVersionNum(ctxt);
9828
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9831
/* Single-quoted branch. */
} else if (RAW == '\''){
9833
version = xmlParseVersionNum(ctxt);
9835
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9839
/* Neither quote character was found. */
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9847
* @ctxt: an XML parser context
9849
* parse the XML encoding name
9851
* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9853
* Returns the encoding name value or NULL
9856
/*
 * xmlParseEncName: read an encoding name into a newly allocated buffer.
 *
 * ctxt: the parser context
 *
 * The name must start with an ASCII letter; otherwise XML_ERR_ENCODING_NAME
 * is raised. Following characters are accepted while they are ASCII
 * letters, digits, a dot or an underscore (one more alternative of the loop
 * condition is on a line not visible here). The buffer is allocated with
 * xmlMallocAtomic and grown with xmlRealloc.
 *
 * NOTE(review): the return type line, the declarations of len, size, cur
 * and tmp, the copy into buf and the return statements are not visible in
 * this view.
 */
xmlParseEncName(xmlParserCtxtPtr ctxt) {
9857
xmlChar *buf = NULL;
9863
/* First character must be an ASCII letter. */
if (((cur >= 'a') && (cur <= 'z')) ||
9864
((cur >= 'A') && (cur <= 'Z'))) {
9865
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9867
xmlErrMemory(ctxt, NULL);
9874
while (((cur >= 'a') && (cur <= 'z')) ||
9875
((cur >= 'A') && (cur <= 'Z')) ||
9876
((cur >= '0') && (cur <= '9')) ||
9877
(cur == '.') || (cur == '_') ||
9879
/* Grow the buffer when one more byte would not fit. */
if (len + 1 >= size) {
9883
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9885
xmlErrMemory(ctxt, NULL);
9902
/* Reached when the first character is not a letter. */
xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
9908
* xmlParseEncodingDecl:
9909
* @ctxt: an XML parser context
9911
* parse the XML encoding declaration
9913
* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9915
* This sets up the conversion filters.
9917
* Returns the encoding value or NULL
9921
/*
 * xmlParseEncodingDecl: parse the encoding pseudo-attribute of an XML or
 * text declaration and act on the declared encoding.
 *
 * ctxt: the parser context
 *
 * The name is read with xmlParseEncName inside double or single quotes.
 * Visible handling of the parsed name:
 *  - UTF-16 or UTF16 (case-insensitive): if no encoding was set on the
 *    context and the input buffer has no encoder, raise
 *    XML_ERR_INVALID_ENCODING; then store the name in ctxt->encoding,
 *    freeing any previous value.
 *  - UTF-8 or UTF8 (case-insensitive): store the name in ctxt->encoding,
 *    freeing any previous value.
 *  - anything else: store the name in ctxt->input->encoding, look up a
 *    handler with xmlFindCharEncodingHandler and switch to it, or raise
 *    XML_ERR_UNSUPPORTED_ENCODING when none is found.
 *
 * NOTE(review): the return type line, the skipping statements, the body of
 * the XML_PARSE_IGNORE_ENC branch and the return statements are not
 * visible in this view.
 */
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9922
xmlChar *encoding = NULL;
9925
if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
9929
xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9936
/* First quoting branch. */
encoding = xmlParseEncName(ctxt);
9938
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9941
/* Single-quoted branch. */
} else if (RAW == '\''){
9943
encoding = xmlParseEncName(ctxt);
9945
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9949
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9953
* Non standard parsing, allowing the user to ignore encoding
9955
if (ctxt->options & XML_PARSE_IGNORE_ENC)
9959
* UTF-16 encoding stwich has already taken place at this stage,
9960
* more over the little-endian/big-endian selection is already done
9962
if ((encoding != NULL) &&
9963
((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9964
(!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
9966
* If no encoding was passed to the parser, that we are
9967
* using UTF-16 and no decoder is present i.e. the
9968
* document is apparently UTF-8 compatible, then raise an
9969
* encoding mismatch fatal error
9971
if ((ctxt->encoding == NULL) &&
9972
(ctxt->input->buf != NULL) &&
9973
(ctxt->input->buf->encoder == NULL)) {
9974
xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9975
"Document labelled UTF-16 but has UTF-8 content\n");
9977
/* The context takes over the parsed name, replacing any old one. */
if (ctxt->encoding != NULL)
9978
xmlFree((xmlChar *) ctxt->encoding);
9979
ctxt->encoding = encoding;
9982
* UTF-8 encoding is handled natively
9984
else if ((encoding != NULL) &&
9985
((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9986
(!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
9987
if (ctxt->encoding != NULL)
9988
xmlFree((xmlChar *) ctxt->encoding);
9989
ctxt->encoding = encoding;
9991
/* Any other encoding: record it on the input and look for a handler. */
else if (encoding != NULL) {
9992
xmlCharEncodingHandlerPtr handler;
9994
if (ctxt->input->encoding != NULL)
9995
xmlFree((xmlChar *) ctxt->input->encoding);
9996
ctxt->input->encoding = encoding;
9998
handler = xmlFindCharEncodingHandler((const char *) encoding);
9999
if (handler != NULL) {
10000
xmlSwitchToEncoding(ctxt, handler);
10002
xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10003
"Unsupported encoding %s\n", encoding);
10013
* @ctxt: an XML parser context
10015
* parse the XML standalone declaration
10017
* [32] SDDecl ::= S 'standalone' Eq
10018
* (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10020
* [ VC: Standalone Document Declaration ]
10021
* TODO The standalone document declaration must have the value "no"
10022
* if any external markup declarations contain declarations of:
10023
* - attributes with default values, if elements to which these
10024
* attributes apply appear in the document without specifications
10025
* of values for these attributes, or
10026
* - entities (other than amp, lt, gt, apos, quot), if references
10027
* to those entities appear in the document, or
10028
* - attributes with values subject to normalization, where the
10029
* attribute appears in the document with a value which will change
10030
* as a result of normalization, or
10031
* - element types with element content, if white space occurs directly
10032
* within any instance of those types.
10035
* 1 if standalone="yes"
10036
* 0 if standalone="no"
10037
* -2 if standalone attribute is missing or invalid
10038
* (A standalone value of -2 means that the XML declaration was found,
10039
* but no value was specified for the standalone attribute).
10043
/*
 * xmlParseSDDecl: parse the standalone pseudo-attribute of the XML
 * declaration.
 *
 * ctxt: the parser context
 *
 * Returns the value of the local variable standalone, which starts at -2.
 * The visible code recognises the keyword standalone, then a quoted value
 * in which the two accepted spellings begin with n, o and with y, e; any
 * other value raises XML_ERR_STANDALONE_VALUE. A missing equal sign
 * returns early with the initial value, and missing quotes raise
 * XML_ERR_STRING_NOT_STARTED or XML_ERR_STRING_NOT_CLOSED.
 *
 * NOTE(review): the return type line, the assignments to standalone inside
 * the accepted branches, the skipping statements and the rest of the yes
 * test are not visible in this view.
 */
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10044
int standalone = -2;
10047
if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10051
/* No equal sign: report and return the initial value. */
xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10052
return(standalone);
10058
/* First quoting branch. */
if ((RAW == 'n') && (NXT(1) == 'o')) {
10061
} else if ((RAW == 'y') && (NXT(1) == 'e') &&
10066
xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10069
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10072
/* Double-quoted branch, same value checks. */
} else if (RAW == '"'){
10074
if ((RAW == 'n') && (NXT(1) == 'o')) {
10077
} else if ((RAW == 'y') && (NXT(1) == 'e') &&
10082
xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10085
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10089
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10092
return(standalone);
10097
* @ctxt: an XML parser context
10099
* parse an XML declaration header
10101
* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10105
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10109
* This value for standalone indicates that the document has an
10110
* XML declaration but it does not have a standalone attribute.
10111
* It will be overwritten later if a standalone attribute is found.
10113
ctxt->input->standalone = -2;
10116
* We know that '<?xml' is here.
10120
/* The character at the current position must be a blank, else
 * XML_ERR_SPACE_REQUIRED is raised. */
if (!IS_BLANK_CH(RAW)) {
10121
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10122
"Blank needed after '<?xml'\n");
10127
* We must have the VersionInfo here.
10129
version = xmlParseVersionInfo(ctxt);
10130
/* A NULL result from xmlParseVersionInfo is reported as a missing version. */
if (version == NULL) {
10131
xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10133
/* A version string different from XML_DEFAULT_VERSION gets extra checks. */
if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10135
* Changed here for XML-1.0 5th edition
10137
/* With the XML_PARSE_OLD10 option the unknown version is a fatal error. */
if (ctxt->options & XML_PARSE_OLD10) {
10138
xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10139
"Unsupported version '%s'\n",
10142
/* A version starting with 1 and a dot only triggers a warning; the fatal
 * call that follows presumably covers every other version string. */
if ((version[0] == '1') && ((version[1] == '.'))) {
10143
xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10144
"Unsupported version '%s'\n",
10147
xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10148
"Unsupported version '%s'\n",
10153
/* Free any version already stored on the context before storing the new one. */
if (ctxt->version != NULL)
10154
xmlFree((void *) ctxt->version);
10155
ctxt->version = version;
10159
* We may have the encoding declaration
10161
/* No blank here: check for the closing question mark + greater-than pair;
 * the call below reports XML_ERR_SPACE_REQUIRED. */
if (!IS_BLANK_CH(RAW)) {
10162
if ((RAW == '?') && (NXT(1) == '>')) {
10166
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10168
xmlParseEncodingDecl(ctxt);
10169
/* Detect an unsupported-encoding error through ctxt->errNo. */
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10171
* The XML REC instructs us to stop parsing right here
10177
* We may have the standalone status.
10179
/* Same blank check as above, applied only when the input has an encoding set. */
if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10180
if ((RAW == '?') && (NXT(1) == '>')) {
10184
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10188
* We can grow the input buffer freely at that point
10193
/* Overwrites the -2 stored at the top with the result of xmlParseSDDecl. */
ctxt->input->standalone = xmlParseSDDecl(ctxt);
10196
/* Closing of the declaration: the question mark + greater-than pair is the
 * expected form; a lone greater-than sign raises
 * XML_ERR_XMLDECL_NOT_FINISHED, as does the last call, which is followed
 * by MOVETO_ENDTAG. */
if ((RAW == '?') && (NXT(1) == '>')) {
10198
} else if (RAW == '>') {
10199
/* Deprecated old WD ... */
10200
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10203
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10204
MOVETO_ENDTAG(CUR_PTR);
10211
* @ctxt: an XML parser context
10213
* parse an XML Misc* optional field.
10215
* [27] Misc ::= Comment | PI | S
10219
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10220
/* Keep looping while the input starts with one of three things: a
 * less-than + question mark pair, the four-character comment opener,
 * or a blank character. */
while (((RAW == '<') && (NXT(1) == '?')) ||
10221
(CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10222
IS_BLANK_CH(CUR)) {
10223
/* Dispatch in the same order: the processing-instruction opener first,
 * then blanks; xmlParseComment is presumably the remaining (comment) case. */
if ((RAW == '<') && (NXT(1) == '?')) {
10225
} else if (IS_BLANK_CH(CUR)) {
10228
xmlParseComment(ctxt);
10233
* xmlParseDocument:
10234
* @ctxt: an XML parser context
10236
* parse an XML document (and build a tree if using the standard SAX
10239
* [1] document ::= prolog element Misc*
10241
* [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10243
* Returns 0, or -1 in case of error. The parser context is augmented
10244
* as a result of the parsing.
10248
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10250
xmlCharEncoding enc;
10254
/* Guard: both the context and its current input must be present. */
if ((ctxt == NULL) || (ctxt->input == NULL))
10260
* SAX: detecting the level.
10262
xmlDetectSAX2(ctxt);
10265
* SAX: beginning of the document processing.
10267
/* Pass the default locator to the handler if it provides the callback. */
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10268
ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10270
/* Encoding detection is attempted only when no encoding is recorded on the
 * context and at least 4 bytes of input are available. */
if ((ctxt->encoding == NULL) &&
10271
((ctxt->input->end - ctxt->input->cur) >= 4)) {
10273
* Get the 4 first bytes and decode the charset
10274
* if enc != XML_CHAR_ENCODING_NONE
10275
* plug some encoding conversion routines.
10281
enc = xmlDetectCharEncoding(&start[0], 4);
10282
/* Switch only when detection produced something other than NONE. */
if (enc != XML_CHAR_ENCODING_NONE) {
10283
xmlSwitchEncoding(ctxt, enc);
10289
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10293
* Check for the XMLDecl in the Prolog.
10294
* do not GROW here to avoid the detected encoder to decode more
10295
* than just the first line, unless the amount of data is really
10296
* too small to hold "<?xml version="1.0" encoding="foo"
10298
/* 35 bytes is the threshold for the too-small case described above. */
if ((ctxt->input->end - ctxt->input->cur) < 35) {
10301
/* An XML declaration is recognised by its five-character opener followed by
 * a blank at offset 5. */
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10304
* Note that we will switch encoding on the fly.
10306
xmlParseXMLDecl(ctxt);
10307
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10309
* The XML REC instructs us to stop parsing right here
10313
/* Copy the standalone status from the input to the context. */
ctxt->standalone = ctxt->input->standalone;
10316
/* NOTE(review): presumably the no-declaration branch; the version defaults
 * to XML_DEFAULT_VERSION and the result of xmlCharStrdup is not checked on
 * the lines shown here. */
ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10318
/* startDocument is skipped when SAX is disabled. */
if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10319
ctxt->sax->startDocument(ctxt->userData);
10322
* The Misc part of the Prolog
10325
xmlParseMisc(ctxt);
10328
* Then possibly doc type declaration(s) and more Misc
10329
* (doctypedecl Misc*)?
10332
if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10334
/* inSubset is 1 from here, 2 around the externalSubset callback, and is
 * reset to 0 afterwards. */
ctxt->inSubset = 1;
10335
xmlParseDocTypeDecl(ctxt);
10337
ctxt->instate = XML_PARSER_DTD;
10338
xmlParseInternalSubset(ctxt);
10342
* Create and update the external subset.
10344
ctxt->inSubset = 2;
10345
if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10346
(!ctxt->disableSAX))
10347
ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10348
ctxt->extSubSystem, ctxt->extSubURI);
10349
ctxt->inSubset = 0;
10351
xmlCleanSpecialAttr(ctxt);
10353
ctxt->instate = XML_PARSER_PROLOG;
10354
xmlParseMisc(ctxt);
10358
* Time to start parsing the tree itself
10362
xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10363
"Start tag expected, '<' not found\n");
10365
/* Root element: state is CONTENT while xmlParseElement runs, EPILOG after. */
ctxt->instate = XML_PARSER_CONTENT;
10366
xmlParseElement(ctxt);
10367
ctxt->instate = XML_PARSER_EPILOG;
10371
* The Misc part at the end
10373
xmlParseMisc(ctxt);
10376
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10378
ctxt->instate = XML_PARSER_EOF;
10382
* SAX: end of the document processing.
10384
/* Unlike startDocument above, this test does not look at disableSAX. */
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10385
ctxt->sax->endDocument(ctxt->userData);
10388
* Remove locally kept entity definitions if the tree was not built
10390
/* A document whose version equals SAX_COMPAT_MODE is freed and detached. */
if ((ctxt->myDoc != NULL) &&
10391
(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10392
xmlFreeDoc(ctxt->myDoc);
10393
ctxt->myDoc = NULL;
10396
/* Record the outcome as property flags on the resulting document. */
if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10397
ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10399
ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10400
if (ctxt->nsWellFormed)
10401
ctxt->myDoc->properties |= XML_DOC_NSVALID;
10402
if (ctxt->options & XML_PARSE_OLD10)
10403
ctxt->myDoc->properties |= XML_DOC_OLD10;
10405
if (! ctxt->wellFormed) {
10413
* xmlParseExtParsedEnt:
10414
* @ctxt: an XML parser context
10416
* parse a general parsed entity
10417
* An external general parsed entity is well-formed if it matches the
10418
* production labeled extParsedEnt.
10420
* [78] extParsedEnt ::= TextDecl? content
10422
* Returns 0, or -1 in case of error. The parser context is augmented
10423
* as a result of the parsing.
10427
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10429
xmlCharEncoding enc;
10431
/* Guard: both the context and its current input must be present. */
if ((ctxt == NULL) || (ctxt->input == NULL))
10434
xmlDefaultSAXHandlerInit();
10436
xmlDetectSAX2(ctxt);
10441
* SAX: beginning of the document processing.
10443
/* Pass the default locator to the handler if it provides the callback. */
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10444
ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10447
* Get the 4 first bytes and decode the charset
10448
* if enc != XML_CHAR_ENCODING_NONE
10449
* plug some encoding conversion routines.
10451
/* Detection needs at least 4 available bytes; here it does not depend on
 * ctxt->encoding. */
if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10456
enc = xmlDetectCharEncoding(start, 4);
10457
if (enc != XML_CHAR_ENCODING_NONE) {
10458
xmlSwitchEncoding(ctxt, enc);
10464
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10468
* Check for the XMLDecl in the Prolog.
10471
/* Recognised by the five-character opener followed by a blank at offset 5. */
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10474
* Note that we will switch encoding on the fly.
10476
xmlParseXMLDecl(ctxt);
10477
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10479
* The XML REC instructs us to stop parsing right here
10485
/* NOTE(review): presumably the no-declaration branch; the result of
 * xmlCharStrdup is not checked on the lines shown here. */
ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10487
/* startDocument is skipped when SAX is disabled. */
if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10488
ctxt->sax->startDocument(ctxt->userData);
10491
* Doing validity checking on chunk doesn't make sense
10493
/* Validation and subset loading are switched off before parsing content. */
ctxt->instate = XML_PARSER_CONTENT;
10494
ctxt->validate = 0;
10495
ctxt->loadsubset = 0;
10498
xmlParseContent(ctxt);
10500
/* After the content: a pending end-tag opener means unbalanced markup, and
 * any other non-zero character is reported as extra content. */
if ((RAW == '<') && (NXT(1) == '/')) {
10501
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10502
} else if (RAW != 0) {
10503
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10507
* SAX: end of the document processing.
10509
/* Unlike startDocument above, this test does not look at disableSAX. */
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10510
ctxt->sax->endDocument(ctxt->userData);
10512
/* -1 is returned when the context is not flagged well-formed. */
if (! ctxt->wellFormed) return(-1);
10516
#ifdef LIBXML_PUSH_ENABLED
10517
/************************************************************************
10519
* Progressive parsing interfaces *
10521
************************************************************************/
10524
* xmlParseLookupSequence:
10525
* @ctxt: an XML parser context
10526
* @first: the first char to lookup
10527
* @next: the next char to lookup or zero
10528
* @third: the next char to lookup or zero
10530
* Try to find if a sequence (first, next, third) or just (first next) or
10531
* (first) is available in the input stream.
10532
* This function has a side effect of (possibly) incrementing ctxt->checkIndex
10533
* to avoid rescanning sequences of bytes, it DOES change the state of the
10534
* parser, do not use liberally.
10536
* Returns the offset from the current parsing point if the full sequence
10537
* is available, -1 otherwise.
10540
xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10541
xmlChar next, xmlChar third) {
10543
xmlParserInputPtr in;
10544
const xmlChar *buf;
10547
if (in == NULL) return(-1);
10548
/* Start scanning at the current position, expressed as an offset from
 * in->base. */
base = in->cur - in->base;
10549
if (base < 0) return(-1);
10550
/* Resume from ctxt->checkIndex when a previous call already scanned further. */
if (ctxt->checkIndex > base)
10551
base = ctxt->checkIndex;
10552
/* NOTE(review): the in->buf == NULL case presumably takes buf and len from
 * the input itself; otherwise they come from in->buf->buffer. Confirm. */
if (in->buf == NULL) {
10556
buf = in->buf->buffer->content;
10557
len = in->buf->buffer->use;
10559
/* take into account the sequence length */
10560
/* Shrinking len keeps buf[base + 1] and buf[base + 2] below len. */
if (third) len -= 2;
10561
else if (next) len --;
10562
for (;base < len;base++) {
10563
if (buf[base] == first) {
10565
/* Three-character form: both following characters must match. */
if ((buf[base + 1] != next) ||
10566
(buf[base + 2] != third)) continue;
10567
/* Two-character form: only the second character is compared. */
} else if (next != 0) {
10568
if (buf[base + 1] != next) continue;
10570
/* Match found: clear the resume index. */
ctxt->checkIndex = 0;
10573
xmlGenericError(xmlGenericErrorContext,
10574
"PP: lookup '%c' found at %d\n",
10576
else if (third == 0)
10577
xmlGenericError(xmlGenericErrorContext,
10578
"PP: lookup '%c%c' found at %d\n",
10579
first, next, base);
10581
xmlGenericError(xmlGenericErrorContext,
10582
"PP: lookup '%c%c%c' found at %d\n",
10583
first, next, third, base);
10585
/* The result is relative to in->cur, not to the start of the buffer. */
return(base - (in->cur - in->base));
10588
/* No match: remember how far the scan got so the next call can resume. */
ctxt->checkIndex = base;
10591
xmlGenericError(xmlGenericErrorContext,
10592
"PP: lookup '%c' failed\n", first);
10593
else if (third == 0)
10594
xmlGenericError(xmlGenericErrorContext,
10595
"PP: lookup '%c%c' failed\n", first, next);
10597
xmlGenericError(xmlGenericErrorContext,
10598
"PP: lookup '%c%c%c' failed\n", first, next, third);
10604
* xmlParseGetLasts:
10605
* @ctxt: an XML parser context
10606
* @lastlt: pointer to store the last '<' from the input
10607
* @lastgt: pointer to store the last '>' from the input
10609
* Lookup the last < and > in the current chunk
10612
xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10613
const xmlChar **lastgt) {
10614
const xmlChar *tmp;
10616
/* All three pointers are required; a missing one is logged as an internal
 * error. */
if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10617
xmlGenericError(xmlGenericErrorContext,
10618
"Internal error: xmlParseGetLasts\n");
10621
/* The search only runs in progressive mode with a single input on the stack. */
if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
10622
tmp = ctxt->input->end;
10624
/* Walk backwards from the end until a less-than sign is hit or tmp passes
 * the start of the input. */
while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
10625
/* tmp below base means no less-than sign was found. */
if (tmp < ctxt->input->base) {
10631
/* Forward scan for a greater-than sign; runs delimited by single or double
 * quotes are stepped over so a greater-than sign inside them is not taken. */
while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10632
if (*tmp == '\'') {
10634
while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10635
if (tmp < ctxt->input->end) tmp++;
10636
} else if (*tmp == '"') {
10638
while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10639
if (tmp < ctxt->input->end) tmp++;
10643
if (tmp < ctxt->input->end)
10648
/* Backward search for a greater-than sign, bounded by the start of the
 * input. */
while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10649
if (tmp >= ctxt->input->base)
10661
* xmlCheckCdataPush:
10662
* @utf: pointer to the block of characters
10663
* @len: length of the block in bytes
10665
* Check that the block of characters is okay as CData content [20]
10667
* Returns the number of bytes to pass if okay, a negative index where an
10668
* UTF-8 error occurred otherwise
10671
xmlCheckCdataPush(const xmlChar *utf, int len) {
10676
/* Nothing to check for a NULL pointer or a non-positive length. */
if ((utf == NULL) || (len <= 0))
10679
/* ix is advanced inside the body, by the width of each accepted sequence. */
for (ix = 0; ix < len;) { /* string is 0-terminated */
10681
if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 0 */
10684
/* Line feed, carriage return and tab are tested explicitly here. */
else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10688
} else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10689
/* Sequence cut off by the end of the block: return the bytes accepted so
 * far. */
if (ix + 2 > len) return(ix);
10690
/* The trailing byte must have the form 10xxxxxx. */
if ((utf[ix+1] & 0xc0 ) != 0x80)
10692
/* Rebuild the code point from the payload bits and test it with xmlIsCharQ. */
codepoint = (utf[ix] & 0x1f) << 6;
10693
codepoint |= utf[ix+1] & 0x3f;
10694
if (!xmlIsCharQ(codepoint))
10697
} else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10698
if (ix + 3 > len) return(ix);
10699
if (((utf[ix+1] & 0xc0) != 0x80) ||
10700
((utf[ix+2] & 0xc0) != 0x80))
10702
codepoint = (utf[ix] & 0xf) << 12;
10703
codepoint |= (utf[ix+1] & 0x3f) << 6;
10704
codepoint |= utf[ix+2] & 0x3f;
10705
if (!xmlIsCharQ(codepoint))
10708
} else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10709
if (ix + 4 > len) return(ix);
10710
if (((utf[ix+1] & 0xc0) != 0x80) ||
10711
((utf[ix+2] & 0xc0) != 0x80) ||
10712
((utf[ix+3] & 0xc0) != 0x80))
10714
codepoint = (utf[ix] & 0x7) << 18;
10715
codepoint |= (utf[ix+1] & 0x3f) << 12;
10716
codepoint |= (utf[ix+2] & 0x3f) << 6;
10717
codepoint |= utf[ix+3] & 0x3f;
10718
if (!xmlIsCharQ(codepoint))
10721
} else /* unknown encoding */
10728
* xmlParseTryOrFinish:
10729
* @ctxt: an XML parser context
10730
* @terminate: last chunk indicator
10732
* Try to progress on parsing
10734
* Returns zero if no parsing was possible
10737
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10741
const xmlChar *lastlt, *lastgt;
10743
if (ctxt->input == NULL)
10747
switch (ctxt->instate) {
10748
case XML_PARSER_EOF:
10749
xmlGenericError(xmlGenericErrorContext,
10750
"PP: try EOF\n"); break;
10751
case XML_PARSER_START:
10752
xmlGenericError(xmlGenericErrorContext,
10753
"PP: try START\n"); break;
10754
case XML_PARSER_MISC:
10755
xmlGenericError(xmlGenericErrorContext,
10756
"PP: try MISC\n");break;
10757
case XML_PARSER_COMMENT:
10758
xmlGenericError(xmlGenericErrorContext,
10759
"PP: try COMMENT\n");break;
10760
case XML_PARSER_PROLOG:
10761
xmlGenericError(xmlGenericErrorContext,
10762
"PP: try PROLOG\n");break;
10763
case XML_PARSER_START_TAG:
10764
xmlGenericError(xmlGenericErrorContext,
10765
"PP: try START_TAG\n");break;
10766
case XML_PARSER_CONTENT:
10767
xmlGenericError(xmlGenericErrorContext,
10768
"PP: try CONTENT\n");break;
10769
case XML_PARSER_CDATA_SECTION:
10770
xmlGenericError(xmlGenericErrorContext,
10771
"PP: try CDATA_SECTION\n");break;
10772
case XML_PARSER_END_TAG:
10773
xmlGenericError(xmlGenericErrorContext,
10774
"PP: try END_TAG\n");break;
10775
case XML_PARSER_ENTITY_DECL:
10776
xmlGenericError(xmlGenericErrorContext,
10777
"PP: try ENTITY_DECL\n");break;
10778
case XML_PARSER_ENTITY_VALUE:
10779
xmlGenericError(xmlGenericErrorContext,
10780
"PP: try ENTITY_VALUE\n");break;
10781
case XML_PARSER_ATTRIBUTE_VALUE:
10782
xmlGenericError(xmlGenericErrorContext,
10783
"PP: try ATTRIBUTE_VALUE\n");break;
10784
case XML_PARSER_DTD:
10785
xmlGenericError(xmlGenericErrorContext,
10786
"PP: try DTD\n");break;
10787
case XML_PARSER_EPILOG:
10788
xmlGenericError(xmlGenericErrorContext,
10789
"PP: try EPILOG\n");break;
10790
case XML_PARSER_PI:
10791
xmlGenericError(xmlGenericErrorContext,
10792
"PP: try PI\n");break;
10793
case XML_PARSER_IGNORE:
10794
xmlGenericError(xmlGenericErrorContext,
10795
"PP: try IGNORE\n");break;
10799
if ((ctxt->input != NULL) &&
10800
(ctxt->input->cur - ctxt->input->base > 4096)) {
10802
ctxt->checkIndex = 0;
10804
xmlParseGetLasts(ctxt, &lastlt, &lastgt);
10807
if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10812
* Pop-up of finished entities.
10814
while ((RAW == 0) && (ctxt->inputNr > 1))
10817
if (ctxt->input == NULL) break;
10818
if (ctxt->input->buf == NULL)
10819
avail = ctxt->input->length -
10820
(ctxt->input->cur - ctxt->input->base);
10823
* If we are operating on converted input, try to flush
10824
* remaining chars to avoid them stalling in the non-converted
10827
if ((ctxt->input->buf->raw != NULL) &&
10828
(ctxt->input->buf->raw->use > 0)) {
10829
int base = ctxt->input->base -
10830
ctxt->input->buf->buffer->content;
10831
int current = ctxt->input->cur - ctxt->input->base;
10833
xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10834
ctxt->input->base = ctxt->input->buf->buffer->content + base;
10835
ctxt->input->cur = ctxt->input->base + current;
10837
&ctxt->input->buf->buffer->content[
10838
ctxt->input->buf->buffer->use];
10840
avail = ctxt->input->buf->buffer->use -
10841
(ctxt->input->cur - ctxt->input->base);
10845
switch (ctxt->instate) {
10846
case XML_PARSER_EOF:
10848
* Document parsing is done !
10851
case XML_PARSER_START:
10852
if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10854
xmlCharEncoding enc;
10857
* Very first chars read from the document flow.
10863
* Get the 4 first bytes and decode the charset
10864
* if enc != XML_CHAR_ENCODING_NONE
10865
* plug some encoding conversion routines,
10866
* else xmlSwitchEncoding will set to (default)
10873
enc = xmlDetectCharEncoding(start, 4);
10874
xmlSwitchEncoding(ctxt, enc);
10880
cur = ctxt->input->cur[0];
10881
next = ctxt->input->cur[1];
10883
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10884
ctxt->sax->setDocumentLocator(ctxt->userData,
10885
&xmlDefaultSAXLocator);
10886
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10887
ctxt->instate = XML_PARSER_EOF;
10889
xmlGenericError(xmlGenericErrorContext,
10890
"PP: entering EOF\n");
10892
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10893
ctxt->sax->endDocument(ctxt->userData);
10896
if ((cur == '<') && (next == '?')) {
10897
/* PI or XML decl */
10898
if (avail < 5) return(ret);
10899
if ((!terminate) &&
10900
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10902
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10903
ctxt->sax->setDocumentLocator(ctxt->userData,
10904
&xmlDefaultSAXLocator);
10905
if ((ctxt->input->cur[2] == 'x') &&
10906
(ctxt->input->cur[3] == 'm') &&
10907
(ctxt->input->cur[4] == 'l') &&
10908
(IS_BLANK_CH(ctxt->input->cur[5]))) {
10911
xmlGenericError(xmlGenericErrorContext,
10912
"PP: Parsing XML Decl\n");
10914
xmlParseXMLDecl(ctxt);
10915
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10917
* The XML REC instructs us to stop parsing right
10920
ctxt->instate = XML_PARSER_EOF;
10923
ctxt->standalone = ctxt->input->standalone;
10924
if ((ctxt->encoding == NULL) &&
10925
(ctxt->input->encoding != NULL))
10926
ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10927
if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10928
(!ctxt->disableSAX))
10929
ctxt->sax->startDocument(ctxt->userData);
10930
ctxt->instate = XML_PARSER_MISC;
10932
xmlGenericError(xmlGenericErrorContext,
10933
"PP: entering MISC\n");
10936
ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10937
if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10938
(!ctxt->disableSAX))
10939
ctxt->sax->startDocument(ctxt->userData);
10940
ctxt->instate = XML_PARSER_MISC;
10942
xmlGenericError(xmlGenericErrorContext,
10943
"PP: entering MISC\n");
10947
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10948
ctxt->sax->setDocumentLocator(ctxt->userData,
10949
&xmlDefaultSAXLocator);
10950
ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10951
if (ctxt->version == NULL) {
10952
xmlErrMemory(ctxt, NULL);
10955
if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10956
(!ctxt->disableSAX))
10957
ctxt->sax->startDocument(ctxt->userData);
10958
ctxt->instate = XML_PARSER_MISC;
10960
xmlGenericError(xmlGenericErrorContext,
10961
"PP: entering MISC\n");
10965
case XML_PARSER_START_TAG: {
10966
const xmlChar *name;
10967
const xmlChar *prefix = NULL;
10968
const xmlChar *URI = NULL;
10969
int nsNr = ctxt->nsNr;
10971
if ((avail < 2) && (ctxt->inputNr == 1))
10973
cur = ctxt->input->cur[0];
10975
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10976
ctxt->instate = XML_PARSER_EOF;
10977
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10978
ctxt->sax->endDocument(ctxt->userData);
10982
if (ctxt->progressive) {
10983
/* > can be found unescaped in attribute values */
10984
if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
10986
} else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10990
if (ctxt->spaceNr == 0)
10991
spacePush(ctxt, -1);
10992
else if (*ctxt->space == -2)
10993
spacePush(ctxt, -1);
10995
spacePush(ctxt, *ctxt->space);
10996
#ifdef LIBXML_SAX1_ENABLED
10998
#endif /* LIBXML_SAX1_ENABLED */
10999
name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11000
#ifdef LIBXML_SAX1_ENABLED
11002
name = xmlParseStartTag(ctxt);
11003
#endif /* LIBXML_SAX1_ENABLED */
11004
if (ctxt->instate == XML_PARSER_EOF)
11006
if (name == NULL) {
11008
ctxt->instate = XML_PARSER_EOF;
11009
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11010
ctxt->sax->endDocument(ctxt->userData);
11013
#ifdef LIBXML_VALID_ENABLED
11015
* [ VC: Root Element Type ]
11016
* The Name in the document type declaration must match
11017
* the element type of the root element.
11019
if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11020
ctxt->node && (ctxt->node == ctxt->myDoc->children))
11021
ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11022
#endif /* LIBXML_VALID_ENABLED */
11025
* Check for an Empty Element.
11027
if ((RAW == '/') && (NXT(1) == '>')) {
11031
if ((ctxt->sax != NULL) &&
11032
(ctxt->sax->endElementNs != NULL) &&
11033
(!ctxt->disableSAX))
11034
ctxt->sax->endElementNs(ctxt->userData, name,
11036
if (ctxt->nsNr - nsNr > 0)
11037
nsPop(ctxt, ctxt->nsNr - nsNr);
11038
#ifdef LIBXML_SAX1_ENABLED
11040
if ((ctxt->sax != NULL) &&
11041
(ctxt->sax->endElement != NULL) &&
11042
(!ctxt->disableSAX))
11043
ctxt->sax->endElement(ctxt->userData, name);
11044
#endif /* LIBXML_SAX1_ENABLED */
11047
if (ctxt->nameNr == 0) {
11048
ctxt->instate = XML_PARSER_EPILOG;
11050
ctxt->instate = XML_PARSER_CONTENT;
11057
xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11058
"Couldn't find end of Start Tag %s\n",
11064
nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11065
#ifdef LIBXML_SAX1_ENABLED
11067
namePush(ctxt, name);
11068
#endif /* LIBXML_SAX1_ENABLED */
11070
ctxt->instate = XML_PARSER_CONTENT;
11073
case XML_PARSER_CONTENT: {
11074
const xmlChar *test;
11076
if ((avail < 2) && (ctxt->inputNr == 1))
11078
cur = ctxt->input->cur[0];
11079
next = ctxt->input->cur[1];
11082
cons = ctxt->input->consumed;
11083
if ((cur == '<') && (next == '/')) {
11084
ctxt->instate = XML_PARSER_END_TAG;
11086
} else if ((cur == '<') && (next == '?')) {
11087
if ((!terminate) &&
11088
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11091
} else if ((cur == '<') && (next != '!')) {
11092
ctxt->instate = XML_PARSER_START_TAG;
11094
} else if ((cur == '<') && (next == '!') &&
11095
(ctxt->input->cur[2] == '-') &&
11096
(ctxt->input->cur[3] == '-')) {
11101
ctxt->input->cur += 4;
11102
term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11103
ctxt->input->cur -= 4;
11104
if ((!terminate) && (term < 0))
11106
xmlParseComment(ctxt);
11107
ctxt->instate = XML_PARSER_CONTENT;
11108
} else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11109
(ctxt->input->cur[2] == '[') &&
11110
(ctxt->input->cur[3] == 'C') &&
11111
(ctxt->input->cur[4] == 'D') &&
11112
(ctxt->input->cur[5] == 'A') &&
11113
(ctxt->input->cur[6] == 'T') &&
11114
(ctxt->input->cur[7] == 'A') &&
11115
(ctxt->input->cur[8] == '[')) {
11117
ctxt->instate = XML_PARSER_CDATA_SECTION;
11119
} else if ((cur == '<') && (next == '!') &&
11122
} else if (cur == '&') {
11123
if ((!terminate) &&
11124
(xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11126
xmlParseReference(ctxt);
11128
/* TODO Avoid the extra copy, handle directly !!! */
11130
* Goal of the following test is:
11131
* - minimize calls to the SAX 'character' callback
11132
* when they are mergeable
11133
* - handle a problem for isBlank when we only parse
11134
* a sequence of blank chars and the next one is
11135
* not available to check against '<' presence.
11136
* - tries to homogenize the differences in SAX
11137
* callbacks between the push and pull versions
11140
if ((ctxt->inputNr == 1) &&
11141
(avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11143
if (ctxt->progressive) {
11144
if ((lastlt == NULL) ||
11145
(ctxt->input->cur > lastlt))
11147
} else if (xmlParseLookupSequence(ctxt,
11153
ctxt->checkIndex = 0;
11154
xmlParseCharData(ctxt, 0);
11157
* Pop-up of finished entities.
11159
while ((RAW == 0) && (ctxt->inputNr > 1))
11161
if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11162
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11163
"detected an error in element content\n");
11164
ctxt->instate = XML_PARSER_EOF;
11169
case XML_PARSER_END_TAG:
11173
if (ctxt->progressive) {
11174
/* > can be found unescaped in attribute values */
11175
if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11177
} else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11182
xmlParseEndTag2(ctxt,
11183
(void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11184
(void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11185
(int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11188
#ifdef LIBXML_SAX1_ENABLED
11190
xmlParseEndTag1(ctxt, 0);
11191
#endif /* LIBXML_SAX1_ENABLED */
11192
if (ctxt->instate == XML_PARSER_EOF) {
11194
} else if (ctxt->nameNr == 0) {
11195
ctxt->instate = XML_PARSER_EPILOG;
11197
ctxt->instate = XML_PARSER_CONTENT;
11200
case XML_PARSER_CDATA_SECTION: {
11202
* The Push mode need to have the SAX callback for
11203
* cdataBlock merge back contiguous callbacks.
11207
base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11209
if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11212
tmp = xmlCheckCdataPush(ctxt->input->cur,
11213
XML_PARSER_BIG_BUFFER_SIZE);
11216
ctxt->input->cur += tmp;
11217
goto encoding_error;
11219
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11220
if (ctxt->sax->cdataBlock != NULL)
11221
ctxt->sax->cdataBlock(ctxt->userData,
11222
ctxt->input->cur, tmp);
11223
else if (ctxt->sax->characters != NULL)
11224
ctxt->sax->characters(ctxt->userData,
11225
ctxt->input->cur, tmp);
11228
ctxt->checkIndex = 0;
11234
tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11235
if ((tmp < 0) || (tmp != base)) {
11237
ctxt->input->cur += tmp;
11238
goto encoding_error;
11240
if ((ctxt->sax != NULL) && (base == 0) &&
11241
(ctxt->sax->cdataBlock != NULL) &&
11242
(!ctxt->disableSAX)) {
11244
* Special case to provide identical behaviour
11245
* between pull and push parsers on empty CDATA
11248
if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11249
(!strncmp((const char *)&ctxt->input->cur[-9],
11251
ctxt->sax->cdataBlock(ctxt->userData,
11253
} else if ((ctxt->sax != NULL) && (base > 0) &&
11254
(!ctxt->disableSAX)) {
11255
if (ctxt->sax->cdataBlock != NULL)
11256
ctxt->sax->cdataBlock(ctxt->userData,
11257
ctxt->input->cur, base);
11258
else if (ctxt->sax->characters != NULL)
11259
ctxt->sax->characters(ctxt->userData,
11260
ctxt->input->cur, base);
11263
ctxt->checkIndex = 0;
11264
ctxt->instate = XML_PARSER_CONTENT;
11266
xmlGenericError(xmlGenericErrorContext,
11267
"PP: entering CONTENT\n");
11272
case XML_PARSER_MISC:
11274
if (ctxt->input->buf == NULL)
11275
avail = ctxt->input->length -
11276
(ctxt->input->cur - ctxt->input->base);
11278
avail = ctxt->input->buf->buffer->use -
11279
(ctxt->input->cur - ctxt->input->base);
11282
cur = ctxt->input->cur[0];
11283
next = ctxt->input->cur[1];
11284
if ((cur == '<') && (next == '?')) {
11285
if ((!terminate) &&
11286
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11289
xmlGenericError(xmlGenericErrorContext,
11290
"PP: Parsing PI\n");
11293
ctxt->checkIndex = 0;
11294
} else if ((cur == '<') && (next == '!') &&
11295
(ctxt->input->cur[2] == '-') &&
11296
(ctxt->input->cur[3] == '-')) {
11297
if ((!terminate) &&
11298
(xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11301
xmlGenericError(xmlGenericErrorContext,
11302
"PP: Parsing Comment\n");
11304
xmlParseComment(ctxt);
11305
ctxt->instate = XML_PARSER_MISC;
11306
ctxt->checkIndex = 0;
11307
} else if ((cur == '<') && (next == '!') &&
11308
(ctxt->input->cur[2] == 'D') &&
11309
(ctxt->input->cur[3] == 'O') &&
11310
(ctxt->input->cur[4] == 'C') &&
11311
(ctxt->input->cur[5] == 'T') &&
11312
(ctxt->input->cur[6] == 'Y') &&
11313
(ctxt->input->cur[7] == 'P') &&
11314
(ctxt->input->cur[8] == 'E')) {
11315
if ((!terminate) &&
11316
(xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
11319
xmlGenericError(xmlGenericErrorContext,
11320
"PP: Parsing internal subset\n");
11322
ctxt->inSubset = 1;
11323
xmlParseDocTypeDecl(ctxt);
11325
ctxt->instate = XML_PARSER_DTD;
11327
xmlGenericError(xmlGenericErrorContext,
11328
"PP: entering DTD\n");
11332
* Create and update the external subset.
11334
ctxt->inSubset = 2;
11335
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11336
(ctxt->sax->externalSubset != NULL))
11337
ctxt->sax->externalSubset(ctxt->userData,
11338
ctxt->intSubName, ctxt->extSubSystem,
11340
ctxt->inSubset = 0;
11341
xmlCleanSpecialAttr(ctxt);
11342
ctxt->instate = XML_PARSER_PROLOG;
11344
xmlGenericError(xmlGenericErrorContext,
11345
"PP: entering PROLOG\n");
11348
} else if ((cur == '<') && (next == '!') &&
11352
ctxt->instate = XML_PARSER_START_TAG;
11353
ctxt->progressive = 1;
11354
xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11356
xmlGenericError(xmlGenericErrorContext,
11357
"PP: entering START_TAG\n");
11361
case XML_PARSER_PROLOG:
11363
if (ctxt->input->buf == NULL)
11364
avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11366
avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11369
cur = ctxt->input->cur[0];
11370
next = ctxt->input->cur[1];
11371
if ((cur == '<') && (next == '?')) {
11372
if ((!terminate) &&
11373
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11376
xmlGenericError(xmlGenericErrorContext,
11377
"PP: Parsing PI\n");
11380
} else if ((cur == '<') && (next == '!') &&
11381
(ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11382
if ((!terminate) &&
11383
(xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11386
xmlGenericError(xmlGenericErrorContext,
11387
"PP: Parsing Comment\n");
11389
xmlParseComment(ctxt);
11390
ctxt->instate = XML_PARSER_PROLOG;
11391
} else if ((cur == '<') && (next == '!') &&
11395
ctxt->instate = XML_PARSER_START_TAG;
11396
if (ctxt->progressive == 0)
11397
ctxt->progressive = 1;
11398
xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11400
xmlGenericError(xmlGenericErrorContext,
11401
"PP: entering START_TAG\n");
11405
case XML_PARSER_EPILOG:
11407
if (ctxt->input->buf == NULL)
11408
avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11410
avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11413
cur = ctxt->input->cur[0];
11414
next = ctxt->input->cur[1];
11415
if ((cur == '<') && (next == '?')) {
11416
if ((!terminate) &&
11417
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11420
xmlGenericError(xmlGenericErrorContext,
11421
"PP: Parsing PI\n");
11424
ctxt->instate = XML_PARSER_EPILOG;
11425
} else if ((cur == '<') && (next == '!') &&
11426
(ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11427
if ((!terminate) &&
11428
(xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11431
xmlGenericError(xmlGenericErrorContext,
11432
"PP: Parsing Comment\n");
11434
xmlParseComment(ctxt);
11435
ctxt->instate = XML_PARSER_EPILOG;
11436
} else if ((cur == '<') && (next == '!') &&
11440
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11441
ctxt->instate = XML_PARSER_EOF;
11443
xmlGenericError(xmlGenericErrorContext,
11444
"PP: entering EOF\n");
11446
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11447
ctxt->sax->endDocument(ctxt->userData);
11451
case XML_PARSER_DTD: {
11453
* Sorry but progressive parsing of the internal subset
11454
* is not expected to be supported. We first check that
11455
* the full content of the internal subset is available and
11456
* the parsing is launched only at that point.
11457
* Internal subset ends up with "']' S? '>'" in an unescaped
11458
* section and not in a ']]>' sequence which are conditional
11459
* sections (whoever argued to keep that crap in XML deserve
11460
* a place in hell !).
11466
base = ctxt->input->cur - ctxt->input->base;
11467
if (base < 0) return(0);
11468
if (ctxt->checkIndex > base)
11469
base = ctxt->checkIndex;
11470
buf = ctxt->input->buf->buffer->content;
11471
for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11474
if (buf[base] == quote)
11478
if ((quote == 0) && (buf[base] == '<')) {
11480
/* special handling of comments */
11481
if (((unsigned int) base + 4 <
11482
ctxt->input->buf->buffer->use) &&
11483
(buf[base + 1] == '!') &&
11484
(buf[base + 2] == '-') &&
11485
(buf[base + 3] == '-')) {
11486
for (;(unsigned int) base + 3 <
11487
ctxt->input->buf->buffer->use; base++) {
11488
if ((buf[base] == '-') &&
11489
(buf[base + 1] == '-') &&
11490
(buf[base + 2] == '>')) {
11498
fprintf(stderr, "unfinished comment\n");
11505
if (buf[base] == '"') {
11509
if (buf[base] == '\'') {
11513
if (buf[base] == ']') {
11515
fprintf(stderr, "%c%c%c%c: ", buf[base],
11516
buf[base + 1], buf[base + 2], buf[base + 3]);
11518
if ((unsigned int) base +1 >=
11519
ctxt->input->buf->buffer->use)
11521
if (buf[base + 1] == ']') {
11522
/* conditional crap, skip both ']' ! */
11527
(unsigned int) base + i < ctxt->input->buf->buffer->use;
11529
if (buf[base + i] == '>') {
11531
fprintf(stderr, "found\n");
11533
goto found_end_int_subset;
11535
if (!IS_BLANK_CH(buf[base + i])) {
11537
fprintf(stderr, "not found\n");
11539
goto not_end_of_int_subset;
11543
fprintf(stderr, "end of stream\n");
11548
not_end_of_int_subset:
11549
continue; /* for */
11552
* We didn't find the end of the Internal subset
11556
xmlGenericError(xmlGenericErrorContext,
11557
"PP: lookup of int subset end filed\n");
11561
found_end_int_subset:
11562
xmlParseInternalSubset(ctxt);
11563
ctxt->inSubset = 2;
11564
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11565
(ctxt->sax->externalSubset != NULL))
11566
ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11567
ctxt->extSubSystem, ctxt->extSubURI);
11568
ctxt->inSubset = 0;
11569
xmlCleanSpecialAttr(ctxt);
11570
ctxt->instate = XML_PARSER_PROLOG;
11571
ctxt->checkIndex = 0;
11573
xmlGenericError(xmlGenericErrorContext,
11574
"PP: entering PROLOG\n");
11578
case XML_PARSER_COMMENT:
11579
xmlGenericError(xmlGenericErrorContext,
11580
"PP: internal error, state == COMMENT\n");
11581
ctxt->instate = XML_PARSER_CONTENT;
11583
xmlGenericError(xmlGenericErrorContext,
11584
"PP: entering CONTENT\n");
11587
case XML_PARSER_IGNORE:
11588
xmlGenericError(xmlGenericErrorContext,
11589
"PP: internal error, state == IGNORE");
11590
ctxt->instate = XML_PARSER_DTD;
11592
xmlGenericError(xmlGenericErrorContext,
11593
"PP: entering DTD\n");
11596
case XML_PARSER_PI:
11597
xmlGenericError(xmlGenericErrorContext,
11598
"PP: internal error, state == PI\n");
11599
ctxt->instate = XML_PARSER_CONTENT;
11601
xmlGenericError(xmlGenericErrorContext,
11602
"PP: entering CONTENT\n");
11605
case XML_PARSER_ENTITY_DECL:
11606
xmlGenericError(xmlGenericErrorContext,
11607
"PP: internal error, state == ENTITY_DECL\n");
11608
ctxt->instate = XML_PARSER_DTD;
11610
xmlGenericError(xmlGenericErrorContext,
11611
"PP: entering DTD\n");
11614
case XML_PARSER_ENTITY_VALUE:
11615
xmlGenericError(xmlGenericErrorContext,
11616
"PP: internal error, state == ENTITY_VALUE\n");
11617
ctxt->instate = XML_PARSER_CONTENT;
11619
xmlGenericError(xmlGenericErrorContext,
11620
"PP: entering DTD\n");
11623
case XML_PARSER_ATTRIBUTE_VALUE:
11624
xmlGenericError(xmlGenericErrorContext,
11625
"PP: internal error, state == ATTRIBUTE_VALUE\n");
11626
ctxt->instate = XML_PARSER_START_TAG;
11628
xmlGenericError(xmlGenericErrorContext,
11629
"PP: entering START_TAG\n");
11632
case XML_PARSER_SYSTEM_LITERAL:
11633
xmlGenericError(xmlGenericErrorContext,
11634
"PP: internal error, state == SYSTEM_LITERAL\n");
11635
ctxt->instate = XML_PARSER_START_TAG;
11637
xmlGenericError(xmlGenericErrorContext,
11638
"PP: entering START_TAG\n");
11641
case XML_PARSER_PUBLIC_LITERAL:
11642
xmlGenericError(xmlGenericErrorContext,
11643
"PP: internal error, state == PUBLIC_LITERAL\n");
11644
ctxt->instate = XML_PARSER_START_TAG;
11646
xmlGenericError(xmlGenericErrorContext,
11647
"PP: entering START_TAG\n");
11654
xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11661
snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11662
ctxt->input->cur[0], ctxt->input->cur[1],
11663
ctxt->input->cur[2], ctxt->input->cur[3]);
11664
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11665
"Input is not proper UTF-8, indicate encoding !\n%s",
11666
BAD_CAST buffer, NULL);
11673
* @ctxt: an XML parser context
11674
* @chunk: a char array
11675
* @size: the size in byte of the chunk
11676
* @terminate: last chunk indicator
11678
* Parse a Chunk of memory
11680
* Returns zero if no error, the xmlParserErrors otherwise.
11683
/*
 * Push-mode entry point: appends @size bytes of @chunk to
 * ctxt->input->buf and runs xmlParseTryOrFinish() on what is buffered.
 * The final return hands back ctxt->errNo as an xmlParserErrors value.
 */
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11689
return(XML_ERR_INTERNAL_ERROR);
/* An earlier error that also disabled SAX is reported again, unparsed. */
11690
if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11691
return(ctxt->errNo);
11692
if (ctxt->instate == XML_PARSER_START)
11693
xmlDetectSAX2(ctxt);
/*
 * Non-final chunk whose last byte is '\r'.
 * NOTE(review): the body of this branch is not visible here; presumably
 * it sets end_in_lf and holds the '\r' back until after parsing (see
 * the one-byte "\r" push further down) -- confirm.
 */
11694
if ((size > 0) && (chunk != NULL) && (!terminate) &&
11695
(chunk[size - 1] == '\r')) {
11702
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11703
(ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
/*
 * Keep base and cur as offsets relative to the buffer content; the
 * input pointers are rebuilt from these offsets after the push below.
 */
11704
int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11705
int cur = ctxt->input->cur - ctxt->input->base;
11709
* Specific handling if we autodetected an encoding, we should not
11710
* push more than the first line ... which depend on the encoding
11711
* And only push the rest once the final encoding was detected
11713
if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
11714
(ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
/*
 * len is the byte budget for this first push; 45 is the starting value.
 * NOTE(review): the statements executed by the UTF-16 / UCS-4 tests
 * below are not visible here -- presumably they change len, confirm.
 */
11715
unsigned int len = 45;
11717
if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11718
BAD_CAST "UTF-16")) ||
11719
(xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11720
BAD_CAST "UTF16")))
11722
else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11723
BAD_CAST "UCS-4")) ||
11724
(xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
/* Raw bytes already consumed by the buffer count against the budget. */
11728
if (ctxt->input->buf->rawconsumed < len)
11729
len -= ctxt->input->buf->rawconsumed;
11732
* Change size for reading the initial declaration only
11733
* if size is greater than len. Otherwise, memmove in xmlBufferAdd
11734
* will blindly copy extra bytes from memory.
11736
if ((unsigned int) size > len) {
/* remain = number of bytes of this chunk left over beyond the budget. */
11737
remain = size - len;
11743
res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
/*
 * NOTE(review): XML_PARSER_EOF is stored in errNo and returned here,
 * although everywhere else in this function it is compared against
 * ctxt->instate; the documented return type is xmlParserErrors.
 * Confirm whether an error code was intended.
 */
11745
ctxt->errNo = XML_PARSER_EOF;
11746
ctxt->disableSAX = 1;
11747
return (XML_PARSER_EOF);
/* Rebuild the input pointers from the offsets saved before the push. */
11749
ctxt->input->base = ctxt->input->buf->buffer->content + base;
11750
ctxt->input->cur = ctxt->input->base + cur;
11752
&ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
11754
xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
/*
 * No new data was pushed and the parser is not at EOF: when the buffer
 * has an encoder, a converted buffer and a raw buffer, run the input
 * conversion from in->raw into in->buffer.
 */
11757
} else if (ctxt->instate != XML_PARSER_EOF) {
11758
if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11759
xmlParserInputBufferPtr in = ctxt->input->buf;
11760
if ((in->encoder != NULL) && (in->buffer != NULL) &&
11761
(in->raw != NULL)) {
11764
nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11767
xmlGenericError(xmlGenericErrorContext,
11768
"xmlParseChunk: encoder error\n");
11769
return(XML_ERR_INVALID_ENCODING);
/*
 * Two parse calls: one with terminate forced to 0, one with the caller's
 * @terminate.  NOTE(review): the condition selecting between them is
 * not visible here; presumably it depends on `remain` -- confirm.
 */
11775
xmlParseTryOrFinish(ctxt, 0);
11777
xmlParseTryOrFinish(ctxt, terminate);
11778
if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11779
return(ctxt->errNo);
/*
 * When end_in_lf is set, push a single "\r" into the input buffer after
 * the parse step above.  Despite its name, the flag is paired with a
 * carriage return, not a line feed.
 */
11787
if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11788
(ctxt->input->buf != NULL)) {
11789
xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11793
* Check for termination
/* avail = number of bytes of the current input not yet consumed. */
11797
if (ctxt->input != NULL) {
11798
if (ctxt->input->buf == NULL)
11799
avail = ctxt->input->length -
11800
(ctxt->input->cur - ctxt->input->base);
11802
avail = ctxt->input->buf->buffer->use -
11803
(ctxt->input->cur - ctxt->input->base);
/*
 * XML_ERR_DOCUMENT_END is raised when the state is neither EOF nor
 * EPILOG, and also when the state is EPILOG with input still unread.
 */
11806
if ((ctxt->instate != XML_PARSER_EOF) &&
11807
(ctxt->instate != XML_PARSER_EPILOG)) {
11808
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11810
if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
11811
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
/* endDocument is only delivered if the parser had not reached EOF yet. */
11813
if (ctxt->instate != XML_PARSER_EOF) {
11814
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11815
ctxt->sax->endDocument(ctxt->userData);
11817
ctxt->instate = XML_PARSER_EOF;
11819
return((xmlParserErrors) ctxt->errNo);
11822
/************************************************************************
11824
* I/O front end functions to the parser *
11826
************************************************************************/
11829
* xmlCreatePushParserCtxt:
11830
* @sax: a SAX handler
11831
* @user_data: The user data returned on SAX callbacks
11832
* @chunk: a pointer to an array of chars
11833
* @size: number of chars in the array
11834
* @filename: an optional file name or URI
11836
* Create a parser context for using the XML parser in push mode.
11837
* If @chunk is non-NULL and @size is non-zero, the data is used to detect
11838
* the encoding. The remaining characters will be parsed so they
11839
* don't need to be fed in again through xmlParseChunk.
11840
* To allow content encoding detection, @size should be >= 4
11841
* The value of @filename is used for fetching external entities
11842
* and error/warning reports.
11844
* Returns the new parser context or NULL
11848
/*
 * Builds a parser context for push parsing: allocates an input buffer
 * and a context, installs a private copy of the caller's SAX table,
 * attaches an input stream backed by the buffer, and pushes the
 * optional initial @chunk into it.
 */
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11849
const char *chunk, int size, const char *filename) {
11850
xmlParserCtxtPtr ctxt;
11851
xmlParserInputPtr inputStream;
11852
xmlParserInputBufferPtr buf;
11853
xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11856
* plug some encoding conversion routines
/* Encoding detection is only attempted with at least 4 bytes of data. */
11858
if ((chunk != NULL) && (size >= 4))
11859
enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11861
buf = xmlAllocParserInputBuffer(enc);
11862
if (buf == NULL) return(NULL);
11864
ctxt = xmlNewParserCtxt();
11865
if (ctxt == NULL) {
11866
xmlErrMemory(NULL, "creating parser: out of memory\n");
11867
xmlFreeParserInputBuffer(buf);
11870
ctxt->dictNames = 1;
/* pushTab is sized at three pointer slots per nameMax entry. */
11871
ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11872
if (ctxt->pushTab == NULL) {
11873
xmlErrMemory(ctxt, NULL);
11874
xmlFreeParserInputBuffer(buf);
11875
xmlFreeParserCtxt(ctxt);
/*
 * Replace the context's SAX table with a freshly allocated, zeroed one.
 * With SAX1 enabled the old table is only freed when it is not the
 * global xmlDefaultSAXHandler.
 */
11879
#ifdef LIBXML_SAX1_ENABLED
11880
if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
11881
#endif /* LIBXML_SAX1_ENABLED */
11882
xmlFree(ctxt->sax);
11883
ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11884
if (ctxt->sax == NULL) {
11885
xmlErrMemory(ctxt, NULL);
11886
xmlFreeParserInputBuffer(buf);
11887
xmlFreeParserCtxt(ctxt);
11890
memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
/*
 * A table tagged XML_SAX2_MAGIC is copied in full; the other memcpy
 * copies only sizeof(xmlSAXHandlerV1) bytes.
 */
11891
if (sax->initialized == XML_SAX2_MAGIC)
11892
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11894
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
11895
if (user_data != NULL)
11896
ctxt->userData = user_data;
11898
if (filename == NULL) {
11899
ctxt->directory = NULL;
11901
ctxt->directory = xmlParserGetDirectory(filename);
11904
inputStream = xmlNewInputStream(ctxt);
11905
if (inputStream == NULL) {
11906
xmlFreeParserCtxt(ctxt);
11907
xmlFreeParserInputBuffer(buf);
11911
if (filename == NULL)
11912
inputStream->filename = NULL;
11914
inputStream->filename = (char *)
11915
xmlCanonicPath((const xmlChar *) filename);
/*
 * NOTE(review): on this failure path only ctxt and buf are visibly
 * released; inputStream has not been pushed onto ctxt yet, so it looks
 * like it is leaked here -- confirm against the full source.
 */
11916
if (inputStream->filename == NULL) {
11917
xmlFreeParserCtxt(ctxt);
11918
xmlFreeParserInputBuffer(buf);
/* Attach the buffer; base and cur both start at the buffer content. */
11922
inputStream->buf = buf;
11923
inputStream->base = inputStream->buf->buffer->content;
11924
inputStream->cur = inputStream->buf->buffer->content;
11926
&inputStream->buf->buffer->content[inputStream->buf->buffer->use];
11928
inputPush(ctxt, inputStream);
11931
* If the caller didn't provide an initial 'chunk' for determining
11932
* the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11933
* that it can be automatically determined later
11935
if ((size == 0) || (chunk == NULL)) {
11936
ctxt->charset = XML_CHAR_ENCODING_NONE;
11937
} else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
/*
 * Save base/cur as offsets, push the initial chunk, then rebuild the
 * input pointers from buffer->content plus those offsets.
 */
11938
int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11939
int cur = ctxt->input->cur - ctxt->input->base;
11941
xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11943
ctxt->input->base = ctxt->input->buf->buffer->content + base;
11944
ctxt->input->cur = ctxt->input->base + cur;
11946
&ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
11948
xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
/* Apply the encoding detected from the initial chunk, if there was one. */
11952
if (enc != XML_CHAR_ENCODING_NONE) {
11953
xmlSwitchEncoding(ctxt, enc);
11958
#endif /* LIBXML_PUSH_ENABLED */
11962
* @ctxt: an XML parser context
11964
* Blocks further parser processing
11967
/*
 * Halts parsing on @ctxt: the state is forced to XML_PARSER_EOF and SAX
 * callbacks are disabled.
 */
xmlStopParser(xmlParserCtxtPtr ctxt) {
11970
ctxt->instate = XML_PARSER_EOF;
11971
ctxt->disableSAX = 1;
11972
if (ctxt->input != NULL) {
/* Point cur and base of the current input at an empty string literal. */
11973
ctxt->input->cur = BAD_CAST"";
11974
ctxt->input->base = ctxt->input->cur;
11979
* xmlCreateIOParserCtxt:
11980
* @sax: a SAX handler
11981
* @user_data: The user data returned on SAX callbacks
11982
* @ioread: an I/O read function
11983
* @ioclose: an I/O close function
11984
* @ioctx: an I/O handler
11985
* @enc: the charset encoding if known
11987
* Create a parser context for using the XML parser with an existing
11990
* Returns the new parser context or NULL
11993
/*
 * Builds a parser context whose input comes from caller-supplied I/O
 * callbacks: wraps them in an input buffer, installs a private copy of
 * the caller's SAX table and pushes an I/O input stream on the context.
 */
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11994
xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11995
void *ioctx, xmlCharEncoding enc) {
11996
xmlParserCtxtPtr ctxt;
11997
xmlParserInputPtr inputStream;
11998
xmlParserInputBufferPtr buf;
/* A read callback is mandatory. */
12000
if (ioread == NULL) return(NULL);
12002
buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
/*
 * NOTE(review): the statement controlled by this test is not visible
 * here; presumably it is part of the buf == NULL failure handling and
 * invokes ioclose on ioctx -- confirm.
 */
12004
if (ioclose != NULL)
12009
ctxt = xmlNewParserCtxt();
12010
if (ctxt == NULL) {
12011
xmlFreeParserInputBuffer(buf);
/*
 * Replace the context's SAX table with a freshly allocated, zeroed one.
 * With SAX1 enabled the old table is only freed when it is not the
 * global xmlDefaultSAXHandler.
 */
12015
#ifdef LIBXML_SAX1_ENABLED
12016
if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12017
#endif /* LIBXML_SAX1_ENABLED */
12018
xmlFree(ctxt->sax);
12019
ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
/*
 * NOTE(review): buf is not visibly released on this failure path,
 * unlike the ctxt == NULL path above -- confirm whether it leaks.
 */
12020
if (ctxt->sax == NULL) {
12021
xmlErrMemory(ctxt, NULL);
12022
xmlFreeParserCtxt(ctxt);
12025
memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
/*
 * A table tagged XML_SAX2_MAGIC is copied in full; the other memcpy
 * copies only sizeof(xmlSAXHandlerV1) bytes.
 */
12026
if (sax->initialized == XML_SAX2_MAGIC)
12027
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12029
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12030
if (user_data != NULL)
12031
ctxt->userData = user_data;
/* Wrap the buffer in an input stream and make it the current input. */
12034
inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12035
if (inputStream == NULL) {
12036
xmlFreeParserCtxt(ctxt);
12039
inputPush(ctxt, inputStream);
12044
#ifdef LIBXML_VALID_ENABLED
12045
/************************************************************************
12047
* Front ends when parsing a DTD *
12049
************************************************************************/
12053
* @sax: the SAX handler block or NULL
12054
* @input: an Input Buffer
12055
* @enc: the charset encoding if known
12057
* Load and parse a DTD
12059
* Returns the resulting xmlDtdPtr or NULL in case of error.
12060
* @input will be freed by the function in any case.
12064
/*
 * Parses a DTD read from @input: a temporary context and document are
 * created, the input is parsed as an external subset, and on a
 * well-formed result the subset is detached from the temporary document
 * and handed back in ret.
 */
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12065
xmlCharEncoding enc) {
12066
xmlDtdPtr ret = NULL;
12067
xmlParserCtxtPtr ctxt;
12068
xmlParserInputPtr pinput = NULL;
12074
ctxt = xmlNewParserCtxt();
12075
if (ctxt == NULL) {
12076
xmlFreeParserInputBuffer(input);
12081
* Set-up the SAX context
/*
 * NOTE(review): the assignment that follows this free is not visible
 * here; the `if (sax != NULL) ctxt->sax = NULL` resets on the exit
 * paths suggest the caller's @sax is installed on ctxt -- confirm.
 */
12084
if (ctxt->sax != NULL)
12085
xmlFree(ctxt->sax);
12087
ctxt->userData = ctxt;
12089
xmlDetectSAX2(ctxt);
12092
* generate a parser input from the I/O handler
12095
pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12096
if (pinput == NULL) {
/* ctxt->sax is cleared before the context is freed when @sax was given. */
12097
if (sax != NULL) ctxt->sax = NULL;
12098
xmlFreeParserInputBuffer(input);
12099
xmlFreeParserCtxt(ctxt);
12104
* plug some encoding conversion routines here.
12106
if (xmlPushInput(ctxt, pinput) < 0) {
12107
if (sax != NULL) ctxt->sax = NULL;
12108
xmlFreeParserCtxt(ctxt);
/* An encoding supplied by the caller is applied right away. */
12111
if (enc != XML_CHAR_ENCODING_NONE) {
12112
xmlSwitchEncoding(ctxt, enc);
12115
pinput->filename = NULL;
12118
pinput->base = ctxt->input->cur;
12119
pinput->cur = ctxt->input->cur;
12120
pinput->free = NULL;
12123
* let's parse that entity knowing it's an external subset.
12125
ctxt->inSubset = 2;
12126
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
/*
 * NOTE(review): only the error report is visible on this path; if the
 * hidden lines return from here, ctxt is not freed and ctxt->sax is not
 * reset as on the other exits -- confirm.
 */
12127
if (ctxt->myDoc == NULL) {
12128
xmlErrMemory(ctxt, "New Doc failed");
/* Temporary document carrying a placeholder external subset "none". */
12131
ctxt->myDoc->properties = XML_DOC_INTERNAL;
12132
ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12133
BAD_CAST "none", BAD_CAST "none");
/* No encoding given and at least 4 bytes available: detect it. */
12135
if ((enc == XML_CHAR_ENCODING_NONE) &&
12136
((ctxt->input->end - ctxt->input->cur) >= 4)) {
12138
* Get the 4 first bytes and decode the charset
12139
* if enc != XML_CHAR_ENCODING_NONE
12140
* plug some encoding conversion routines.
12146
enc = xmlDetectCharEncoding(start, 4);
12147
if (enc != XML_CHAR_ENCODING_NONE) {
12148
xmlSwitchEncoding(ctxt, enc);
12152
xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
/*
 * For a well-formed parse the external subset is taken out of the
 * temporary document (extSubset set to NULL) before that document is
 * freed below.
 */
12154
if (ctxt->myDoc != NULL) {
12155
if (ctxt->wellFormed) {
12156
ret = ctxt->myDoc->extSubset;
12157
ctxt->myDoc->extSubset = NULL;
/*
 * Walk the children of the detached DTD.  NOTE(review): the loop body
 * is not visible here; presumably it clears each node's doc pointer.
 */
12162
tmp = ret->children;
12163
while (tmp != NULL) {
12171
xmlFreeDoc(ctxt->myDoc);
12172
ctxt->myDoc = NULL;
12174
if (sax != NULL) ctxt->sax = NULL;
12175
xmlFreeParserCtxt(ctxt);
12182
* @sax: the SAX handler block
12183
* @ExternalID: a NAME* containing the External ID of the DTD
12184
* @SystemID: a NAME* containing the URL to the DTD
12186
* Load and parse an external subset.
12188
* Returns the resulting xmlDtdPtr or NULL in case of error.
12192
/*
 * Loads the DTD identified by @ExternalID / @SystemID through the
 * context's resolveEntity callback, parses it as an external subset of
 * a temporary document and, on a well-formed result, detaches the
 * subset from that document into ret.
 */
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12193
const xmlChar *SystemID) {
12194
xmlDtdPtr ret = NULL;
12195
xmlParserCtxtPtr ctxt;
12196
xmlParserInputPtr input = NULL;
12197
xmlCharEncoding enc;
12198
xmlChar* systemIdCanonic;
/* At least one of the two identifiers is required. */
12200
if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12202
ctxt = xmlNewParserCtxt();
12203
if (ctxt == NULL) {
12208
* Set-up the SAX context
/*
 * NOTE(review): the assignment that follows this free is not visible
 * here; the `if (sax != NULL) ctxt->sax = NULL` resets on the exit
 * paths suggest the caller's @sax is installed on ctxt -- confirm.
 */
12211
if (ctxt->sax != NULL)
12212
xmlFree(ctxt->sax);
12214
ctxt->userData = ctxt;
12218
* Canonicalise the system ID
12220
systemIdCanonic = xmlCanonicPath(SystemID);
/*
 * NOTE(review): unlike every later exit path, ctxt->sax is not reset to
 * NULL here before the context is freed -- confirm whether a
 * caller-supplied @sax can be freed by mistake.
 */
12221
if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12222
xmlFreeParserCtxt(ctxt);
12227
* Ask the Entity resolver to load the damn thing
12230
if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12231
input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
/* No input obtained: release the context and the canonic path. */
12233
if (input == NULL) {
12234
if (sax != NULL) ctxt->sax = NULL;
12235
xmlFreeParserCtxt(ctxt);
12236
if (systemIdCanonic != NULL)
12237
xmlFree(systemIdCanonic);
12242
* plug some encoding conversion routines here.
12244
if (xmlPushInput(ctxt, input) < 0) {
12245
if (sax != NULL) ctxt->sax = NULL;
12246
xmlFreeParserCtxt(ctxt);
12247
if (systemIdCanonic != NULL)
12248
xmlFree(systemIdCanonic);
/*
 * With at least 4 bytes available the encoding is detected and
 * switched to; the detection result is not tested for
 * XML_CHAR_ENCODING_NONE here.
 */
12251
if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12252
enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12253
xmlSwitchEncoding(ctxt, enc);
/*
 * systemIdCanonic either becomes input->filename (when the input has
 * none) or is freed.
 */
12256
if (input->filename == NULL)
12257
input->filename = (char *) systemIdCanonic;
12259
xmlFree(systemIdCanonic);
12262
input->base = ctxt->input->cur;
12263
input->cur = ctxt->input->cur;
12264
input->free = NULL;
12267
* let's parse that entity knowing it's an external subset.
12269
ctxt->inSubset = 2;
12270
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12271
if (ctxt->myDoc == NULL) {
12272
xmlErrMemory(ctxt, "New Doc failed");
12273
if (sax != NULL) ctxt->sax = NULL;
12274
xmlFreeParserCtxt(ctxt);
/* Temporary document whose external subset carries the caller's IDs. */
12277
ctxt->myDoc->properties = XML_DOC_INTERNAL;
12278
ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12279
ExternalID, SystemID);
12280
xmlParseExternalSubset(ctxt, ExternalID, SystemID);
/*
 * For a well-formed parse the external subset is taken out of the
 * temporary document (extSubset set to NULL) before that document is
 * freed below.
 */
12282
if (ctxt->myDoc != NULL) {
12283
if (ctxt->wellFormed) {
12284
ret = ctxt->myDoc->extSubset;
12285
ctxt->myDoc->extSubset = NULL;
/*
 * Walk the children of the detached DTD.  NOTE(review): the loop body
 * is not visible here; presumably it clears each node's doc pointer.
 */
12290
tmp = ret->children;
12291
while (tmp != NULL) {
12299
xmlFreeDoc(ctxt->myDoc);
12300
ctxt->myDoc = NULL;
12302
if (sax != NULL) ctxt->sax = NULL;
12303
xmlFreeParserCtxt(ctxt);
12311
* @ExternalID: a NAME* containing the External ID of the DTD
12312
* @SystemID: a NAME* containing the URL to the DTD
12314
* Load and parse an external subset.
12316
* Returns the resulting xmlDtdPtr or NULL in case of error.
12320
/* Convenience wrapper: xmlSAXParseDTD() called with a NULL SAX handler. */
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12321
return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12323
#endif /* LIBXML_VALID_ENABLED */
12325
/************************************************************************
12327
* Front ends when parsing an Entity *
12329
************************************************************************/
12332
* xmlParseCtxtExternalEntity:
12333
* @ctx: the existing parsing context
12334
* @URL: the URL for the entity to load
12335
* @ID: the System ID for the entity to load
12336
* @lst: the return value for the set of parsed nodes
12338
* Parse an external general entity within an existing parsing context
12339
* An external general parsed entity is well-formed if it matches the
12340
* production labeled extParsedEnt.
12342
* [78] extParsedEnt ::= TextDecl? content
12344
* Returns 0 if the entity is well formed, -1 in case of args problem and
12345
* the parser error code otherwise
12349
/*
 * Parses the external entity named by @URL / @ID in a child parser
 * context derived from @ctx.  The child borrows the parent's SAX table,
 * dictionary and attribute tables, parses the entity content under a
 * temporary "pseudoroot" node of a scratch document, and the resulting
 * children are unlinked from that pseudo parent at the end.
 */
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12350
const xmlChar *ID, xmlNodePtr *lst) {
12351
xmlParserCtxtPtr ctxt;
12353
xmlNodePtr newRoot;
12354
xmlSAXHandlerPtr oldsax = NULL;
12357
xmlCharEncoding enc;
12359
if (ctx == NULL) return(-1);
/*
 * Nesting guard: a depth above 40 without XML_PARSE_HUGE, or above 1024
 * in any case, is reported as XML_ERR_ENTITY_LOOP.
 */
12361
if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12362
(ctx->depth > 1024)) {
12363
return(XML_ERR_ENTITY_LOOP);
12368
if ((URL == NULL) && (ID == NULL))
12370
if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12373
ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
12374
if (ctxt == NULL) {
/*
 * The child context runs with the parent's SAX table; its own table is
 * kept in oldsax so it can be put back before the child is freed.
 */
12378
oldsax = ctxt->sax;
12379
ctxt->sax = ctx->sax;
12380
xmlDetectSAX2(ctxt);
12381
newDoc = xmlNewDoc(BAD_CAST "1.0");
12382
if (newDoc == NULL) {
/*
 * NOTE(review): ctxt->sax still aliases ctx->sax at this point and
 * oldsax has not been restored, unlike the newRoot == NULL path below.
 * Confirm that freeing ctxt here cannot release the parent's SAX table
 * (and that oldsax is not leaked).
 */
12383
xmlFreeParserCtxt(ctxt);
12386
newDoc->properties = XML_DOC_INTERNAL;
/* Share the parent document's dictionary, taking a reference on it. */
12387
if (ctx->myDoc->dict) {
12388
newDoc->dict = ctx->myDoc->dict;
12389
xmlDictReference(newDoc->dict);
/*
 * NOTE(review): ctx->myDoc has already been dereferenced a few lines
 * above, so this NULL test cannot be what protects that access.
 */
12391
if (ctx->myDoc != NULL) {
12392
newDoc->intSubset = ctx->myDoc->intSubset;
12393
newDoc->extSubset = ctx->myDoc->extSubset;
12395
if (ctx->myDoc->URL != NULL) {
12396
newDoc->URL = xmlStrdup(ctx->myDoc->URL);
/* Temporary parent node under which the entity content is built. */
12398
newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12399
if (newRoot == NULL) {
12400
ctxt->sax = oldsax;
12401
xmlFreeParserCtxt(ctxt);
/*
 * intSubset/extSubset were copied from ctx->myDoc above; they are reset
 * to NULL before the scratch document is freed.
 */
12402
newDoc->intSubset = NULL;
12403
newDoc->extSubset = NULL;
12404
xmlFreeDoc(newDoc);
12407
xmlAddChild((xmlNodePtr) newDoc, newRoot);
12408
nodePush(ctxt, newDoc->children);
12409
if (ctx->myDoc == NULL) {
12410
ctxt->myDoc = newDoc;
12412
ctxt->myDoc = ctx->myDoc;
12413
newDoc->children->doc = ctx->myDoc;
12417
* Get the 4 first bytes and decode the charset
12418
* if enc != XML_CHAR_ENCODING_NONE
12419
* plug some encoding conversion routines.
12422
if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12427
enc = xmlDetectCharEncoding(start, 4);
12428
if (enc != XML_CHAR_ENCODING_NONE) {
12429
xmlSwitchEncoding(ctxt, enc);
12434
* Parse a possible text declaration first
12436
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12437
xmlParseTextDecl(ctxt);
12439
* An XML-1.0 document can't reference an entity not XML-1.0
12441
if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12442
(!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12443
xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12444
"Version mismatch between document and entity\n");
12449
* If the user provided its own SAX callbacks then reuse the
12450
* useData callback field, otherwise the expected setup in a
12451
* DOM builder is to have userData == ctxt
12453
if (ctx->userData == ctx)
12454
ctxt->userData = ctxt;
12456
ctxt->userData = ctx->userData;
12459
* Doing validity checking on chunk doesn't make sense
/*
 * The child starts in CONTENT state and inherits the parent's
 * validation and entity settings, one nesting level deeper.
 */
12461
ctxt->instate = XML_PARSER_CONTENT;
12462
ctxt->validate = ctx->validate;
12463
ctxt->valid = ctx->valid;
12464
ctxt->loadsubset = ctx->loadsubset;
12465
ctxt->depth = ctx->depth + 1;
12466
ctxt->replaceEntities = ctx->replaceEntities;
/* Validation callbacks are only inherited when validation is on. */
12467
if (ctxt->validate) {
12468
ctxt->vctxt.error = ctx->vctxt.error;
12469
ctxt->vctxt.warning = ctx->vctxt.warning;
12471
ctxt->vctxt.error = NULL;
12472
ctxt->vctxt.warning = NULL;
12474
ctxt->vctxt.nodeTab = NULL;
12475
ctxt->vctxt.nodeNr = 0;
12476
ctxt->vctxt.nodeMax = 0;
12477
ctxt->vctxt.node = NULL;
/*
 * Swap the child's own dictionary for the parent's and look the three
 * cached strings up again in it; the attribute tables are borrowed
 * from the parent as well.
 */
12478
if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12479
ctxt->dict = ctx->dict;
12480
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12481
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12482
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12483
ctxt->dictNames = ctx->dictNames;
12484
ctxt->attsDefault = ctx->attsDefault;
12485
ctxt->attsSpecial = ctx->attsSpecial;
12486
ctxt->linenumbers = ctx->linenumbers;
12488
xmlParseContent(ctxt);
/* Copy the validation flags back to the parent context. */
12490
ctx->validate = ctxt->validate;
12491
ctx->valid = ctxt->valid;
/*
 * Input left after the content: "</" is reported as not well balanced,
 * any other remaining byte as extra content.
 */
12492
if ((RAW == '<') && (NXT(1) == '/')) {
12493
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12494
} else if (RAW != 0) {
12495
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
/* The node stack must be back at the pseudoroot. */
12497
if (ctxt->node != newDoc->children) {
12498
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12501
if (!ctxt->wellFormed) {
12502
if (ctxt->errNo == 0)
12511
* Return the newly created nodeset after unlinking it from
12512
* they pseudo parent.
12514
cur = newDoc->children->children;
12516
while (cur != NULL) {
12517
cur->parent = NULL;
12520
newDoc->children->children = NULL;
/*
 * Cleanup: put the child's own SAX table back and clear the borrowed
 * attribute tables before freeing the child context; the borrowed DTD
 * subsets are cleared before the scratch document is freed.
 */
12524
ctxt->sax = oldsax;
12526
ctxt->attsDefault = NULL;
12527
ctxt->attsSpecial = NULL;
12528
xmlFreeParserCtxt(ctxt);
12529
newDoc->intSubset = NULL;
12530
newDoc->extSubset = NULL;
12531
xmlFreeDoc(newDoc);
12537
* xmlParseExternalEntityPrivate:
12538
* @doc: the document the chunk pertains to
12539
* @oldctxt: the previous parser context if available
12540
* @sax: the SAX handler block (possibly NULL)
12541
* @user_data: The user data returned on SAX callbacks (possibly NULL)
12542
* @depth: Used for loop detection, use 0
12543
* @URL: the URL for the entity to load
12544
* @ID: the System ID for the entity to load
12545
* @list: the return value for the set of parsed nodes
12547
* Private version of xmlParseExternalEntity()
12549
* Returns 0 if the entity is well formed, -1 in case of args problem and
12550
* the parser error code otherwise
12553
static xmlParserErrors
12554
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12555
xmlSAXHandlerPtr sax,
12556
void *user_data, int depth, const xmlChar *URL,
12557
const xmlChar *ID, xmlNodePtr *list) {
/* Parses the external entity named by URL/ID in a fresh parser context and builds its content under a temporary "pseudoroot" node of a scratch document. */
12558
xmlParserCtxtPtr ctxt;
12560
xmlNodePtr newRoot;
12561
xmlSAXHandlerPtr oldsax = NULL;
12562
xmlParserErrors ret = XML_ERR_OK;
12564
xmlCharEncoding enc;
/* Nesting guard: a depth above 40 is refused with XML_ERR_ENTITY_LOOP when there is no parent context or the parent lacks XML_PARSE_HUGE. */
12566
if (((depth > 40) &&
12567
((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12569
return(XML_ERR_ENTITY_LOOP);
/* At least one of URL and ID must be supplied. */
12574
if ((URL == NULL) && (ID == NULL))
12575
return(XML_ERR_INTERNAL_ERROR);
12577
return(XML_ERR_INTERNAL_ERROR);
12580
ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
12581
if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12582
ctxt->userData = ctxt;
/* With a parent context: inherit its settings and share its node-info sequence. */
12583
if (oldctxt != NULL) {
12584
ctxt->_private = oldctxt->_private;
12585
ctxt->loadsubset = oldctxt->loadsubset;
12586
ctxt->validate = oldctxt->validate;
12587
ctxt->external = oldctxt->external;
12588
ctxt->record_info = oldctxt->record_info;
12589
ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12590
ctxt->node_seq.length = oldctxt->node_seq.length;
12591
ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12594
* Doing validity checking on chunk without context
12595
* doesn't make sense
12597
ctxt->_private = NULL;
12598
ctxt->validate = 0;
12599
ctxt->external = 2;
12600
ctxt->loadsubset = 0;
12603
oldsax = ctxt->sax;
12605
if (user_data != NULL)
12606
ctxt->userData = user_data;
12608
xmlDetectSAX2(ctxt);
12609
newDoc = xmlNewDoc(BAD_CAST "1.0");
12610
if (newDoc == NULL) {
/* node_seq.buffer may have been copied from oldctxt above, so detach it before the context is freed. */
12611
ctxt->node_seq.maximum = 0;
12612
ctxt->node_seq.length = 0;
12613
ctxt->node_seq.buffer = NULL;
12614
xmlFreeParserCtxt(ctxt);
12615
return(XML_ERR_INTERNAL_ERROR);
/* The scratch document borrows doc's DTD subsets and takes a reference on doc's dictionary. */
12617
newDoc->properties = XML_DOC_INTERNAL;
12618
newDoc->intSubset = doc->intSubset;
12619
newDoc->extSubset = doc->extSubset;
12620
newDoc->dict = doc->dict;
12621
xmlDictReference(newDoc->dict);
12623
if (doc->URL != NULL) {
12624
newDoc->URL = xmlStrdup(doc->URL);
12626
newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12627
if (newRoot == NULL) {
12629
ctxt->sax = oldsax;
12630
ctxt->node_seq.maximum = 0;
12631
ctxt->node_seq.length = 0;
12632
ctxt->node_seq.buffer = NULL;
12633
xmlFreeParserCtxt(ctxt);
/* The subsets belong to doc: clear the borrowed pointers before freeing newDoc. */
12634
newDoc->intSubset = NULL;
12635
newDoc->extSubset = NULL;
12636
xmlFreeDoc(newDoc);
12637
return(XML_ERR_INTERNAL_ERROR);
/* Make the pseudoroot the current node so that parsed content is attached below it. */
12639
xmlAddChild((xmlNodePtr) newDoc, newRoot);
12640
nodePush(ctxt, newDoc->children);
12642
newRoot->doc = doc;
12645
* Get the 4 first bytes and decode the charset
12646
* if enc != XML_CHAR_ENCODING_NONE
12647
* plug some encoding conversion routines.
12650
if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12655
enc = xmlDetectCharEncoding(start, 4);
12656
if (enc != XML_CHAR_ENCODING_NONE) {
12657
xmlSwitchEncoding(ctxt, enc);
12662
* Parse a possible text declaration first
12664
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12665
xmlParseTextDecl(ctxt);
12668
ctxt->instate = XML_PARSER_CONTENT;
12669
ctxt->depth = depth;
12671
xmlParseContent(ctxt);
/* After the content: a pending end tag or any other leftover input is a fatal error. */
12673
if ((RAW == '<') && (NXT(1) == '/')) {
12674
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12675
} else if (RAW != 0) {
12676
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
/* The current node must be back at the pseudoroot, otherwise an element was left open. */
12678
if (ctxt->node != newDoc->children) {
12679
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12682
if (!ctxt->wellFormed) {
12683
if (ctxt->errNo == 0)
12684
ret = XML_ERR_INTERNAL_ERROR;
12686
ret = (xmlParserErrors)ctxt->errNo;
12688
if (list != NULL) {
12692
* Return the newly created nodeset after unlinking it from
12693
* they pseudo parent.
12695
cur = newDoc->children->children;
12697
while (cur != NULL) {
12698
cur->parent = NULL;
12701
newDoc->children->children = NULL;
12707
* Record in the parent context the number of entities replacement
12708
* done when parsing that reference.
12710
if (oldctxt != NULL)
12711
oldctxt->nbentities += ctxt->nbentities;
/* NOTE(review): oldctxt is NULL-tested just above, yet the visible statements below dereference it without such a test (sizeentities, lastError, node_seq). Confirm against the complete source that a NULL oldctxt cannot reach them. */
12714
* Also record the size of the entity parsed
12716
if (ctxt->input != NULL) {
12717
oldctxt->sizeentities += ctxt->input->consumed;
12718
oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
12721
* And record the last error if any
12723
if (ctxt->lastError.code != XML_ERR_OK)
12724
xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12727
ctxt->sax = oldsax;
/* Hand the node-info sequence back to the parent context, then detach it from ctxt before ctxt is freed. */
12728
oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12729
oldctxt->node_seq.length = ctxt->node_seq.length;
12730
oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
12731
ctxt->node_seq.maximum = 0;
12732
ctxt->node_seq.length = 0;
12733
ctxt->node_seq.buffer = NULL;
12734
xmlFreeParserCtxt(ctxt);
12735
newDoc->intSubset = NULL;
12736
newDoc->extSubset = NULL;
12737
xmlFreeDoc(newDoc);
12742
#ifdef LIBXML_SAX1_ENABLED
12744
* xmlParseExternalEntity:
12745
* @doc: the document the chunk pertains to
12746
* @sax: the SAX handler block (possibly NULL)
12747
* @user_data: The user data returned on SAX callbacks (possibly NULL)
12748
* @depth: Used for loop detection, use 0
12749
* @URL: the URL for the entity to load
12750
* @ID: the System ID for the entity to load
12751
* @lst: the return value for the set of parsed nodes
12753
* Parse an external general entity
12754
* An external general parsed entity is well-formed if it matches the
12755
* production labeled extParsedEnt.
12757
* [78] extParsedEnt ::= TextDecl? content
12759
* Returns 0 if the entity is well formed, -1 in case of args problem and
12760
* the parser error code otherwise
12764
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12765
int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
/* Public entry point: forwards to xmlParseExternalEntityPrivate() with NULL as the parent parser context. */
12766
return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
12771
* xmlParseBalancedChunkMemory:
12772
* @doc: the document the chunk pertains to
12773
* @sax: the SAX handler block (possibly NULL)
12774
* @user_data: The user data returned on SAX callbacks (possibly NULL)
12775
* @depth: Used for loop detection, use 0
12776
* @string: the input string in UTF8 or ISO-Latin (zero terminated)
12777
* @lst: the return value for the set of parsed nodes
12779
* Parse a well-balanced chunk of an XML document
12780
* called by the parser
12781
* The allowed sequence for the Well Balanced Chunk is the one defined by
12782
* the content production in the XML grammar:
12784
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12786
* Returns 0 if the chunk is well balanced, -1 in case of args problem and
12787
* the parser error code otherwise
12791
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12792
void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
/* Forwards to xmlParseBalancedChunkMemoryRecover(), passing 0 as the final (recover) argument. */
12793
return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12794
depth, string, lst, 0 );
12796
#endif /* LIBXML_SAX1_ENABLED */
12799
* xmlParseBalancedChunkMemoryInternal:
12800
* @oldctxt: the existing parsing context
12801
* @string: the input string in UTF8 or ISO-Latin (zero terminated)
12802
* @user_data: the user data field for the parser context
12803
* @lst: the return value for the set of parsed nodes
12806
* Parse a well-balanced chunk of an XML document
12807
* called by the parser
12808
* The allowed sequence for the Well Balanced Chunk is the one defined by
12809
* the content production in the XML grammar:
12811
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12813
* Returns XML_ERR_OK if the chunk is well balanced, and the parser
12814
* error code otherwise
12816
* In case recover is set to 1, the nodelist will not be empty even if
12817
* the parsed chunk is not well balanced.
12819
static xmlParserErrors
12820
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12821
const xmlChar *string, void *user_data, xmlNodePtr *lst) {
/* Parses string in a temporary memory parser context that reuses oldctxt's dictionary, SAX handler, namespace stack and (when present) document. */
12822
xmlParserCtxtPtr ctxt;
12823
xmlDocPtr newDoc = NULL;
12824
xmlNodePtr newRoot;
12825
xmlSAXHandlerPtr oldsax = NULL;
12826
xmlNodePtr content = NULL;
12827
xmlNodePtr last = NULL;
12829
xmlParserErrors ret = XML_ERR_OK;
/* Nesting guard: a depth above 40 requires XML_PARSE_HUGE, and a depth above 1024 is always refused. */
12834
if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12835
(oldctxt->depth > 1024)) {
12836
return(XML_ERR_ENTITY_LOOP);
12842
if (string == NULL)
12843
return(XML_ERR_INTERNAL_ERROR);
12845
size = xmlStrlen(string);
12847
ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12848
if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12849
if (user_data != NULL)
12850
ctxt->userData = user_data;
12852
ctxt->userData = ctxt;
/* Replace the new context's own dictionary with the parent's and re-intern the well-known strings in it. */
12853
if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12854
ctxt->dict = oldctxt->dict;
12855
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12856
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12857
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12860
/* propagate namespaces down the entity */
12861
for (i = 0;i < oldctxt->nsNr;i += 2) {
12862
nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
/* Remember the context's own SAX handler (restored before freeing) and parse with the parent's handler. */
12866
oldsax = ctxt->sax;
12867
ctxt->sax = oldctxt->sax;
12868
xmlDetectSAX2(ctxt);
12869
ctxt->replaceEntities = oldctxt->replaceEntities;
12870
ctxt->options = oldctxt->options;
12872
ctxt->_private = oldctxt->_private;
/* When the parent has no document, a scratch document is created. The lines that reuse oldctxt->myDoc and save its children/last are presumably the else branch. */
12873
if (oldctxt->myDoc == NULL) {
12874
newDoc = xmlNewDoc(BAD_CAST "1.0");
12875
if (newDoc == NULL) {
12876
ctxt->sax = oldsax;
12878
xmlFreeParserCtxt(ctxt);
12879
return(XML_ERR_INTERNAL_ERROR);
12881
newDoc->properties = XML_DOC_INTERNAL;
12882
newDoc->dict = ctxt->dict;
12883
xmlDictReference(newDoc->dict);
12884
ctxt->myDoc = newDoc;
12886
ctxt->myDoc = oldctxt->myDoc;
12887
content = ctxt->myDoc->children;
12888
last = ctxt->myDoc->last;
12890
newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12891
if (newRoot == NULL) {
12892
ctxt->sax = oldsax;
12894
xmlFreeParserCtxt(ctxt);
12895
if (newDoc != NULL) {
12896
xmlFreeDoc(newDoc);
12898
return(XML_ERR_INTERNAL_ERROR);
/* Temporarily make the pseudoroot the document's only child and the current node for parsing. */
12900
ctxt->myDoc->children = NULL;
12901
ctxt->myDoc->last = NULL;
12902
xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
12903
nodePush(ctxt, ctxt->myDoc->children);
12904
ctxt->instate = XML_PARSER_CONTENT;
12905
ctxt->depth = oldctxt->depth + 1;
12907
ctxt->validate = 0;
12908
ctxt->loadsubset = oldctxt->loadsubset;
12909
if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12911
* ID/IDREF registration will be done in xmlValidateElement below
12913
ctxt->loadsubset |= XML_SKIP_IDS;
/* The attribute-default tables are borrowed from the parent; they are cleared again before ctxt is freed. */
12915
ctxt->dictNames = oldctxt->dictNames;
12916
ctxt->attsDefault = oldctxt->attsDefault;
12917
ctxt->attsSpecial = oldctxt->attsSpecial;
12919
xmlParseContent(ctxt);
/* After the content: a pending end tag or any other leftover input is a fatal error. */
12920
if ((RAW == '<') && (NXT(1) == '/')) {
12921
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12922
} else if (RAW != 0) {
12923
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12925
if (ctxt->node != ctxt->myDoc->children) {
12926
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12929
if (!ctxt->wellFormed) {
12930
if (ctxt->errNo == 0)
12931
ret = XML_ERR_INTERNAL_ERROR;
12933
ret = (xmlParserErrors)ctxt->errNo;
/* Nodes are handed back through lst only when parsing succeeded. */
12938
if ((lst != NULL) && (ret == XML_ERR_OK)) {
12942
* Return the newly created nodeset after unlinking it from
12943
* they pseudo parent.
12945
cur = ctxt->myDoc->children->children;
12947
while (cur != NULL) {
12948
#ifdef LIBXML_VALID_ENABLED
12949
if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12950
(oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12951
(cur->type == XML_ELEMENT_NODE)) {
12952
oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12953
oldctxt->myDoc, cur);
12955
#endif /* LIBXML_VALID_ENABLED */
12956
cur->parent = NULL;
12959
ctxt->myDoc->children->children = NULL;
/* Free the pseudoroot and put back the document's saved child list. */
12961
if (ctxt->myDoc != NULL) {
12962
xmlFreeNode(ctxt->myDoc->children);
12963
ctxt->myDoc->children = content;
12964
ctxt->myDoc->last = last;
12968
* Record in the parent context the number of entities replacement
12969
* done when parsing that reference.
/* NOTE(review): oldctxt has already been dereferenced unconditionally since the start of this function, so the NULL test below looks redundant. Confirm. */
12971
if (oldctxt != NULL)
12972
oldctxt->nbentities += ctxt->nbentities;
12975
* Also record the last error if any
12977
if (ctxt->lastError.code != XML_ERR_OK)
12978
xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12980
ctxt->sax = oldsax;
/* attsDefault and attsSpecial were borrowed from oldctxt above: clear them before ctxt is freed. */
12982
ctxt->attsDefault = NULL;
12983
ctxt->attsSpecial = NULL;
12984
xmlFreeParserCtxt(ctxt);
12985
if (newDoc != NULL) {
12986
xmlFreeDoc(newDoc);
12993
* xmlParseInNodeContext:
12994
* @node: the context node
12995
* @data: the input string
12996
* @datalen: the input string length in bytes
12997
* @options: a combination of xmlParserOption
12998
* @lst: the return value for the set of parsed nodes
13000
* Parse a well-balanced chunk of an XML document
13001
* within the context (DTD, namespaces, etc ...) of the given node.
13003
* The allowed sequence for the data is a Well Balanced Chunk defined by
13004
* the content production in the XML grammar:
13006
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13008
* Returns XML_ERR_OK if the chunk is well balanced, and the parser
13009
* error code otherwise
13012
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13013
int options, xmlNodePtr *lst) {
/* Parses data as content in the context of node, using a memory parser context (XML, or HTML when enabled and the document is an HTML document). */
13015
xmlParserCtxtPtr ctxt;
13016
xmlDocPtr doc = NULL;
13017
xmlNodePtr fake, cur;
13020
xmlParserErrors ret = XML_ERR_OK;
13023
* check all input parameters, grab the document
13025
if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13026
return(XML_ERR_INTERNAL_ERROR);
13027
switch (node->type) {
13028
case XML_ELEMENT_NODE:
13029
case XML_ATTRIBUTE_NODE:
13030
case XML_TEXT_NODE:
13031
case XML_CDATA_SECTION_NODE:
13032
case XML_ENTITY_REF_NODE:
13034
case XML_COMMENT_NODE:
13035
case XML_DOCUMENT_NODE:
13036
case XML_HTML_DOCUMENT_NODE:
13039
return(XML_ERR_INTERNAL_ERROR);
/* Climb the parent chain until an element or a document node is reached. */
13042
while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13043
(node->type != XML_DOCUMENT_NODE) &&
13044
(node->type != XML_HTML_DOCUMENT_NODE))
13045
node = node->parent;
13047
return(XML_ERR_INTERNAL_ERROR);
13048
if (node->type == XML_ELEMENT_NODE)
13051
doc = (xmlDocPtr) node;
13053
return(XML_ERR_INTERNAL_ERROR);
13056
* allocate a context and set-up everything not related to the
13057
* node position in the tree
13059
if (doc->type == XML_DOCUMENT_NODE)
13060
ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13061
#ifdef LIBXML_HTML_ENABLED
13062
else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13063
ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13065
* When parsing in context, it makes no sense to add implied
13066
* elements like html/body/etc...
13068
options |= HTML_PARSE_NOIMPLIED;
13072
return(XML_ERR_INTERNAL_ERROR);
13075
return(XML_ERR_NO_MEMORY);
13078
* Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13079
* We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13080
* we must wait until the last moment to free the original one.
13082
if (doc->dict != NULL) {
13083
if (ctxt->dict != NULL)
13084
xmlDictFree(ctxt->dict);
13085
ctxt->dict = doc->dict;
13087
options |= XML_PARSE_NODICT;
/* Parse with the document's declared encoding when it has one. */
13089
if (doc->encoding != NULL) {
13090
xmlCharEncodingHandlerPtr hdlr;
13092
if (ctxt->encoding != NULL)
13093
xmlFree((xmlChar *) ctxt->encoding);
13094
ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13096
hdlr = xmlFindCharEncodingHandler(doc->encoding);
13097
if (hdlr != NULL) {
13098
xmlSwitchToEncoding(ctxt, hdlr);
/* NOTE(review): no xmlFreeParserCtxt(ctxt) is visible before the return below, so ctxt may leak on this path. ctxt->dict may also alias doc->dict at this point. Confirm against the complete source. */
13100
return(XML_ERR_UNSUPPORTED_ENCODING);
13104
xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13105
xmlDetectSAX2(ctxt);
/* fake is an empty comment appended to node as a placeholder; it is unlinked again after parsing. */
13108
fake = xmlNewComment(NULL);
13109
if (fake == NULL) {
13110
xmlFreeParserCtxt(ctxt);
13111
return(XML_ERR_NO_MEMORY);
13113
xmlAddChild(node, fake);
13115
if (node->type == XML_ELEMENT_NODE) {
13116
nodePush(ctxt, node);
13118
* initialize the SAX2 namespaces stack
13121
while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13122
xmlNsPtr ns = cur->nsDef;
13123
const xmlChar *iprefix, *ihref;
13125
while (ns != NULL) {
13127
iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13128
ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13130
iprefix = ns->prefix;
/* A prefix is pushed only when the context does not already know it. */
13134
if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13135
nsPush(ctxt, iprefix, ihref);
13142
ctxt->instate = XML_PARSER_CONTENT;
13145
if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13147
* ID/IDREF registration will be done in xmlValidateElement below
13149
ctxt->loadsubset |= XML_SKIP_IDS;
13152
#ifdef LIBXML_HTML_ENABLED
13153
if (doc->type == XML_HTML_DOCUMENT_NODE)
13154
__htmlParseContent(ctxt);
13157
xmlParseContent(ctxt);
/* After the content: a pending end tag or any other leftover input is a fatal error. */
13160
if ((RAW == '<') && (NXT(1) == '/')) {
13161
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13162
} else if (RAW != 0) {
13163
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13165
if ((ctxt->node != NULL) && (ctxt->node != node)) {
13166
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13167
ctxt->wellFormed = 0;
13170
if (!ctxt->wellFormed) {
13171
if (ctxt->errNo == 0)
13172
ret = XML_ERR_INTERNAL_ERROR;
13174
ret = (xmlParserErrors)ctxt->errNo;
13180
* Return the newly created nodeset after unlinking it from
13181
* the pseudo sibling.
13194
while (cur != NULL) {
13195
cur->parent = NULL;
13199
xmlUnlinkNode(fake);
/* On error the node list stored in *lst is freed. */
13203
if (ret != XML_ERR_OK) {
13204
xmlFreeNodeList(*lst);
13208
if (doc->dict != NULL)
13210
xmlFreeParserCtxt(ctxt);
13214
return(XML_ERR_INTERNAL_ERROR);
13218
#ifdef LIBXML_SAX1_ENABLED
13220
* xmlParseBalancedChunkMemoryRecover:
13221
* @doc: the document the chunk pertains to
13222
* @sax: the SAX handler block (possibly NULL)
13223
* @user_data: The user data returned on SAX callbacks (possibly NULL)
13224
* @depth: Used for loop detection, use 0
13225
* @string: the input string in UTF8 or ISO-Latin (zero terminated)
13226
* @lst: the return value for the set of parsed nodes
13227
* @recover: return nodes even if the data is broken (use 0)
13230
* Parse a well-balanced chunk of an XML document
13231
* called by the parser
13232
* The allowed sequence for the Well Balanced Chunk is the one defined by
13233
* the content production in the XML grammar:
13235
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13237
* Returns 0 if the chunk is well balanced, -1 in case of args problem and
13238
* the parser error code otherwise
13240
* In case recover is set to 1, the nodelist will not be empty even if
13241
* the parsed chunk is not well balanced, assuming the parsing succeeded to
13245
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13246
void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13248
xmlParserCtxtPtr ctxt;
/* Parses string in a memory parser context, building the result under a "pseudoroot" node of a scratch document. */
13250
xmlSAXHandlerPtr oldsax = NULL;
13251
xmlNodePtr content, newRoot;
13256
return(XML_ERR_ENTITY_LOOP);
13262
if (string == NULL)
13265
size = xmlStrlen(string);
13267
ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13268
if (ctxt == NULL) return(-1);
13269
ctxt->userData = ctxt;
13271
oldsax = ctxt->sax;
13273
if (user_data != NULL)
13274
ctxt->userData = user_data;
13276
newDoc = xmlNewDoc(BAD_CAST "1.0");
13277
if (newDoc == NULL) {
13278
xmlFreeParserCtxt(ctxt);
13281
newDoc->properties = XML_DOC_INTERNAL;
/* When doc has a dictionary, the context drops its own, takes a reference on doc's and re-interns the well-known strings there. */
13282
if ((doc != NULL) && (doc->dict != NULL)) {
13283
xmlDictFree(ctxt->dict);
13284
ctxt->dict = doc->dict;
13285
xmlDictReference(ctxt->dict);
13286
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13287
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13288
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13289
ctxt->dictNames = 1;
13291
xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
/* The scratch document borrows doc's DTD subsets; they are cleared again before newDoc is freed. */
13294
newDoc->intSubset = doc->intSubset;
13295
newDoc->extSubset = doc->extSubset;
13297
newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13298
if (newRoot == NULL) {
13300
ctxt->sax = oldsax;
13301
xmlFreeParserCtxt(ctxt);
13302
newDoc->intSubset = NULL;
13303
newDoc->extSubset = NULL;
13304
xmlFreeDoc(newDoc);
13307
xmlAddChild((xmlNodePtr) newDoc, newRoot);
13308
nodePush(ctxt, newRoot);
13310
ctxt->myDoc = newDoc;
13312
ctxt->myDoc = newDoc;
13313
newDoc->children->doc = doc;
13314
/* Ensure that doc has XML spec namespace */
13315
xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13316
newDoc->oldNs = doc->oldNs;
13318
ctxt->instate = XML_PARSER_CONTENT;
13319
ctxt->depth = depth;
13322
* Doing validity checking on chunk doesn't make sense
13324
ctxt->validate = 0;
13325
ctxt->loadsubset = 0;
13326
xmlDetectSAX2(ctxt);
/* With a document: hide doc's children while the content is parsed, then restore them. */
13328
if ( doc != NULL ){
13329
content = doc->children;
13330
doc->children = NULL;
13331
xmlParseContent(ctxt);
13332
doc->children = content;
13335
xmlParseContent(ctxt);
/* After the content: a pending end tag or any other leftover input is a fatal error. */
13337
if ((RAW == '<') && (NXT(1) == '/')) {
13338
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13339
} else if (RAW != 0) {
13340
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13342
if (ctxt->node != newDoc->children) {
13343
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13346
if (!ctxt->wellFormed) {
13347
if (ctxt->errNo == 0)
/* Nodes are returned on success, or regardless of errors when recover is 1. */
13355
if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13359
* Return the newly created nodeset after unlinking it from
13360
* they pseudo parent.
13362
cur = newDoc->children->children;
13364
while (cur != NULL) {
/* Re-home each returned node to doc and detach it from the pseudoroot. */
13365
xmlSetTreeDoc(cur, doc);
13366
cur->parent = NULL;
13369
newDoc->children->children = NULL;
13373
ctxt->sax = oldsax;
13374
xmlFreeParserCtxt(ctxt);
/* intSubset, extSubset and oldNs were borrowed from doc: clear them before newDoc is freed. */
13375
newDoc->intSubset = NULL;
13376
newDoc->extSubset = NULL;
13377
newDoc->oldNs = NULL;
13378
xmlFreeDoc(newDoc);
13384
* xmlSAXParseEntity:
13385
* @sax: the SAX handler block
13386
* @filename: the filename
13388
* parse an XML external entity out of context and build a tree.
13389
* It uses the given SAX function block to handle the parsing callback.
13390
* If sax is NULL, fallback to the default DOM tree building routines.
13392
* [78] extParsedEnt ::= TextDecl? content
13394
* This corresponds to a "Well Balanced" chunk
13396
* Returns the resulting document tree
13400
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
/* Creates a file parser context for filename and parses it with xmlParseExtParsedEnt(). */
13402
xmlParserCtxtPtr ctxt;
13404
ctxt = xmlCreateFileParserCtxt(filename);
13405
if (ctxt == NULL) {
/* The context's current SAX handler is released here, presumably so that sax can be installed (the assignment is not visible in this listing). */
13409
if (ctxt->sax != NULL)
13410
xmlFree(ctxt->sax);
13412
ctxt->userData = NULL;
13415
xmlParseExtParsedEnt(ctxt);
13417
if (ctxt->wellFormed)
/* The built document is freed and the context's pointer cleared, presumably on the not-well-formed path. */
13421
xmlFreeDoc(ctxt->myDoc);
13422
ctxt->myDoc = NULL;
13426
xmlFreeParserCtxt(ctxt);
13433
* @filename: the filename
13435
* parse an XML external entity out of context and build a tree.
13437
* [78] extParsedEnt ::= TextDecl? content
13439
* This corresponds to a "Well Balanced" chunk
13441
* Returns the resulting document tree
13445
xmlParseEntity(const char *filename) {
/* Same as xmlSAXParseEntity() with a NULL SAX handler. */
13446
return(xmlSAXParseEntity(NULL, filename));
13448
#endif /* LIBXML_SAX1_ENABLED */
13451
* xmlCreateEntityParserCtxtInternal:
13452
* @URL: the entity URL
13453
* @ID: the entity PUBLIC ID
13454
* @base: a possible base for the target URI
13455
* @pctx: parser context used to set options on new context
13457
* Create a parser context for an external entity
13458
* Automatic support for ZLIB/Compress compressed document is provided
13459
* by default if found at compile-time.
13461
* Returns the new parser context or NULL
13463
static xmlParserCtxtPtr
13464
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13465
const xmlChar *base, xmlParserCtxtPtr pctx) {
/* Allocates a new parser context, loads the external entity as its input and records the entity's directory. */
13466
xmlParserCtxtPtr ctxt;
13467
xmlParserInputPtr inputStream;
13468
char *directory = NULL;
13471
ctxt = xmlNewParserCtxt();
13472
if (ctxt == NULL) {
/* Inherit the options and private data of the parent context, when one is given. */
13476
if (pctx != NULL) {
13477
ctxt->options = pctx->options;
13478
ctxt->_private = pctx->_private;
13481
uri = xmlBuildURI(URL, base);
/* First loading path: the entity is loaded from URL as given (presumably when no URI could be built). */
13484
inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13485
if (inputStream == NULL) {
13486
xmlFreeParserCtxt(ctxt);
13490
inputPush(ctxt, inputStream);
13492
if ((ctxt->directory == NULL) && (directory == NULL))
13493
directory = xmlParserGetDirectory((char *)URL);
13494
if ((ctxt->directory == NULL) && (directory != NULL))
13495
ctxt->directory = directory;
/* Second loading path: the entity is loaded from the URI built by xmlBuildURI(). */
13497
inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13498
if (inputStream == NULL) {
13500
xmlFreeParserCtxt(ctxt);
13504
inputPush(ctxt, inputStream);
13506
if ((ctxt->directory == NULL) && (directory == NULL))
13507
directory = xmlParserGetDirectory((char *)uri);
13508
if ((ctxt->directory == NULL) && (directory != NULL))
13509
ctxt->directory = directory;
13516
* xmlCreateEntityParserCtxt:
13517
* @URL: the entity URL
13518
* @ID: the entity PUBLIC ID
13519
* @base: a possible base for the target URI
13521
* Create a parser context for an external entity
13522
* Automatic support for ZLIB/Compress compressed document is provided
13523
* by default if found at compile-time.
13525
* Returns the new parser context or NULL
13528
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13529
const xmlChar *base) {
/* Public wrapper: forwards to xmlCreateEntityParserCtxtInternal() with no parent context (NULL). */
13530
return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
13534
/************************************************************************
13536
* Front ends when parsing from a file *
13538
************************************************************************/
13541
* xmlCreateURLParserCtxt:
13542
* @filename: the filename or URL
13543
* @options: a combination of xmlParserOption
13545
* Create a parser context for a file or URL content.
13546
* Automatic support for ZLIB/Compress compressed document is provided
13547
* by default if found at compile-time and for file accesses
13549
* Returns the new parser context or NULL
13552
xmlCreateURLParserCtxt(const char *filename, int options)
13554
xmlParserCtxtPtr ctxt;
/* Builds a parser context whose input is loaded from filename through xmlLoadExternalEntity(). */
13555
xmlParserInputPtr inputStream;
13556
char *directory = NULL;
13558
ctxt = xmlNewParserCtxt();
13559
if (ctxt == NULL) {
13560
xmlErrMemory(NULL, "cannot allocate parser context");
13565
xmlCtxtUseOptionsInternal(ctxt, options, NULL);
/* Line numbers are switched on for every context created here. */
13566
ctxt->linenumbers = 1;
13568
inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13569
if (inputStream == NULL) {
13570
xmlFreeParserCtxt(ctxt);
13574
inputPush(ctxt, inputStream);
/* Record the input's directory unless the context already has one. */
13575
if ((ctxt->directory == NULL) && (directory == NULL))
13576
directory = xmlParserGetDirectory(filename);
13577
if ((ctxt->directory == NULL) && (directory != NULL))
13578
ctxt->directory = directory;
13584
* xmlCreateFileParserCtxt:
13585
* @filename: the filename
13587
* Create a parser context for a file content.
13588
* Automatic support for ZLIB/Compress compressed document is provided
13589
* by default if found at compile-time.
13591
* Returns the new parser context or NULL
13594
xmlCreateFileParserCtxt(const char *filename)
13596
/* Same as xmlCreateURLParserCtxt() with options set to 0. */
return(xmlCreateURLParserCtxt(filename, 0));
13599
#ifdef LIBXML_SAX1_ENABLED
13601
* xmlSAXParseFileWithData:
13602
* @sax: the SAX handler block
13603
* @filename: the filename
13604
* @recovery: work in recovery mode, i.e. tries to read no Well Formed
13606
* @data: the userdata
13608
* parse an XML file and build a tree. Automatic support for ZLIB/Compress
13609
* compressed document is provided by default if found at compile-time.
13610
* It uses the given SAX function block to handle the parsing callback.
13611
* If sax is NULL, fallback to the default DOM tree building routines.
13613
* User data (void *) is stored within the parser context in the
13614
* context's _private member, so it is available nearly everywhere in libxml
13616
* Returns the resulting document tree
13620
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13621
int recovery, void *data) {
/* Creates a file parser context for filename, runs xmlParseDocument() on it and frees the context. */
13623
xmlParserCtxtPtr ctxt;
13627
ctxt = xmlCreateFileParserCtxt(filename);
13628
if (ctxt == NULL) {
/* The context's current SAX handler is released here, presumably so that sax can be installed (the assignment is not visible in this listing). */
13632
if (ctxt->sax != NULL)
13633
xmlFree(ctxt->sax);
13636
xmlDetectSAX2(ctxt);
/* The caller's data pointer is stored in the context's _private member. */
13638
ctxt->_private = data;
13641
if (ctxt->directory == NULL)
13642
ctxt->directory = xmlParserGetDirectory(filename);
13644
ctxt->recovery = recovery;
13646
xmlParseDocument(ctxt);
/* The document is kept when it is well formed or when recovery was requested. */
13648
if ((ctxt->wellFormed) || recovery) {
/* Compression is set to 9 when the input buffer reports a positive compressed value; the other assignment is presumably the else branch. */
13651
if (ctxt->input->buf->compressed > 0)
13652
ret->compression = 9;
13654
ret->compression = ctxt->input->buf->compressed;
13659
xmlFreeDoc(ctxt->myDoc);
13660
ctxt->myDoc = NULL;
13664
xmlFreeParserCtxt(ctxt);
13671
* @sax: the SAX handler block
13672
* @filename: the filename
13673
* @recovery: work in recovery mode, i.e. tries to read no Well Formed
13676
* parse an XML file and build a tree. Automatic support for ZLIB/Compress
13677
* compressed document is provided by default if found at compile-time.
13678
* It uses the given SAX function block to handle the parsing callback.
13679
* If sax is NULL, fallback to the default DOM tree building routines.
13681
* Returns the resulting document tree
13685
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13687
/* Same as xmlSAXParseFileWithData() with a NULL data pointer. */
return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13692
* @cur: a pointer to an array of xmlChar
13694
* parse an XML in-memory document and build a tree.
13695
* In the case the document is not Well Formed, an attempt to build a
13696
* tree is tried anyway
13698
* Returns the resulting document tree or NULL in case of failure
13702
xmlRecoverDoc(const xmlChar *cur) {
/* Calls xmlSAXParseDoc() with a NULL SAX handler and 1 as its last argument (recovery mode, per the header comment). */
13703
return(xmlSAXParseDoc(NULL, cur, 1));
13708
* @filename: the filename
13710
* parse an XML file and build a tree. Automatic support for ZLIB/Compress
13711
* compressed document is provided by default if found at compile-time.
13713
* Returns the resulting document tree if the file was wellformed,
13718
xmlParseFile(const char *filename) {
/* Calls xmlSAXParseFile() with a NULL SAX handler and recovery disabled (0). */
13719
return(xmlSAXParseFile(NULL, filename, 0));
13724
* @filename: the filename
13726
* parse an XML file and build a tree. Automatic support for ZLIB/Compress
13727
* compressed document is provided by default if found at compile-time.
13728
* In the case the document is not Well Formed, it attempts to build
13731
* Returns the resulting document tree or NULL in case of failure
13735
xmlRecoverFile(const char *filename) {
/* Calls xmlSAXParseFile() with a NULL SAX handler and recovery enabled (1). */
13736
return(xmlSAXParseFile(NULL, filename, 1));
13741
* xmlSetupParserForBuffer:
13742
* @ctxt: an XML parser context
13743
* @buffer: a xmlChar * buffer
13744
* @filename: a file name
13746
* Setup the parser context to parse a new buffer; Clears any prior
13747
* contents from the parser context. The buffer parameter must not be
13748
* NULL, but the filename parameter can be
13751
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13752
const char* filename)
13754
xmlParserInputPtr input;
/* Resets ctxt and pushes a new input stream that reads directly from buffer. */
13756
if ((ctxt == NULL) || (buffer == NULL))
13759
input = xmlNewInputStream(ctxt);
13760
if (input == NULL) {
13761
xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
13762
xmlClearParserCtxt(ctxt);
13766
xmlClearParserCtxt(ctxt);
13767
if (filename != NULL)
13768
input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
/* The stream points into the caller's buffer; its end is the position of the terminating zero as measured by xmlStrlen(). */
13769
input->base = buffer;
13770
input->cur = buffer;
13771
input->end = &buffer[xmlStrlen(buffer)];
13772
inputPush(ctxt, input);
13776
* xmlSAXUserParseFile:
13777
* @sax: a SAX handler
13778
* @user_data: The user data returned on SAX callbacks
13779
* @filename: a file name
13781
* parse an XML file and call the given SAX handler routines.
13782
* Automatic support for ZLIB/Compress compressed document is provided
13784
* Returns 0 in case of success or an error number otherwise
13787
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13788
const char *filename) {
/* Parses filename with xmlParseDocument() in a file parser context, then frees any document built and the context. */
13790
xmlParserCtxtPtr ctxt;
13792
ctxt = xmlCreateFileParserCtxt(filename);
13793
if (ctxt == NULL) return -1;
/* The context's SAX handler is freed unless it is the static default handler. */
13794
if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13795
xmlFree(ctxt->sax);
13797
xmlDetectSAX2(ctxt);
13799
if (user_data != NULL)
13800
ctxt->userData = user_data;
13802
xmlParseDocument(ctxt);
13804
if (ctxt->wellFormed)
13807
if (ctxt->errNo != 0)
/* Any document built during the parse is discarded. */
13814
if (ctxt->myDoc != NULL) {
13815
xmlFreeDoc(ctxt->myDoc);
13816
ctxt->myDoc = NULL;
13818
xmlFreeParserCtxt(ctxt);
13822
#endif /* LIBXML_SAX1_ENABLED */
13824
/************************************************************************
13826
* Front ends when parsing from memory *
13828
************************************************************************/
13831
* xmlCreateMemoryParserCtxt:
13832
* @buffer: a pointer to a char array
13833
* @size: the size of the array
13835
* Create a parser context for an XML in-memory document.
13837
* Returns the new parser context or NULL
13840
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
13841
xmlParserCtxtPtr ctxt;
13842
xmlParserInputPtr input;
13843
xmlParserInputBufferPtr buf;
13845
if (buffer == NULL)
13850
ctxt = xmlNewParserCtxt();
13854
/* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
13855
buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13857
xmlFreeParserCtxt(ctxt);
13861
input = xmlNewInputStream(ctxt);
13862
if (input == NULL) {
13863
xmlFreeParserInputBuffer(buf);
13864
xmlFreeParserCtxt(ctxt);
13868
input->filename = NULL;
13870
input->base = input->buf->buffer->content;
13871
input->cur = input->buf->buffer->content;
13872
input->end = &input->buf->buffer->content[input->buf->buffer->use];
13874
inputPush(ctxt, input);
13878
#ifdef LIBXML_SAX1_ENABLED
13880
* xmlSAXParseMemoryWithData:
13881
* @sax: the SAX handler block
13882
* @buffer: an pointer to a char array
13883
* @size: the size of the array
13884
* @recovery: work in recovery mode, i.e. tries to read no Well Formed
13886
* @data: the userdata
13888
* parse an XML in-memory block and use the given SAX function block
13889
* to handle the parsing callback. If sax is NULL, fallback to the default
13890
* DOM tree building routines.
13892
* User data (void *) is stored within the parser context in the
13893
* context's _private member, so it is available nearly everywhere in libxml
13895
* Returns the resulting document tree
13899
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13900
int size, int recovery, void *data) {
13902
xmlParserCtxtPtr ctxt;
13906
ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13907
if (ctxt == NULL) return(NULL);
13909
if (ctxt->sax != NULL)
13910
xmlFree(ctxt->sax);
13913
xmlDetectSAX2(ctxt);
13915
ctxt->_private=data;
13918
ctxt->recovery = recovery;
13920
xmlParseDocument(ctxt);
13922
if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13925
xmlFreeDoc(ctxt->myDoc);
13926
ctxt->myDoc = NULL;
13930
xmlFreeParserCtxt(ctxt);
13936
* xmlSAXParseMemory:
13937
* @sax: the SAX handler block
13938
* @buffer: an pointer to a char array
13939
* @size: the size of the array
13940
* @recovery: work in recovery mode, i.e. tries to read not Well Formed
13943
* parse an XML in-memory block and use the given SAX function block
13944
* to handle the parsing callback. If sax is NULL, fallback to the default
13945
* DOM tree building routines.
13947
* Returns the resulting document tree
13950
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13951
int size, int recovery) {
13952
return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
13957
* @buffer: an pointer to a char array
13958
* @size: the size of the array
13960
* parse an XML in-memory block and build a tree.
13962
* Returns the resulting document tree
13965
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
13966
return(xmlSAXParseMemory(NULL, buffer, size, 0));
13970
* xmlRecoverMemory:
13971
* @buffer: an pointer to a char array
13972
* @size: the size of the array
13974
* parse an XML in-memory block and build a tree.
13975
* In the case the document is not Well Formed, an attempt to
13976
* build a tree is tried anyway
13978
* Returns the resulting document tree or NULL in case of error
13981
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
13982
return(xmlSAXParseMemory(NULL, buffer, size, 1));
13986
* xmlSAXUserParseMemory:
13987
* @sax: a SAX handler
13988
* @user_data: The user data returned on SAX callbacks
13989
* @buffer: an in-memory XML document input
13990
* @size: the length of the XML document in bytes
13992
* A better SAX parsing routine.
13993
* parse an XML in-memory buffer and call the given SAX handler routines.
13995
* Returns 0 in case of success or a error number otherwise
13997
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
13998
const char *buffer, int size) {
14000
xmlParserCtxtPtr ctxt;
14004
ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14005
if (ctxt == NULL) return -1;
14006
if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14007
xmlFree(ctxt->sax);
14009
xmlDetectSAX2(ctxt);
14011
if (user_data != NULL)
14012
ctxt->userData = user_data;
14014
xmlParseDocument(ctxt);
14016
if (ctxt->wellFormed)
14019
if (ctxt->errNo != 0)
14026
if (ctxt->myDoc != NULL) {
14027
xmlFreeDoc(ctxt->myDoc);
14028
ctxt->myDoc = NULL;
14030
xmlFreeParserCtxt(ctxt);
14034
#endif /* LIBXML_SAX1_ENABLED */
14037
* xmlCreateDocParserCtxt:
14038
* @cur: a pointer to an array of xmlChar
14040
* Creates a parser context for an XML in-memory document.
14042
* Returns the new parser context or NULL
14045
xmlCreateDocParserCtxt(const xmlChar *cur) {
14050
len = xmlStrlen(cur);
14051
return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14054
#ifdef LIBXML_SAX1_ENABLED
14057
* @sax: the SAX handler block
14058
* @cur: a pointer to an array of xmlChar
14059
* @recovery: work in recovery mode, i.e. tries to read no Well Formed
14062
* parse an XML in-memory document and build a tree.
14063
* It use the given SAX function block to handle the parsing callback.
14064
* If sax is NULL, fallback to the default DOM tree building routines.
14066
* Returns the resulting document tree
14070
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14072
xmlParserCtxtPtr ctxt;
14073
xmlSAXHandlerPtr oldsax = NULL;
14075
if (cur == NULL) return(NULL);
14078
ctxt = xmlCreateDocParserCtxt(cur);
14079
if (ctxt == NULL) return(NULL);
14081
oldsax = ctxt->sax;
14083
ctxt->userData = NULL;
14085
xmlDetectSAX2(ctxt);
14087
xmlParseDocument(ctxt);
14088
if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14091
xmlFreeDoc(ctxt->myDoc);
14092
ctxt->myDoc = NULL;
14095
ctxt->sax = oldsax;
14096
xmlFreeParserCtxt(ctxt);
14103
* @cur: a pointer to an array of xmlChar
14105
* parse an XML in-memory document and build a tree.
14107
* Returns the resulting document tree
14111
xmlParseDoc(const xmlChar *cur) {
14112
return(xmlSAXParseDoc(NULL, cur, 0));
14114
#endif /* LIBXML_SAX1_ENABLED */
14116
#ifdef LIBXML_LEGACY_ENABLED
14117
/************************************************************************
14119
* Specific function to keep track of entities references *
14120
* and used by the XSLT debugger *
14122
************************************************************************/
14124
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14127
* xmlAddEntityReference:
14128
* @ent : A valid entity
14129
* @firstNode : A valid first node for children of entity
14130
* @lastNode : A valid last node of children entity
14132
* Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14135
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14136
xmlNodePtr lastNode)
14138
if (xmlEntityRefFunc != NULL) {
14139
(*xmlEntityRefFunc) (ent, firstNode, lastNode);
14145
* xmlSetEntityReferenceFunc:
14146
* @func: A valid function
14148
* Set the function to call call back when a xml reference has been made
14151
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14153
xmlEntityRefFunc = func;
14155
#endif /* LIBXML_LEGACY_ENABLED */
14157
/************************************************************************
14161
************************************************************************/
14163
#ifdef LIBXML_XPATH_ENABLED
14164
#include <libxml/xpath.h>
14167
extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14168
static int xmlParserInitialized = 0;
14173
* Initialization function for the XML parser.
14174
* This is not reentrant. Call once before processing in case of
14175
* use in multithreaded programs.
14179
xmlInitParser(void) {
14180
if (xmlParserInitialized != 0)
14183
#ifdef LIBXML_THREAD_ENABLED
14184
__xmlGlobalInitMutexLock();
14185
if (xmlParserInitialized == 0) {
14189
if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14190
(xmlGenericError == NULL))
14191
initGenericErrorDefaultFunc(NULL);
14193
xmlInitializeDict();
14194
xmlInitCharEncodingHandlers();
14195
xmlDefaultSAXHandlerInit();
14196
xmlRegisterDefaultInputCallbacks();
14197
#ifdef LIBXML_OUTPUT_ENABLED
14198
xmlRegisterDefaultOutputCallbacks();
14199
#endif /* LIBXML_OUTPUT_ENABLED */
14200
#ifdef LIBXML_HTML_ENABLED
14201
htmlInitAutoClose();
14202
htmlDefaultSAXHandlerInit();
14204
#ifdef LIBXML_XPATH_ENABLED
14207
xmlParserInitialized = 1;
14208
#ifdef LIBXML_THREAD_ENABLED
14210
__xmlGlobalInitMutexUnlock();
14215
* xmlCleanupParser:
14217
* This function name is somewhat misleading. It does not clean up
14218
* parser state, it cleans up memory allocated by the library itself.
14219
* It is a cleanup function for the XML library. It tries to reclaim all
14220
* related global memory allocated for the library processing.
14221
* It doesn't deallocate any document related memory. One should
14222
* call xmlCleanupParser() only when the process has finished using
14223
* the library and all XML/HTML documents built with it.
14224
* See also xmlInitParser() which has the opposite function of preparing
14225
* the library for operations.
14227
* WARNING: if your application is multithreaded or has plugin support
14228
* calling this may crash the application if another thread or
14229
* a plugin is still using libxml2. It's sometimes very hard to
14230
* guess if libxml2 is in use in the application, some libraries
14231
* or plugins may use it without notice. In case of doubt abstain
14232
* from calling this function or do it just before calling exit()
14233
* to avoid leak reports from valgrind !
14237
xmlCleanupParser(void) {
14238
if (!xmlParserInitialized)
14241
xmlCleanupCharEncodingHandlers();
14242
#ifdef LIBXML_CATALOG_ENABLED
14243
xmlCatalogCleanup();
14246
xmlCleanupInputCallbacks();
14247
#ifdef LIBXML_OUTPUT_ENABLED
14248
xmlCleanupOutputCallbacks();
14250
#ifdef LIBXML_SCHEMAS_ENABLED
14251
xmlSchemaCleanupTypes();
14252
xmlRelaxNGCleanupTypes();
14254
xmlCleanupGlobals();
14255
xmlResetLastError();
14256
xmlCleanupThreads(); /* must be last if called not from the main thread */
14257
xmlCleanupMemory();
14258
xmlParserInitialized = 0;
14261
/************************************************************************
14263
* New set (2.6.0) of simpler and more flexible APIs *
14265
************************************************************************/
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is used. @str is evaluated more than once, so it must not have side
 * effects.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement: "DICT_FREE(x);" is safe inside an unbraced if/else
 * and leaves no stray empty statement behind.
 */
#define DICT_FREE(str)						\
	do {							\
	    if ((str) && ((!dict) ||				\
	        (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	        xmlFree((char *)(str));				\
	} while (0)
14281
* @ctxt: an XML parser context
14283
* Reset a parser context
14286
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14288
xmlParserInputPtr input;
14296
while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14297
xmlFreeInputStream(input);
14300
ctxt->input = NULL;
14303
if (ctxt->spaceTab != NULL) {
14304
ctxt->spaceTab[0] = -1;
14305
ctxt->space = &ctxt->spaceTab[0];
14307
ctxt->space = NULL;
14317
DICT_FREE(ctxt->version);
14318
ctxt->version = NULL;
14319
DICT_FREE(ctxt->encoding);
14320
ctxt->encoding = NULL;
14321
DICT_FREE(ctxt->directory);
14322
ctxt->directory = NULL;
14323
DICT_FREE(ctxt->extSubURI);
14324
ctxt->extSubURI = NULL;
14325
DICT_FREE(ctxt->extSubSystem);
14326
ctxt->extSubSystem = NULL;
14327
if (ctxt->myDoc != NULL)
14328
xmlFreeDoc(ctxt->myDoc);
14329
ctxt->myDoc = NULL;
14331
ctxt->standalone = -1;
14332
ctxt->hasExternalSubset = 0;
14333
ctxt->hasPErefs = 0;
14335
ctxt->external = 0;
14336
ctxt->instate = XML_PARSER_START;
14339
ctxt->wellFormed = 1;
14340
ctxt->nsWellFormed = 1;
14341
ctxt->disableSAX = 0;
14344
ctxt->vctxt.userData = ctxt;
14345
ctxt->vctxt.error = xmlParserValidityError;
14346
ctxt->vctxt.warning = xmlParserValidityWarning;
14348
ctxt->record_info = 0;
14350
ctxt->checkIndex = 0;
14351
ctxt->inSubset = 0;
14352
ctxt->errNo = XML_ERR_OK;
14354
ctxt->charset = XML_CHAR_ENCODING_UTF8;
14355
ctxt->catalogs = NULL;
14356
ctxt->nbentities = 0;
14357
ctxt->sizeentities = 0;
14358
xmlInitNodeInfoSeq(&ctxt->node_seq);
14360
if (ctxt->attsDefault != NULL) {
14361
xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14362
ctxt->attsDefault = NULL;
14364
if (ctxt->attsSpecial != NULL) {
14365
xmlHashFree(ctxt->attsSpecial, NULL);
14366
ctxt->attsSpecial = NULL;
14369
#ifdef LIBXML_CATALOG_ENABLED
14370
if (ctxt->catalogs != NULL)
14371
xmlCatalogFreeLocal(ctxt->catalogs);
14373
if (ctxt->lastError.code != XML_ERR_OK)
14374
xmlResetError(&ctxt->lastError);
14378
* xmlCtxtResetPush:
14379
* @ctxt: an XML parser context
14380
* @chunk: a pointer to an array of chars
14381
* @size: number of chars in the array
14382
* @filename: an optional file name or URI
14383
* @encoding: the document encoding, or NULL
14385
* Reset a push parser context
14387
* Returns 0 in case of success and 1 in case of error
14390
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14391
int size, const char *filename, const char *encoding)
14393
xmlParserInputPtr inputStream;
14394
xmlParserInputBufferPtr buf;
14395
xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14400
if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14401
enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14403
buf = xmlAllocParserInputBuffer(enc);
14407
if (ctxt == NULL) {
14408
xmlFreeParserInputBuffer(buf);
14412
xmlCtxtReset(ctxt);
14414
if (ctxt->pushTab == NULL) {
14415
ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14416
sizeof(xmlChar *));
14417
if (ctxt->pushTab == NULL) {
14418
xmlErrMemory(ctxt, NULL);
14419
xmlFreeParserInputBuffer(buf);
14424
if (filename == NULL) {
14425
ctxt->directory = NULL;
14427
ctxt->directory = xmlParserGetDirectory(filename);
14430
inputStream = xmlNewInputStream(ctxt);
14431
if (inputStream == NULL) {
14432
xmlFreeParserInputBuffer(buf);
14436
if (filename == NULL)
14437
inputStream->filename = NULL;
14439
inputStream->filename = (char *)
14440
xmlCanonicPath((const xmlChar *) filename);
14441
inputStream->buf = buf;
14442
inputStream->base = inputStream->buf->buffer->content;
14443
inputStream->cur = inputStream->buf->buffer->content;
14445
&inputStream->buf->buffer->content[inputStream->buf->buffer->use];
14447
inputPush(ctxt, inputStream);
14449
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14450
(ctxt->input->buf != NULL)) {
14451
int base = ctxt->input->base - ctxt->input->buf->buffer->content;
14452
int cur = ctxt->input->cur - ctxt->input->base;
14454
xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14456
ctxt->input->base = ctxt->input->buf->buffer->content + base;
14457
ctxt->input->cur = ctxt->input->base + cur;
14459
&ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
14462
xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14466
if (encoding != NULL) {
14467
xmlCharEncodingHandlerPtr hdlr;
14469
if (ctxt->encoding != NULL)
14470
xmlFree((xmlChar *) ctxt->encoding);
14471
ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14473
hdlr = xmlFindCharEncodingHandler(encoding);
14474
if (hdlr != NULL) {
14475
xmlSwitchToEncoding(ctxt, hdlr);
14477
xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14478
"Unsupported encoding %s\n", BAD_CAST encoding);
14480
} else if (enc != XML_CHAR_ENCODING_NONE) {
14481
xmlSwitchEncoding(ctxt, enc);
14489
* xmlCtxtUseOptionsInternal:
14490
* @ctxt: an XML parser context
14491
* @options: a combination of xmlParserOption
14492
* @encoding: the user provided encoding to use
14494
* Applies the options to the parser context
14496
* Returns 0 in case of success, the set of unknown or unimplemented options
14497
* in case of error.
14500
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14504
if (encoding != NULL) {
14505
if (ctxt->encoding != NULL)
14506
xmlFree((xmlChar *) ctxt->encoding);
14507
ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14509
if (options & XML_PARSE_RECOVER) {
14510
ctxt->recovery = 1;
14511
options -= XML_PARSE_RECOVER;
14512
ctxt->options |= XML_PARSE_RECOVER;
14514
ctxt->recovery = 0;
14515
if (options & XML_PARSE_DTDLOAD) {
14516
ctxt->loadsubset = XML_DETECT_IDS;
14517
options -= XML_PARSE_DTDLOAD;
14518
ctxt->options |= XML_PARSE_DTDLOAD;
14520
ctxt->loadsubset = 0;
14521
if (options & XML_PARSE_DTDATTR) {
14522
ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14523
options -= XML_PARSE_DTDATTR;
14524
ctxt->options |= XML_PARSE_DTDATTR;
14526
if (options & XML_PARSE_NOENT) {
14527
ctxt->replaceEntities = 1;
14528
/* ctxt->loadsubset |= XML_DETECT_IDS; */
14529
options -= XML_PARSE_NOENT;
14530
ctxt->options |= XML_PARSE_NOENT;
14532
ctxt->replaceEntities = 0;
14533
if (options & XML_PARSE_PEDANTIC) {
14534
ctxt->pedantic = 1;
14535
options -= XML_PARSE_PEDANTIC;
14536
ctxt->options |= XML_PARSE_PEDANTIC;
14538
ctxt->pedantic = 0;
14539
if (options & XML_PARSE_NOBLANKS) {
14540
ctxt->keepBlanks = 0;
14541
ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14542
options -= XML_PARSE_NOBLANKS;
14543
ctxt->options |= XML_PARSE_NOBLANKS;
14545
ctxt->keepBlanks = 1;
14546
if (options & XML_PARSE_DTDVALID) {
14547
ctxt->validate = 1;
14548
if (options & XML_PARSE_NOWARNING)
14549
ctxt->vctxt.warning = NULL;
14550
if (options & XML_PARSE_NOERROR)
14551
ctxt->vctxt.error = NULL;
14552
options -= XML_PARSE_DTDVALID;
14553
ctxt->options |= XML_PARSE_DTDVALID;
14555
ctxt->validate = 0;
14556
if (options & XML_PARSE_NOWARNING) {
14557
ctxt->sax->warning = NULL;
14558
options -= XML_PARSE_NOWARNING;
14560
if (options & XML_PARSE_NOERROR) {
14561
ctxt->sax->error = NULL;
14562
ctxt->sax->fatalError = NULL;
14563
options -= XML_PARSE_NOERROR;
14565
#ifdef LIBXML_SAX1_ENABLED
14566
if (options & XML_PARSE_SAX1) {
14567
ctxt->sax->startElement = xmlSAX2StartElement;
14568
ctxt->sax->endElement = xmlSAX2EndElement;
14569
ctxt->sax->startElementNs = NULL;
14570
ctxt->sax->endElementNs = NULL;
14571
ctxt->sax->initialized = 1;
14572
options -= XML_PARSE_SAX1;
14573
ctxt->options |= XML_PARSE_SAX1;
14575
#endif /* LIBXML_SAX1_ENABLED */
14576
if (options & XML_PARSE_NODICT) {
14577
ctxt->dictNames = 0;
14578
options -= XML_PARSE_NODICT;
14579
ctxt->options |= XML_PARSE_NODICT;
14581
ctxt->dictNames = 1;
14583
if (options & XML_PARSE_NOCDATA) {
14584
ctxt->sax->cdataBlock = NULL;
14585
options -= XML_PARSE_NOCDATA;
14586
ctxt->options |= XML_PARSE_NOCDATA;
14588
if (options & XML_PARSE_NSCLEAN) {
14589
ctxt->options |= XML_PARSE_NSCLEAN;
14590
options -= XML_PARSE_NSCLEAN;
14592
if (options & XML_PARSE_NONET) {
14593
ctxt->options |= XML_PARSE_NONET;
14594
options -= XML_PARSE_NONET;
14596
if (options & XML_PARSE_COMPACT) {
14597
ctxt->options |= XML_PARSE_COMPACT;
14598
options -= XML_PARSE_COMPACT;
14600
if (options & XML_PARSE_OLD10) {
14601
ctxt->options |= XML_PARSE_OLD10;
14602
options -= XML_PARSE_OLD10;
14604
if (options & XML_PARSE_NOBASEFIX) {
14605
ctxt->options |= XML_PARSE_NOBASEFIX;
14606
options -= XML_PARSE_NOBASEFIX;
14608
if (options & XML_PARSE_HUGE) {
14609
ctxt->options |= XML_PARSE_HUGE;
14610
options -= XML_PARSE_HUGE;
14612
if (options & XML_PARSE_OLDSAX) {
14613
ctxt->options |= XML_PARSE_OLDSAX;
14614
options -= XML_PARSE_OLDSAX;
14616
if (options & XML_PARSE_IGNORE_ENC) {
14617
ctxt->options |= XML_PARSE_IGNORE_ENC;
14618
options -= XML_PARSE_IGNORE_ENC;
14620
ctxt->linenumbers = 1;
14625
* xmlCtxtUseOptions:
14626
* @ctxt: an XML parser context
14627
* @options: a combination of xmlParserOption
14629
* Applies the options to the parser context
14631
* Returns 0 in case of success, the set of unknown or unimplemented options
14632
* in case of error.
14635
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14637
return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14642
* @ctxt: an XML parser context
14643
* @URL: the base URL to use for the document
14644
* @encoding: the document encoding, or NULL
14645
* @options: a combination of xmlParserOption
14646
* @reuse: keep the context for reuse
14648
* Common front-end for the xmlRead functions
14650
* Returns the resulting document tree or NULL
14653
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14654
int options, int reuse)
14658
xmlCtxtUseOptionsInternal(ctxt, options, encoding);
14659
if (encoding != NULL) {
14660
xmlCharEncodingHandlerPtr hdlr;
14662
hdlr = xmlFindCharEncodingHandler(encoding);
14664
xmlSwitchToEncoding(ctxt, hdlr);
14666
if ((URL != NULL) && (ctxt->input != NULL) &&
14667
(ctxt->input->filename == NULL))
14668
ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
14669
xmlParseDocument(ctxt);
14670
if ((ctxt->wellFormed) || ctxt->recovery)
14674
if (ctxt->myDoc != NULL) {
14675
xmlFreeDoc(ctxt->myDoc);
14678
ctxt->myDoc = NULL;
14680
xmlFreeParserCtxt(ctxt);
14688
* @cur: a pointer to a zero terminated string
14689
* @URL: the base URL to use for the document
14690
* @encoding: the document encoding, or NULL
14691
* @options: a combination of xmlParserOption
14693
* parse an XML in-memory document and build a tree.
14695
* Returns the resulting document tree
14698
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
14700
xmlParserCtxtPtr ctxt;
14705
ctxt = xmlCreateDocParserCtxt(cur);
14708
return (xmlDoRead(ctxt, URL, encoding, options, 0));
14713
* @filename: a file or URL
14714
* @encoding: the document encoding, or NULL
14715
* @options: a combination of xmlParserOption
14717
* parse an XML file from the filesystem or the network.
14719
* Returns the resulting document tree
14722
xmlReadFile(const char *filename, const char *encoding, int options)
14724
xmlParserCtxtPtr ctxt;
14726
ctxt = xmlCreateURLParserCtxt(filename, options);
14729
return (xmlDoRead(ctxt, NULL, encoding, options, 0));
14734
* @buffer: a pointer to a char array
14735
* @size: the size of the array
14736
* @URL: the base URL to use for the document
14737
* @encoding: the document encoding, or NULL
14738
* @options: a combination of xmlParserOption
14740
* parse an XML in-memory document and build a tree.
14742
* Returns the resulting document tree
14745
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
14747
xmlParserCtxtPtr ctxt;
14749
ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14752
return (xmlDoRead(ctxt, URL, encoding, options, 0));
14757
* @fd: an open file descriptor
14758
* @URL: the base URL to use for the document
14759
* @encoding: the document encoding, or NULL
14760
* @options: a combination of xmlParserOption
14762
* parse an XML from a file descriptor and build a tree.
14763
* NOTE that the file descriptor will not be closed when the
14764
* reader is closed or reset.
14766
* Returns the resulting document tree
14769
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
14771
xmlParserCtxtPtr ctxt;
14772
xmlParserInputBufferPtr input;
14773
xmlParserInputPtr stream;
14778
input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14781
input->closecallback = NULL;
14782
ctxt = xmlNewParserCtxt();
14783
if (ctxt == NULL) {
14784
xmlFreeParserInputBuffer(input);
14787
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14788
if (stream == NULL) {
14789
xmlFreeParserInputBuffer(input);
14790
xmlFreeParserCtxt(ctxt);
14793
inputPush(ctxt, stream);
14794
return (xmlDoRead(ctxt, URL, encoding, options, 0));
14799
* @ioread: an I/O read function
14800
* @ioclose: an I/O close function
14801
* @ioctx: an I/O handler
14802
* @URL: the base URL to use for the document
14803
* @encoding: the document encoding, or NULL
14804
* @options: a combination of xmlParserOption
14806
* parse an XML document from I/O functions and source and build a tree.
14808
* Returns the resulting document tree
14811
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
14812
void *ioctx, const char *URL, const char *encoding, int options)
14814
xmlParserCtxtPtr ctxt;
14815
xmlParserInputBufferPtr input;
14816
xmlParserInputPtr stream;
14818
if (ioread == NULL)
14821
input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14822
XML_CHAR_ENCODING_NONE);
14823
if (input == NULL) {
14824
if (ioclose != NULL)
14828
ctxt = xmlNewParserCtxt();
14829
if (ctxt == NULL) {
14830
xmlFreeParserInputBuffer(input);
14833
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14834
if (stream == NULL) {
14835
xmlFreeParserInputBuffer(input);
14836
xmlFreeParserCtxt(ctxt);
14839
inputPush(ctxt, stream);
14840
return (xmlDoRead(ctxt, URL, encoding, options, 0));
14845
* @ctxt: an XML parser context
14846
* @cur: a pointer to a zero terminated string
14847
* @URL: the base URL to use for the document
14848
* @encoding: the document encoding, or NULL
14849
* @options: a combination of xmlParserOption
14851
* parse an XML in-memory document and build a tree.
14852
* This reuses the existing @ctxt parser context
14854
* Returns the resulting document tree
14857
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
14858
const char *URL, const char *encoding, int options)
14860
xmlParserInputPtr stream;
14867
xmlCtxtReset(ctxt);
14869
stream = xmlNewStringInputStream(ctxt, cur);
14870
if (stream == NULL) {
14873
inputPush(ctxt, stream);
14874
return (xmlDoRead(ctxt, URL, encoding, options, 1));
14879
* @ctxt: an XML parser context
14880
* @filename: a file or URL
14881
* @encoding: the document encoding, or NULL
14882
* @options: a combination of xmlParserOption
14884
* parse an XML file from the filesystem or the network.
14885
* This reuses the existing @ctxt parser context
14887
* Returns the resulting document tree
14890
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14891
const char *encoding, int options)
14893
xmlParserInputPtr stream;
14895
if (filename == NULL)
14900
xmlCtxtReset(ctxt);
14902
stream = xmlLoadExternalEntity(filename, NULL, ctxt);
14903
if (stream == NULL) {
14906
inputPush(ctxt, stream);
14907
return (xmlDoRead(ctxt, NULL, encoding, options, 1));
14911
* xmlCtxtReadMemory:
14912
* @ctxt: an XML parser context
14913
* @buffer: a pointer to a char array
14914
* @size: the size of the array
14915
* @URL: the base URL to use for the document
14916
* @encoding: the document encoding, or NULL
14917
* @options: a combination of xmlParserOption
14919
* parse an XML in-memory document and build a tree.
14920
* This reuses the existing @ctxt parser context
14922
* Returns the resulting document tree
14925
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
14926
const char *URL, const char *encoding, int options)
14928
xmlParserInputBufferPtr input;
14929
xmlParserInputPtr stream;
14933
if (buffer == NULL)
14936
xmlCtxtReset(ctxt);
14938
input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14939
if (input == NULL) {
14943
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14944
if (stream == NULL) {
14945
xmlFreeParserInputBuffer(input);
14949
inputPush(ctxt, stream);
14950
return (xmlDoRead(ctxt, URL, encoding, options, 1));
14955
* @ctxt: an XML parser context
14956
* @fd: an open file descriptor
14957
* @URL: the base URL to use for the document
14958
* @encoding: the document encoding, or NULL
14959
* @options: a combination of xmlParserOption
14961
* parse an XML from a file descriptor and build a tree.
14962
* This reuses the existing @ctxt parser context
14963
* NOTE that the file descriptor will not be closed when the
14964
* reader is closed or reset.
14966
* Returns the resulting document tree
14969
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14970
const char *URL, const char *encoding, int options)
14972
xmlParserInputBufferPtr input;
14973
xmlParserInputPtr stream;
14980
xmlCtxtReset(ctxt);
14983
input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14986
input->closecallback = NULL;
14987
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14988
if (stream == NULL) {
14989
xmlFreeParserInputBuffer(input);
14992
inputPush(ctxt, stream);
14993
return (xmlDoRead(ctxt, URL, encoding, options, 1));
14998
* @ctxt: an XML parser context
14999
* @ioread: an I/O read function
15000
* @ioclose: an I/O close function
15001
* @ioctx: an I/O handler
15002
* @URL: the base URL to use for the document
15003
* @encoding: the document encoding, or NULL
15004
* @options: a combination of xmlParserOption
15006
* parse an XML document from I/O functions and source and build a tree.
15007
* This reuses the existing @ctxt parser context
15009
* Returns the resulting document tree
15012
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15013
xmlInputCloseCallback ioclose, void *ioctx,
15015
const char *encoding, int options)
15017
xmlParserInputBufferPtr input;
15018
xmlParserInputPtr stream;
15020
if (ioread == NULL)
15025
xmlCtxtReset(ctxt);
15027
input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15028
XML_CHAR_ENCODING_NONE);
15029
if (input == NULL) {
15030
if (ioclose != NULL)
15034
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15035
if (stream == NULL) {
15036
xmlFreeParserInputBuffer(input);
15039
inputPush(ctxt, stream);
15040
return (xmlDoRead(ctxt, URL, encoding, options, 1));
15043
#define bottom_parser
15044
#include "elfgcchack.h"