2
* parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
* implemented on top of the SAX interfaces
6
* The XML specification:
7
* http://www.w3.org/TR/REC-xml
8
* Original 1.0 version:
9
* http://www.w3.org/TR/1998/REC-xml-19980210
10
* XML second edition working draft
11
* http://www.w3.org/TR/2000/WD-xml-2e-20000814
13
* Okay this is a big file, the parser core is around 7000 lines, then it
14
* is followed by the progressive parser top routines, then the various
15
* high level APIs to call the parser and a few miscellaneous functions.
16
* A number of helper functions and deprecated ones have been moved to
17
* parserInternals.c to reduce this file size.
18
* As much as possible the functions are associated with their relative
19
* production in the XML specification. A few productions defining the
20
* different ranges of character are actually implemented either in
21
* parserInternals.h or parserInternals.c
22
* The DOM tree build is realized from the default SAX callbacks in
24
* The routines doing the validation checks are in valid.c and called either
25
* from the SAX callbacks or as standalone functions using a preparsed
28
* See Copyright for the status of this software.
36
#if defined(WIN32) && !defined (__CYGWIN__)
37
#define XML_DIR_SEP '\\'
39
#define XML_DIR_SEP '/'
45
#include <libxml/xmlmemory.h>
46
#include <libxml/threads.h>
47
#include <libxml/globals.h>
48
#include <libxml/tree.h>
49
#include <libxml/parser.h>
50
#include <libxml/parserInternals.h>
51
#include <libxml/valid.h>
52
#include <libxml/entities.h>
53
#include <libxml/xmlerror.h>
54
#include <libxml/encoding.h>
55
#include <libxml/xmlIO.h>
56
#include <libxml/uri.h>
57
#ifdef LIBXML_CATALOG_ENABLED
58
#include <libxml/catalog.h>
60
#ifdef LIBXML_SCHEMAS_ENABLED
61
#include <libxml/xmlschemastypes.h>
62
#include <libxml/relaxng.h>
70
#ifdef HAVE_SYS_STAT_H
86
* arbitrary depth limit for the XML documents that we allow to
87
* process. This is not a limitation of the parser but a safety
90
/*
 * Upper bound on element nesting depth. nodePush() compares the node
 * stack size against this value; when the size exceeds it, a fatal
 * "Excessive depth in document" error is raised and the parser state
 * is switched to XML_PARSER_EOF.
 */
unsigned int xmlParserMaxDepth = 1024;
94
#define XML_PARSER_BIG_BUFFER_SIZE 300
95
#define XML_PARSER_BUFFER_SIZE 100
97
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
100
* List of XML prefixed PI allowed by W3C specs
103
static const char *xmlW3CPIs[] = {
109
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
110
xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111
const xmlChar **str);
113
static xmlParserErrors
114
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115
xmlSAXHandlerPtr sax,
116
void *user_data, int depth, const xmlChar *URL,
117
const xmlChar *ID, xmlNodePtr *list);
119
#ifdef LIBXML_LEGACY_ENABLED
121
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
122
xmlNodePtr lastNode);
123
#endif /* LIBXML_LEGACY_ENABLED */
125
static xmlParserErrors
126
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
127
const xmlChar *string, void *user_data, xmlNodePtr *lst);
129
/************************************************************************
131
* Some factorized error routines *
133
************************************************************************/
136
* xmlErrAttributeDup:
137
* @ctxt: an XML parser context
138
* @prefix: the attribute prefix
139
* @localname: the attribute localname
141
* Handle a redefinition of attribute error
144
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
145
const xmlChar * localname)
147
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
148
(ctxt->instate == XML_PARSER_EOF))
151
ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
153
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
154
ctxt->errNo, XML_ERR_FATAL, NULL, 0,
155
(const char *) localname, NULL, NULL, 0, 0,
156
"Attribute %s redefined\n", localname);
158
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
159
ctxt->errNo, XML_ERR_FATAL, NULL, 0,
160
(const char *) prefix, (const char *) localname,
161
NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
164
ctxt->wellFormed = 0;
165
if (ctxt->recovery == 0)
166
ctxt->disableSAX = 1;
172
* @ctxt: an XML parser context
173
* @error: the error number
174
* @info: extra information string
176
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
179
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
183
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
184
(ctxt->instate == XML_PARSER_EOF))
187
case XML_ERR_INVALID_HEX_CHARREF:
188
errmsg = "CharRef: invalid hexadecimal value\n";
190
case XML_ERR_INVALID_DEC_CHARREF:
191
errmsg = "CharRef: invalid decimal value\n";
193
case XML_ERR_INVALID_CHARREF:
194
errmsg = "CharRef: invalid value\n";
196
case XML_ERR_INTERNAL_ERROR:
197
errmsg = "internal error";
199
case XML_ERR_PEREF_AT_EOF:
200
errmsg = "PEReference at end of document\n";
202
case XML_ERR_PEREF_IN_PROLOG:
203
errmsg = "PEReference in prolog\n";
205
case XML_ERR_PEREF_IN_EPILOG:
206
errmsg = "PEReference in epilog\n";
208
case XML_ERR_PEREF_NO_NAME:
209
errmsg = "PEReference: no name\n";
211
case XML_ERR_PEREF_SEMICOL_MISSING:
212
errmsg = "PEReference: expecting ';'\n";
214
case XML_ERR_ENTITY_LOOP:
215
errmsg = "Detected an entity reference loop\n";
217
case XML_ERR_ENTITY_NOT_STARTED:
218
errmsg = "EntityValue: \" or ' expected\n";
220
case XML_ERR_ENTITY_PE_INTERNAL:
221
errmsg = "PEReferences forbidden in internal subset\n";
223
case XML_ERR_ENTITY_NOT_FINISHED:
224
errmsg = "EntityValue: \" or ' expected\n";
226
case XML_ERR_ATTRIBUTE_NOT_STARTED:
227
errmsg = "AttValue: \" or ' expected\n";
229
case XML_ERR_LT_IN_ATTRIBUTE:
230
errmsg = "Unescaped '<' not allowed in attributes values\n";
232
case XML_ERR_LITERAL_NOT_STARTED:
233
errmsg = "SystemLiteral \" or ' expected\n";
235
case XML_ERR_LITERAL_NOT_FINISHED:
236
errmsg = "Unfinished System or Public ID \" or ' expected\n";
238
case XML_ERR_MISPLACED_CDATA_END:
239
errmsg = "Sequence ']]>' not allowed in content\n";
241
case XML_ERR_URI_REQUIRED:
242
errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
244
case XML_ERR_PUBID_REQUIRED:
245
errmsg = "PUBLIC, the Public Identifier is missing\n";
247
case XML_ERR_HYPHEN_IN_COMMENT:
248
errmsg = "Comment must not contain '--' (double-hyphen)\n";
250
case XML_ERR_PI_NOT_STARTED:
251
errmsg = "xmlParsePI : no target name\n";
253
case XML_ERR_RESERVED_XML_NAME:
254
errmsg = "Invalid PI name\n";
256
case XML_ERR_NOTATION_NOT_STARTED:
257
errmsg = "NOTATION: Name expected here\n";
259
case XML_ERR_NOTATION_NOT_FINISHED:
260
errmsg = "'>' required to close NOTATION declaration\n";
262
case XML_ERR_VALUE_REQUIRED:
263
errmsg = "Entity value required\n";
265
case XML_ERR_URI_FRAGMENT:
266
errmsg = "Fragment not allowed";
268
case XML_ERR_ATTLIST_NOT_STARTED:
269
errmsg = "'(' required to start ATTLIST enumeration\n";
271
case XML_ERR_NMTOKEN_REQUIRED:
272
errmsg = "NmToken expected in ATTLIST enumeration\n";
274
case XML_ERR_ATTLIST_NOT_FINISHED:
275
errmsg = "')' required to finish ATTLIST enumeration\n";
277
case XML_ERR_MIXED_NOT_STARTED:
278
errmsg = "MixedContentDecl : '|' or ')*' expected\n";
280
case XML_ERR_PCDATA_REQUIRED:
281
errmsg = "MixedContentDecl : '#PCDATA' expected\n";
283
case XML_ERR_ELEMCONTENT_NOT_STARTED:
284
errmsg = "ContentDecl : Name or '(' expected\n";
286
case XML_ERR_ELEMCONTENT_NOT_FINISHED:
287
errmsg = "ContentDecl : ',' '|' or ')' expected\n";
289
case XML_ERR_PEREF_IN_INT_SUBSET:
291
"PEReference: forbidden within markup decl in internal subset\n";
293
case XML_ERR_GT_REQUIRED:
294
errmsg = "expected '>'\n";
296
case XML_ERR_CONDSEC_INVALID:
297
errmsg = "XML conditional section '[' expected\n";
299
case XML_ERR_EXT_SUBSET_NOT_FINISHED:
300
errmsg = "Content error in the external subset\n";
302
case XML_ERR_CONDSEC_INVALID_KEYWORD:
304
"conditional section INCLUDE or IGNORE keyword expected\n";
306
case XML_ERR_CONDSEC_NOT_FINISHED:
307
errmsg = "XML conditional section not closed\n";
309
case XML_ERR_XMLDECL_NOT_STARTED:
310
errmsg = "Text declaration '<?xml' required\n";
312
case XML_ERR_XMLDECL_NOT_FINISHED:
313
errmsg = "parsing XML declaration: '?>' expected\n";
315
case XML_ERR_EXT_ENTITY_STANDALONE:
316
errmsg = "external parsed entities cannot be standalone\n";
318
case XML_ERR_ENTITYREF_SEMICOL_MISSING:
319
errmsg = "EntityRef: expecting ';'\n";
321
case XML_ERR_DOCTYPE_NOT_FINISHED:
322
errmsg = "DOCTYPE improperly terminated\n";
324
case XML_ERR_LTSLASH_REQUIRED:
325
errmsg = "EndTag: '</' not found\n";
327
case XML_ERR_EQUAL_REQUIRED:
328
errmsg = "expected '='\n";
330
case XML_ERR_STRING_NOT_CLOSED:
331
errmsg = "String not closed expecting \" or '\n";
333
case XML_ERR_STRING_NOT_STARTED:
334
errmsg = "String not started expecting ' or \"\n";
336
case XML_ERR_ENCODING_NAME:
337
errmsg = "Invalid XML encoding name\n";
339
case XML_ERR_STANDALONE_VALUE:
340
errmsg = "standalone accepts only 'yes' or 'no'\n";
342
case XML_ERR_DOCUMENT_EMPTY:
343
errmsg = "Document is empty\n";
345
case XML_ERR_DOCUMENT_END:
346
errmsg = "Extra content at the end of the document\n";
348
case XML_ERR_NOT_WELL_BALANCED:
349
errmsg = "chunk is not well balanced\n";
351
case XML_ERR_EXTRA_CONTENT:
352
errmsg = "extra content at the end of well balanced chunk\n";
354
case XML_ERR_VERSION_MISSING:
355
errmsg = "Malformed declaration expecting version\n";
363
errmsg = "Unregistered error message\n";
367
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
368
XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
371
ctxt->wellFormed = 0;
372
if (ctxt->recovery == 0)
373
ctxt->disableSAX = 1;
379
* @ctxt: an XML parser context
380
* @error: the error number
381
* @msg: the error message
383
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
386
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
389
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
390
(ctxt->instate == XML_PARSER_EOF))
394
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
395
XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
397
ctxt->wellFormed = 0;
398
if (ctxt->recovery == 0)
399
ctxt->disableSAX = 1;
405
* @ctxt: an XML parser context
406
* @error: the error number
407
* @msg: the error message
414
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
415
const char *msg, const xmlChar *str1, const xmlChar *str2)
417
xmlStructuredErrorFunc schannel = NULL;
419
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
420
(ctxt->instate == XML_PARSER_EOF))
422
if ((ctxt != NULL) && (ctxt->sax != NULL) &&
423
(ctxt->sax->initialized == XML_SAX2_MAGIC))
424
schannel = ctxt->sax->serror;
425
__xmlRaiseError(schannel,
426
(ctxt->sax) ? ctxt->sax->warning : NULL,
428
ctxt, NULL, XML_FROM_PARSER, error,
429
XML_ERR_WARNING, NULL, 0,
430
(const char *) str1, (const char *) str2, NULL, 0, 0,
431
msg, (const char *) str1, (const char *) str2);
436
* @ctxt: an XML parser context
437
* @error: the error number
438
* @msg: the error message
441
* Handle a validity error.
444
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
445
const char *msg, const xmlChar *str1)
447
xmlStructuredErrorFunc schannel = NULL;
449
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
450
(ctxt->instate == XML_PARSER_EOF))
454
if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
455
schannel = ctxt->sax->serror;
457
__xmlRaiseError(schannel,
458
ctxt->vctxt.error, ctxt->vctxt.userData,
459
ctxt, NULL, XML_FROM_DTD, error,
460
XML_ERR_ERROR, NULL, 0, (const char *) str1,
462
msg, (const char *) str1);
470
* @ctxt: an XML parser context
471
* @error: the error number
472
* @msg: the error message
473
* @val: an integer value
475
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
478
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
479
const char *msg, int val)
481
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
482
(ctxt->instate == XML_PARSER_EOF))
486
__xmlRaiseError(NULL, NULL, NULL,
487
ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
488
NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
490
ctxt->wellFormed = 0;
491
if (ctxt->recovery == 0)
492
ctxt->disableSAX = 1;
497
* xmlFatalErrMsgStrIntStr:
498
* @ctxt: an XML parser context
499
* @error: the error number
500
* @msg: the error message
501
* @str1: a string info
502
* @val: an integer value
503
* @str2: a string info
505
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
508
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
509
const char *msg, const xmlChar *str1, int val,
512
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
513
(ctxt->instate == XML_PARSER_EOF))
517
__xmlRaiseError(NULL, NULL, NULL,
518
ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
519
NULL, 0, (const char *) str1, (const char *) str2,
520
NULL, val, 0, msg, str1, val, str2);
522
ctxt->wellFormed = 0;
523
if (ctxt->recovery == 0)
524
ctxt->disableSAX = 1;
530
* @ctxt: an XML parser context
531
* @error: the error number
532
* @msg: the error message
533
* @val: a string value
535
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
538
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
539
const char *msg, const xmlChar * val)
541
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
542
(ctxt->instate == XML_PARSER_EOF))
546
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
547
XML_FROM_PARSER, error, XML_ERR_FATAL,
548
NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
551
ctxt->wellFormed = 0;
552
if (ctxt->recovery == 0)
553
ctxt->disableSAX = 1;
559
* @ctxt: an XML parser context
560
* @error: the error number
561
* @msg: the error message
562
* @val: a string value
564
* Handle a non fatal parser error
567
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
568
const char *msg, const xmlChar * val)
570
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
571
(ctxt->instate == XML_PARSER_EOF))
575
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
576
XML_FROM_PARSER, error, XML_ERR_ERROR,
577
NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
583
* @ctxt: an XML parser context
584
* @error: the error number
586
* @info1: extra information string
587
* @info2: extra information string
589
* Handle a namespace error: reported at error (non fatal) level, it clears ctxt->nsWellFormed
592
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
594
const xmlChar * info1, const xmlChar * info2,
595
const xmlChar * info3)
597
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
598
(ctxt->instate == XML_PARSER_EOF))
602
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
603
XML_ERR_ERROR, NULL, 0, (const char *) info1,
604
(const char *) info2, (const char *) info3, 0, 0, msg,
605
info1, info2, info3);
607
ctxt->nsWellFormed = 0;
610
/************************************************************************
612
* Library wide options *
614
************************************************************************/
618
* @feature: the feature to be examined
620
* Examines if the library has been compiled with a given feature.
622
* Returns a non-zero value if the feature exists, otherwise zero.
623
* Returns zero (0) if the feature does not exist or an
624
* unknown feature is requested, non-zero otherwise.
627
xmlHasFeature(xmlFeature feature)
630
case XML_WITH_THREAD:
631
#ifdef LIBXML_THREAD_ENABLED
637
#ifdef LIBXML_TREE_ENABLED
642
case XML_WITH_OUTPUT:
643
#ifdef LIBXML_OUTPUT_ENABLED
649
#ifdef LIBXML_PUSH_ENABLED
654
case XML_WITH_READER:
655
#ifdef LIBXML_READER_ENABLED
660
case XML_WITH_PATTERN:
661
#ifdef LIBXML_PATTERN_ENABLED
666
case XML_WITH_WRITER:
667
#ifdef LIBXML_WRITER_ENABLED
673
#ifdef LIBXML_SAX1_ENABLED
679
#ifdef LIBXML_FTP_ENABLED
685
#ifdef LIBXML_HTTP_ENABLED
691
#ifdef LIBXML_VALID_ENABLED
697
#ifdef LIBXML_HTML_ENABLED
702
case XML_WITH_LEGACY:
703
#ifdef LIBXML_LEGACY_ENABLED
709
#ifdef LIBXML_C14N_ENABLED
714
case XML_WITH_CATALOG:
715
#ifdef LIBXML_CATALOG_ENABLED
721
#ifdef LIBXML_XPATH_ENABLED
727
#ifdef LIBXML_XPTR_ENABLED
732
case XML_WITH_XINCLUDE:
733
#ifdef LIBXML_XINCLUDE_ENABLED
739
#ifdef LIBXML_ICONV_ENABLED
744
case XML_WITH_ISO8859X:
745
#ifdef LIBXML_ISO8859X_ENABLED
750
case XML_WITH_UNICODE:
751
#ifdef LIBXML_UNICODE_ENABLED
756
case XML_WITH_REGEXP:
757
#ifdef LIBXML_REGEXP_ENABLED
762
case XML_WITH_AUTOMATA:
763
#ifdef LIBXML_AUTOMATA_ENABLED
769
#ifdef LIBXML_EXPR_ENABLED
774
case XML_WITH_SCHEMAS:
775
#ifdef LIBXML_SCHEMAS_ENABLED
780
case XML_WITH_SCHEMATRON:
781
#ifdef LIBXML_SCHEMATRON_ENABLED
786
case XML_WITH_MODULES:
787
#ifdef LIBXML_MODULES_ENABLED
793
#ifdef LIBXML_DEBUG_ENABLED
798
case XML_WITH_DEBUG_MEM:
799
#ifdef DEBUG_MEMORY_LOCATION
804
case XML_WITH_DEBUG_RUN:
805
#ifdef LIBXML_DEBUG_RUNTIME
811
#ifdef LIBXML_ZLIB_ENABLED
822
/************************************************************************
824
* SAX2 defaulted attributes handling *
826
************************************************************************/
830
* @ctxt: an XML parser context
832
* Do the SAX2 detection and specific initialization
835
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
836
if (ctxt == NULL) return;
837
#ifdef LIBXML_SAX1_ENABLED
838
if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
839
((ctxt->sax->startElementNs != NULL) ||
840
(ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
843
#endif /* LIBXML_SAX1_ENABLED */
845
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
846
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
847
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
848
if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
849
(ctxt->str_xml_ns == NULL)) {
850
xmlErrMemory(ctxt, NULL);
854
typedef struct _xmlDefAttrs xmlDefAttrs;
855
typedef xmlDefAttrs *xmlDefAttrsPtr;
856
struct _xmlDefAttrs {
857
int nbAttrs; /* number of defaulted attributes on that element */
858
int maxAttrs; /* the size of the array */
859
const xmlChar *values[4]; /* array of localname/prefix/values */
864
* @ctxt: an XML parser context
865
* @fullname: the element fullname
866
* @fullattr: the attribute fullname
867
* @value: the attribute value
869
* Add a defaulted attribute for an element
872
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
873
const xmlChar *fullname,
874
const xmlChar *fullattr,
875
const xmlChar *value) {
876
xmlDefAttrsPtr defaults;
879
const xmlChar *prefix;
881
if (ctxt->attsDefault == NULL) {
882
ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
883
if (ctxt->attsDefault == NULL)
888
* split the element name into prefix:localname , the string found
889
* are within the DTD and then not associated to namespace names.
891
name = xmlSplitQName3(fullname, &len);
893
name = xmlDictLookup(ctxt->dict, fullname, -1);
896
name = xmlDictLookup(ctxt->dict, name, -1);
897
prefix = xmlDictLookup(ctxt->dict, fullname, len);
901
* make sure there is some storage
903
defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
904
if (defaults == NULL) {
905
defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
906
(4 * 4) * sizeof(const xmlChar *));
907
if (defaults == NULL)
909
defaults->nbAttrs = 0;
910
defaults->maxAttrs = 4;
911
xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
912
} else if (defaults->nbAttrs >= defaults->maxAttrs) {
915
temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
916
(2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
920
defaults->maxAttrs *= 2;
921
xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
925
* Split the attribute name into prefix:localname , the string found
926
* are within the DTD and then not associated to namespace names.
928
name = xmlSplitQName3(fullattr, &len);
930
name = xmlDictLookup(ctxt->dict, fullattr, -1);
933
name = xmlDictLookup(ctxt->dict, name, -1);
934
prefix = xmlDictLookup(ctxt->dict, fullattr, len);
937
defaults->values[4 * defaults->nbAttrs] = name;
938
defaults->values[4 * defaults->nbAttrs + 1] = prefix;
939
/* intern the string and precompute the end */
940
len = xmlStrlen(value);
941
value = xmlDictLookup(ctxt->dict, value, len);
942
defaults->values[4 * defaults->nbAttrs + 2] = value;
943
defaults->values[4 * defaults->nbAttrs + 3] = value + len;
949
xmlErrMemory(ctxt, NULL);
955
* @ctxt: an XML parser context
956
* @fullname: the element fullname
957
* @fullattr: the attribute fullname
958
* @type: the attribute type
960
* Register that this attribute is not CDATA
963
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
964
const xmlChar *fullname,
965
const xmlChar *fullattr,
968
if (ctxt->attsSpecial == NULL) {
969
ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
970
if (ctxt->attsSpecial == NULL)
974
xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
975
(void *) (long) type);
979
xmlErrMemory(ctxt, NULL);
984
* xmlCheckLanguageID:
985
* @lang: pointer to the string value
987
* Checks that the value conforms to the LanguageID production:
989
* NOTE: this is somewhat deprecated, those productions were removed from
990
* the XML Second edition.
992
* [33] LanguageID ::= Langcode ('-' Subcode)*
993
* [34] Langcode ::= ISO639Code | IanaCode | UserCode
994
* [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
995
* [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
996
* [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
997
* [38] Subcode ::= ([a-z] | [A-Z])+
999
* Returns 1 if correct 0 otherwise
1002
xmlCheckLanguageID(const xmlChar * lang)
1004
const xmlChar *cur = lang;
1008
if (((cur[0] == 'i') && (cur[1] == '-')) ||
1009
((cur[0] == 'I') && (cur[1] == '-'))) {
1014
while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1015
((cur[0] >= 'a') && (cur[0] <= 'z')))
1017
} else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1018
((cur[0] == 'X') && (cur[1] == '-'))) {
1023
while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1024
((cur[0] >= 'a') && (cur[0] <= 'z')))
1026
} else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1027
((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1032
if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1033
((cur[0] >= 'a') && (cur[0] <= 'z')))
1039
while (cur[0] != 0) { /* non input consuming */
1043
if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1044
((cur[0] >= 'a') && (cur[0] <= 'z')))
1048
while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1049
((cur[0] >= 'a') && (cur[0] <= 'z')))
1055
/************************************************************************
1057
* Parser stacks related functions and macros *
1059
************************************************************************/
1061
xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1062
const xmlChar ** str);
1067
* @ctxt: an XML parser context
1068
* @prefix: the namespace prefix or NULL
1069
* @URL: the namespace name
1071
* Pushes a new parser namespace on top of the ns stack
1073
* Returns -1 in case of error, -2 if the namespace should be discarded
1074
* and the index in the stack otherwise.
1077
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1079
if (ctxt->options & XML_PARSE_NSCLEAN) {
1081
for (i = 0;i < ctxt->nsNr;i += 2) {
1082
if (ctxt->nsTab[i] == prefix) {
1084
if (ctxt->nsTab[i + 1] == URL)
1086
/* out of scope keep it */
1091
if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1094
ctxt->nsTab = (const xmlChar **)
1095
xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1096
if (ctxt->nsTab == NULL) {
1097
xmlErrMemory(ctxt, NULL);
1101
} else if (ctxt->nsNr >= ctxt->nsMax) {
1103
ctxt->nsTab = (const xmlChar **)
1104
xmlRealloc((char *) ctxt->nsTab,
1105
ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1106
if (ctxt->nsTab == NULL) {
1107
xmlErrMemory(ctxt, NULL);
1112
ctxt->nsTab[ctxt->nsNr++] = prefix;
1113
ctxt->nsTab[ctxt->nsNr++] = URL;
1114
return (ctxt->nsNr);
1118
* @ctxt: an XML parser context
1119
* @nr: the number to pop
1121
* Pops the top @nr parser prefix/namespace from the ns stack
1123
* Returns the number of namespaces removed
1126
nsPop(xmlParserCtxtPtr ctxt, int nr)
1130
if (ctxt->nsTab == NULL) return(0);
1131
if (ctxt->nsNr < nr) {
1132
xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1135
if (ctxt->nsNr <= 0)
1138
for (i = 0;i < nr;i++) {
1140
ctxt->nsTab[ctxt->nsNr] = NULL;
1147
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1148
const xmlChar **atts;
1152
if (ctxt->atts == NULL) {
1153
maxatts = 55; /* allow for 10 attrs by default */
1154
atts = (const xmlChar **)
1155
xmlMalloc(maxatts * sizeof(xmlChar *));
1156
if (atts == NULL) goto mem_error;
1158
attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1159
if (attallocs == NULL) goto mem_error;
1160
ctxt->attallocs = attallocs;
1161
ctxt->maxatts = maxatts;
1162
} else if (nr + 5 > ctxt->maxatts) {
1163
maxatts = (nr + 5) * 2;
1164
atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1165
maxatts * sizeof(const xmlChar *));
1166
if (atts == NULL) goto mem_error;
1168
attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1169
(maxatts / 5) * sizeof(int));
1170
if (attallocs == NULL) goto mem_error;
1171
ctxt->attallocs = attallocs;
1172
ctxt->maxatts = maxatts;
1174
return(ctxt->maxatts);
1176
xmlErrMemory(ctxt, NULL);
1182
* @ctxt: an XML parser context
1183
* @value: the parser input
1185
* Pushes a new parser input on top of the input stack
1187
* Returns 0 in case of error, the index in the stack otherwise
1190
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1192
if ((ctxt == NULL) || (value == NULL))
1194
if (ctxt->inputNr >= ctxt->inputMax) {
1195
ctxt->inputMax *= 2;
1197
(xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1199
sizeof(ctxt->inputTab[0]));
1200
if (ctxt->inputTab == NULL) {
1201
xmlErrMemory(ctxt, NULL);
1205
ctxt->inputTab[ctxt->inputNr] = value;
1206
ctxt->input = value;
1207
return (ctxt->inputNr++);
1211
* @ctxt: an XML parser context
1213
* Pops the top parser input from the input stack
1215
* Returns the input just removed
1218
inputPop(xmlParserCtxtPtr ctxt)
1220
xmlParserInputPtr ret;
1224
if (ctxt->inputNr <= 0)
1227
if (ctxt->inputNr > 0)
1228
ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1231
ret = ctxt->inputTab[ctxt->inputNr];
1232
ctxt->inputTab[ctxt->inputNr] = NULL;
1237
* @ctxt: an XML parser context
1238
* @value: the element node
1240
* Pushes a new element node on top of the node stack
1242
* Returns 0 in case of error, the index in the stack otherwise
1245
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1247
if (ctxt == NULL) return(0);
1248
if (ctxt->nodeNr >= ctxt->nodeMax) {
1251
tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1253
sizeof(ctxt->nodeTab[0]));
1255
xmlErrMemory(ctxt, NULL);
1258
ctxt->nodeTab = tmp;
1261
if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
1262
xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1263
"Excessive depth in document: change xmlParserMaxDepth = %d\n",
1265
ctxt->instate = XML_PARSER_EOF;
1268
ctxt->nodeTab[ctxt->nodeNr] = value;
1270
return (ctxt->nodeNr++);
1274
* @ctxt: an XML parser context
1276
* Pops the top element node from the node stack
1278
* Returns the node just removed
1281
nodePop(xmlParserCtxtPtr ctxt)
1285
if (ctxt == NULL) return(NULL);
1286
if (ctxt->nodeNr <= 0)
1289
if (ctxt->nodeNr > 0)
1290
ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1293
ret = ctxt->nodeTab[ctxt->nodeNr];
1294
ctxt->nodeTab[ctxt->nodeNr] = NULL;
1298
#ifdef LIBXML_PUSH_ENABLED
1301
* @ctxt: an XML parser context
1302
* @value: the element name
1303
* @prefix: the element prefix
1304
* @URI: the element namespace name
1306
* Pushes a new element name/prefix/URL on top of the name stack
1308
* Returns -1 in case of error, the index in the stack otherwise
1311
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1312
const xmlChar *prefix, const xmlChar *URI, int nsNr)
1314
if (ctxt->nameNr >= ctxt->nameMax) {
1315
const xmlChar * *tmp;
1318
tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1320
sizeof(ctxt->nameTab[0]));
1325
ctxt->nameTab = tmp;
1326
tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1328
sizeof(ctxt->pushTab[0]));
1333
ctxt->pushTab = tmp2;
1335
ctxt->nameTab[ctxt->nameNr] = value;
1337
ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1338
ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1339
ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1340
return (ctxt->nameNr++);
1342
xmlErrMemory(ctxt, NULL);
1347
* @ctxt: an XML parser context
1349
* Pops the top element/prefix/URI name from the name stack
1351
* Returns the name just removed
1353
static const xmlChar *
1354
nameNsPop(xmlParserCtxtPtr ctxt)
1358
if (ctxt->nameNr <= 0)
1361
if (ctxt->nameNr > 0)
1362
ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1365
ret = ctxt->nameTab[ctxt->nameNr];
1366
ctxt->nameTab[ctxt->nameNr] = NULL;
1369
#endif /* LIBXML_PUSH_ENABLED */
1373
* @ctxt: an XML parser context
1374
* @value: the element name
1376
* Pushes a new element name on top of the name stack
1378
* Returns -1 in case of error, the index in the stack otherwise
1381
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1383
if (ctxt == NULL) return (-1);
1385
if (ctxt->nameNr >= ctxt->nameMax) {
1386
const xmlChar * *tmp;
1388
tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1390
sizeof(ctxt->nameTab[0]));
1395
ctxt->nameTab = tmp;
1397
ctxt->nameTab[ctxt->nameNr] = value;
1399
return (ctxt->nameNr++);
1401
xmlErrMemory(ctxt, NULL);
1406
* @ctxt: an XML parser context
1408
* Pops the top element name from the name stack
1410
* Returns the name just removed
1413
namePop(xmlParserCtxtPtr ctxt)
1417
if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1420
if (ctxt->nameNr > 0)
1421
ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1424
ret = ctxt->nameTab[ctxt->nameNr];
1425
ctxt->nameTab[ctxt->nameNr] = NULL;
1429
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1430
if (ctxt->spaceNr >= ctxt->spaceMax) {
1431
ctxt->spaceMax *= 2;
1432
ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1433
ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1434
if (ctxt->spaceTab == NULL) {
1435
xmlErrMemory(ctxt, NULL);
1439
ctxt->spaceTab[ctxt->spaceNr] = val;
1440
ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1441
return(ctxt->spaceNr++);
1444
static int spacePop(xmlParserCtxtPtr ctxt) {
1446
if (ctxt->spaceNr <= 0) return(0);
1448
if (ctxt->spaceNr > 0)
1449
ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1451
ctxt->space = &ctxt->spaceTab[0];
1452
ret = ctxt->spaceTab[ctxt->spaceNr];
1453
ctxt->spaceTab[ctxt->spaceNr] = -1;
1458
* Macros for accessing the content. Those should be used only by the parser,
1461
* Dirty macros, i.e. one often need to make assumption on the context to
1464
* CUR_PTR return the current pointer to the xmlChar to be parsed.
1465
* To be used with extreme caution since operations consuming
1466
* characters may move the input buffer to a different location !
1467
* CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1468
* This should be used internally by the parser
1469
* only to compare to ASCII values otherwise it would break when
1470
* running with UTF-8 encoding.
1471
* RAW same as CUR but in the input buffer, bypass any token
1472
* extraction that may have been done
1473
* NXT(n) returns the n'th next xmlChar. Same as CUR it should be used only
1474
* to compare on ASCII based substring.
1475
* SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1476
* strings without newlines within the parser.
1477
* NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1478
* defined char within the parser.
1479
* Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
1481
* NEXT Skip to the next character, this does the proper decoding
1482
* in UTF-8 mode. It also pops unfinished entities on the fly.
1483
* NEXTL(l) Skip the current unicode character of l xmlChars long.
1484
* CUR_CHAR(l) returns the current unicode character (int), set l
1485
* to the number of xmlChars used for the encoding [0-5].
1486
* CUR_SCHAR same but operate on a string instead of the context
1487
* COPY_BUF copy the current unicode char to the target buffer, increment
1489
* GROW, SHRINK handling of input buffers
/*
 * Raw accessors on the current input of the parser context. They expand to
 * expressions on a variable named `ctxt`, which must be in scope at the
 * point of use.
 *   RAW       the xmlChar at the current input position
 *   CUR       same expansion as RAW
 *   NXT(val)  the xmlChar located val positions after the current one
 *   CUR_PTR   the current position pointer itself (still usable as an lvalue)
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR (ctxt->input->cur)
/*
 * CMPn(s, c1, ..., cn) is true when the n bytes starting at s are
 * c1 ... cn, in that order. Bytes are read as unsigned char values.
 * s appears once per compared byte in the expansion, so it must be free
 * of side effects. Every argument is parenthesized so that any pointer
 * expression (e.g. a conditional expression) can be passed safely.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1515
#define SKIP(val) do { \
1516
ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
1517
if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
1518
if ((*ctxt->input->cur == 0) && \
1519
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
1520
xmlPopInput(ctxt); \
1523
#define SKIPL(val) do { \
1525
for(skipl=0; skipl<val; skipl++) { \
1526
if (*(ctxt->input->cur) == '\n') { \
1527
ctxt->input->line++; ctxt->input->col = 1; \
1528
} else ctxt->input->col++; \
1530
ctxt->input->cur++; \
1532
if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
1533
if ((*ctxt->input->cur == 0) && \
1534
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
1535
xmlPopInput(ctxt); \
1538
#define SHRINK if ((ctxt->progressive == 0) && \
1539
(ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1540
(ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
1543
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1544
xmlParserInputShrink(ctxt->input);
1545
if ((*ctxt->input->cur == 0) &&
1546
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1550
#define GROW if ((ctxt->progressive == 0) && \
1551
(ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
1554
static void xmlGROW (xmlParserCtxtPtr ctxt) {
1555
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1556
if ((*ctxt->input->cur == 0) &&
1557
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
/* SKIP_BLANKS: shorthand for xmlSkipBlankChars() applied to the in-scope ctxt. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: shorthand for xmlNextChar() applied to the in-scope ctxt. */
#define NEXT xmlNextChar(ctxt)
1566
ctxt->input->col++; \
1567
ctxt->input->cur++; \
1569
if (*ctxt->input->cur == 0) \
1570
xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
1573
#define NEXTL(l) do { \
1574
if (*(ctxt->input->cur) == '\n') { \
1575
ctxt->input->line++; ctxt->input->col = 1; \
1576
} else ctxt->input->col++; \
1577
ctxt->input->cur += l; \
1578
if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
/*
 * CUR_CHAR(l):     calls xmlCurrentChar() on the in-scope ctxt, passing the
 *                  address of l so the callee can store a length in it.
 * CUR_SCHAR(s, l): same through xmlStringCurrentChar(), reading from the
 *                  string s instead of the context input.
 * l must be an lvalue since its address is taken.
 */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))
/*
 * COPY_BUF(l,b,i,v): append the character v to buffer b at index i and
 * advance i.
 *   - when l is 1, v is stored as a single xmlChar and i is incremented;
 *   - otherwise xmlCopyCharMultiByte() writes v at &b[i] and i is advanced
 *     by the value it returns.
 * Arguments are parenthesized so compound expressions keep their meaning,
 * and the body is wrapped in do { } while (0) so the macro always behaves
 * as exactly one statement. b and i are expanded more than once: do not
 * pass expressions with side effects.
 */
#define COPY_BUF(l,b,i,v)                                               \
    do {                                                                \
        if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                       \
        else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v));                \
    } while (0)
1589
* xmlSkipBlankChars:
1590
* @ctxt: the XML parser context
1592
* skip all blank characters found at that point in the input stream.
1593
* It pops up finished entities in the process if allowable at that point.
1595
* Returns the number of space chars skipped
1599
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
1603
* It's Okay to use CUR/NEXT here since all the blanks are on
1606
if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1609
* if we are in the document content, go really fast
1611
cur = ctxt->input->cur;
1612
while (IS_BLANK_CH(*cur)) {
1614
ctxt->input->line++; ctxt->input->col = 1;
1619
ctxt->input->cur = cur;
1620
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1621
cur = ctxt->input->cur;
1624
ctxt->input->cur = cur;
1629
while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
1634
while ((cur == 0) && (ctxt->inputNr > 1) &&
1635
(ctxt->instate != XML_PARSER_COMMENT)) {
1640
* Need to handle support of entities branching here
1642
if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1643
} while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1648
/************************************************************************
1650
* Commodity functions to handle entities *
1652
************************************************************************/
1656
* @ctxt: an XML parser context
1658
* xmlPopInput: the current input pointed to by ctxt->input came to an end,
1659
* pop it and return the next char.
1661
* Returns the current xmlChar in the parser context
1664
xmlPopInput(xmlParserCtxtPtr ctxt) {
1665
if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
1666
if (xmlParserDebugEntities)
1667
xmlGenericError(xmlGenericErrorContext,
1668
"Popping input %d\n", ctxt->inputNr);
1669
xmlFreeInputStream(inputPop(ctxt));
1670
if ((*ctxt->input->cur == 0) &&
1671
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1672
return(xmlPopInput(ctxt));
1678
* @ctxt: an XML parser context
1679
* @input: an XML parser input fragment (entity, XML fragment ...).
1681
* xmlPushInput: switch to a new input stream which is stacked on top
1682
* of the previous one(s).
1685
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1686
if (input == NULL) return;
1688
if (xmlParserDebugEntities) {
1689
if ((ctxt->input != NULL) && (ctxt->input->filename))
1690
xmlGenericError(xmlGenericErrorContext,
1691
"%s(%d): ", ctxt->input->filename,
1693
xmlGenericError(xmlGenericErrorContext,
1694
"Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1696
inputPush(ctxt, input);
1702
* @ctxt: an XML parser context
1704
* parse Reference declarations
1706
* [66] CharRef ::= '&#' [0-9]+ ';' |
1707
* '&#x' [0-9a-fA-F]+ ';'
1709
* [ WFC: Legal Character ]
1710
* Characters referred to using character references must match the
1711
* production for Char.
1713
* Returns the value parsed (as an int), 0 in case of error
1716
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1717
unsigned int val = 0;
1719
unsigned int outofrange = 0;
1722
* Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1724
if ((RAW == '&') && (NXT(1) == '#') &&
1728
while (RAW != ';') { /* loop blocked by count */
1733
if ((RAW >= '0') && (RAW <= '9'))
1734
val = val * 16 + (CUR - '0');
1735
else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1736
val = val * 16 + (CUR - 'a') + 10;
1737
else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1738
val = val * 16 + (CUR - 'A') + 10;
1740
xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
1751
/* on purpose to avoid reentrancy problems with NEXT and SKIP */
1756
} else if ((RAW == '&') && (NXT(1) == '#')) {
1759
while (RAW != ';') { /* loop blocked by count */
1764
if ((RAW >= '0') && (RAW <= '9'))
1765
val = val * 10 + (CUR - '0');
1767
xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
1778
/* on purpose to avoid reentrancy problems with NEXT and SKIP */
1784
xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
1788
* [ WFC: Legal Character ]
1789
* Characters referred to using character references must match the
1790
* production for Char.
1792
if ((IS_CHAR(val) && (outofrange == 0))) {
1795
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1796
"xmlParseCharRef: invalid xmlChar value %d\n",
1803
* xmlParseStringCharRef:
1804
* @ctxt: an XML parser context
1805
* @str: a pointer to an index in the string
1807
* parse Reference declarations, variant parsing from a string rather
1808
* than an input flow.
1810
* [66] CharRef ::= '&#' [0-9]+ ';' |
1811
* '&#x' [0-9a-fA-F]+ ';'
1813
* [ WFC: Legal Character ]
1814
* Characters referred to using character references must match the
1815
* production for Char.
1817
* Returns the value parsed (as an int), 0 in case of error, str will be
1818
* updated to the current value of the index
1821
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1824
unsigned int val = 0;
1825
unsigned int outofrange = 0;
1827
if ((str == NULL) || (*str == NULL)) return(0);
1830
if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1833
while (cur != ';') { /* Non input consuming loop */
1834
if ((cur >= '0') && (cur <= '9'))
1835
val = val * 16 + (cur - '0');
1836
else if ((cur >= 'a') && (cur <= 'f'))
1837
val = val * 16 + (cur - 'a') + 10;
1838
else if ((cur >= 'A') && (cur <= 'F'))
1839
val = val * 16 + (cur - 'A') + 10;
1841
xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
1853
} else if ((cur == '&') && (ptr[1] == '#')){
1856
while (cur != ';') { /* Non input consuming loops */
1857
if ((cur >= '0') && (cur <= '9'))
1858
val = val * 10 + (cur - '0');
1860
xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
1873
xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
1879
* [ WFC: Legal Character ]
1880
* Characters referred to using character references must match the
1881
* production for Char.
1883
if ((IS_CHAR(val) && (outofrange == 0))) {
1886
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1887
"xmlParseStringCharRef: invalid xmlChar value %d\n",
1894
* xmlNewBlanksWrapperInputStream:
1895
* @ctxt: an XML parser context
1896
* @entity: an Entity pointer
1898
* Create a new input stream for wrapping
1899
* blanks around a PEReference
1901
* Returns the new input stream or NULL
1904
static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1906
static xmlParserInputPtr
1907
xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1908
xmlParserInputPtr input;
1911
if (entity == NULL) {
1912
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1913
"xmlNewBlanksWrapperInputStream entity\n");
1916
if (xmlParserDebugEntities)
1917
xmlGenericError(xmlGenericErrorContext,
1918
"new blanks wrapper for entity: %s\n", entity->name);
1919
input = xmlNewInputStream(ctxt);
1920
if (input == NULL) {
1923
length = xmlStrlen(entity->name) + 5;
1924
buffer = xmlMallocAtomic(length);
1925
if (buffer == NULL) {
1926
xmlErrMemory(ctxt, NULL);
1931
buffer [length-3] = ';';
1932
buffer [length-2] = ' ';
1933
buffer [length-1] = 0;
1934
memcpy(buffer + 2, entity->name, length - 5);
1935
input->free = deallocblankswrapper;
1936
input->base = buffer;
1937
input->cur = buffer;
1938
input->length = length;
1939
input->end = &buffer[length];
1944
* xmlParserHandlePEReference:
1945
* @ctxt: the parser context
1947
* [69] PEReference ::= '%' Name ';'
1949
* [ WFC: No Recursion ]
1950
* A parsed entity must not contain a recursive
1951
* reference to itself, either directly or indirectly.
1953
* [ WFC: Entity Declared ]
1954
* In a document without any DTD, a document with only an internal DTD
1955
* subset which contains no parameter entity references, or a document
1956
* with "standalone='yes'", ... ... The declaration of a parameter
1957
* entity must precede any reference to it...
1959
* [ VC: Entity Declared ]
1960
* In a document with an external subset or external parameter entities
1961
* with "standalone='no'", ... ... The declaration of a parameter entity
1962
* must precede any reference to it...
1965
* Parameter-entity references may only appear in the DTD.
1966
* NOTE: misleading but this is handled.
1968
* A PEReference may have been detected in the current input stream
1969
* the handling is done according to
1970
* http://www.w3.org/TR/REC-xml#entproc
1972
* - Included in literal in entity values
1973
* - Included as Parameter Entity reference within DTDs
1976
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1977
const xmlChar *name;
1978
xmlEntityPtr entity = NULL;
1979
xmlParserInputPtr input;
1981
if (RAW != '%') return;
1982
switch(ctxt->instate) {
1983
case XML_PARSER_CDATA_SECTION:
1985
case XML_PARSER_COMMENT:
1987
case XML_PARSER_START_TAG:
1989
case XML_PARSER_END_TAG:
1991
case XML_PARSER_EOF:
1992
xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
1994
case XML_PARSER_PROLOG:
1995
case XML_PARSER_START:
1996
case XML_PARSER_MISC:
1997
xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
1999
case XML_PARSER_ENTITY_DECL:
2000
case XML_PARSER_CONTENT:
2001
case XML_PARSER_ATTRIBUTE_VALUE:
2003
case XML_PARSER_SYSTEM_LITERAL:
2004
case XML_PARSER_PUBLIC_LITERAL:
2005
/* we just ignore it there */
2007
case XML_PARSER_EPILOG:
2008
xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2010
case XML_PARSER_ENTITY_VALUE:
2012
* NOTE: in the case of entity values, we don't do the
2013
* substitution here since we need the literal
2014
* entity value to be able to save the internal
2015
* subset of the document.
2016
* This will be handled by xmlStringDecodeEntities
2019
case XML_PARSER_DTD:
2021
* [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2022
* In the internal DTD subset, parameter-entity references
2023
* can occur only where markup declarations can occur, not
2024
* within markup declarations.
2025
* In that case this is handled in xmlParseMarkupDecl
2027
if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2029
if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2032
case XML_PARSER_IGNORE:
2037
name = xmlParseName(ctxt);
2038
if (xmlParserDebugEntities)
2039
xmlGenericError(xmlGenericErrorContext,
2040
"PEReference: %s\n", name);
2042
xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2046
if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2047
entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2048
if (entity == NULL) {
2051
* [ WFC: Entity Declared ]
2052
* In a document without any DTD, a document with only an
2053
* internal DTD subset which contains no parameter entity
2054
* references, or a document with "standalone='yes'", ...
2055
* ... The declaration of a parameter entity must precede
2056
* any reference to it...
2058
if ((ctxt->standalone == 1) ||
2059
((ctxt->hasExternalSubset == 0) &&
2060
(ctxt->hasPErefs == 0))) {
2061
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2062
"PEReference: %%%s; not found\n", name);
2065
* [ VC: Entity Declared ]
2066
* In a document with an external subset or external
2067
* parameter entities with "standalone='no'", ...
2068
* ... The declaration of a parameter entity must precede
2069
* any reference to it...
2071
if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2072
xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2073
"PEReference: %%%s; not found\n",
2076
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2077
"PEReference: %%%s; not found\n",
2081
} else if (ctxt->input->free != deallocblankswrapper) {
2082
input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2083
xmlPushInput(ctxt, input);
2085
if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2086
(entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2088
xmlCharEncoding enc;
2091
* handle the extra spaces added before and after
2092
* c.f. http://www.w3.org/TR/REC-xml#as-PE
2093
* this is done independently.
2095
input = xmlNewEntityInputStream(ctxt, entity);
2096
xmlPushInput(ctxt, input);
2099
* Get the 4 first bytes and decode the charset
2100
* if enc != XML_CHAR_ENCODING_NONE
2101
* plug some encoding conversion routines.
2102
* Note that, since we may have some non-UTF8
2103
* encoding (like UTF16, bug 135229), the 'length'
2104
* is not known, but we can calculate based upon
2105
* the amount of data in the buffer.
2108
if ((ctxt->input->end - ctxt->input->cur)>=4) {
2113
enc = xmlDetectCharEncoding(start, 4);
2114
if (enc != XML_CHAR_ENCODING_NONE) {
2115
xmlSwitchEncoding(ctxt, enc);
2119
if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2120
(CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2121
(IS_BLANK_CH(NXT(5)))) {
2122
xmlParseTextDecl(ctxt);
2125
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2126
"PEReference: %s is not a parameter entity\n",
2131
xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2137
* Macro used to grow the current buffer.
2139
#define growBuffer(buffer) { \
2141
buffer##_size *= 2; \
2143
xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \
2144
if (tmp == NULL) goto mem_error; \
2149
* xmlStringLenDecodeEntities:
2150
* @ctxt: the parser context
2151
* @str: the input string
2152
* @len: the string length
2153
* @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2154
* @end: an end marker xmlChar, 0 if none
2155
* @end2: an end marker xmlChar, 0 if none
2156
* @end3: an end marker xmlChar, 0 if none
2158
* Takes an entity string content and processes it to do the adequate substitutions.
2160
* [67] Reference ::= EntityRef | CharRef
2162
* [69] PEReference ::= '%' Name ';'
2164
* Returns A newly allocated string with the substitution done. The caller
2165
* must deallocate it !
2168
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2169
int what, xmlChar end, xmlChar end2, xmlChar end3) {
2170
xmlChar *buffer = NULL;
2171
int buffer_size = 0;
2173
xmlChar *current = NULL;
2174
const xmlChar *last;
2179
if ((ctxt == NULL) || (str == NULL) || (len < 0))
2183
if (ctxt->depth > 40) {
2184
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2189
* allocate a translation buffer.
2191
buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2192
buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
2193
if (buffer == NULL) goto mem_error;
2196
* OK loop until we reach one of the ending char or a size limit.
2197
* we are operating on already parsed values.
2200
c = CUR_SCHAR(str, l);
2203
while ((c != 0) && (c != end) && /* non input consuming loop */
2204
(c != end2) && (c != end3)) {
2207
if ((c == '&') && (str[1] == '#')) {
2208
int val = xmlParseStringCharRef(ctxt, &str);
2210
COPY_BUF(0,buffer,nbchars,val);
2212
if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2215
} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2216
if (xmlParserDebugEntities)
2217
xmlGenericError(xmlGenericErrorContext,
2218
"String decoding Entity Reference: %.30s\n",
2220
ent = xmlParseStringEntityRef(ctxt, &str);
2221
if ((ent != NULL) &&
2222
(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2223
if (ent->content != NULL) {
2224
COPY_BUF(0,buffer,nbchars,ent->content[0]);
2225
if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2229
xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2230
"predefined entity has no content\n");
2232
} else if ((ent != NULL) && (ent->content != NULL)) {
2236
rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2241
while (*current != 0) { /* non input consuming loop */
2242
buffer[nbchars++] = *current++;
2244
buffer_size - XML_PARSER_BUFFER_SIZE) {
2250
} else if (ent != NULL) {
2251
int i = xmlStrlen(ent->name);
2252
const xmlChar *cur = ent->name;
2254
buffer[nbchars++] = '&';
2255
if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2259
buffer[nbchars++] = *cur++;
2260
buffer[nbchars++] = ';';
2262
} else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2263
if (xmlParserDebugEntities)
2264
xmlGenericError(xmlGenericErrorContext,
2265
"String decoding PE Reference: %.30s\n", str);
2266
ent = xmlParseStringPEReference(ctxt, &str);
2271
rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2276
while (*current != 0) { /* non input consuming loop */
2277
buffer[nbchars++] = *current++;
2279
buffer_size - XML_PARSER_BUFFER_SIZE) {
2287
COPY_BUF(l,buffer,nbchars,c);
2289
if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2294
c = CUR_SCHAR(str, l);
2298
buffer[nbchars++] = 0;
2302
xmlErrMemory(ctxt, NULL);
2307
* xmlStringDecodeEntities:
2308
* @ctxt: the parser context
2309
* @str: the input string
2310
* @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2311
* @end: an end marker xmlChar, 0 if none
2312
* @end2: an end marker xmlChar, 0 if none
2313
* @end3: an end marker xmlChar, 0 if none
2315
* Takes an entity string content and processes it to do the adequate substitutions.
2317
* [67] Reference ::= EntityRef | CharRef
2319
* [69] PEReference ::= '%' Name ';'
2321
* Returns A newly allocated string with the substitution done. The caller
2322
* must deallocate it !
2325
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2326
xmlChar end, xmlChar end2, xmlChar end3) {
2327
if ((ctxt == NULL) || (str == NULL)) return(NULL);
2328
return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2332
/************************************************************************
2334
* Commodity functions, cleanup needed ? *
2336
************************************************************************/
2340
* @ctxt: an XML parser context
2342
* @len: the size of @str
2343
* @blank_chars: we know the chars are blanks
2345
* Is this a sequence of blank chars that one can ignore ?
2347
* Returns 1 if ignorable 0 otherwise.
2350
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2353
xmlNodePtr lastChild;
2356
* Don't spend time trying to differentiate them, the same callback is
2359
if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2363
* Check for xml:space value.
2365
if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2366
(*(ctxt->space) == -2))
2370
* Check that the string is made of blanks
2372
if (blank_chars == 0) {
2373
for (i = 0;i < len;i++)
2374
if (!(IS_BLANK_CH(str[i]))) return(0);
2378
* Look if the element is mixed content in the DTD if available
2380
if (ctxt->node == NULL) return(0);
2381
if (ctxt->myDoc != NULL) {
2382
ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2383
if (ret == 0) return(1);
2384
if (ret == 1) return(0);
2388
* Otherwise, heuristic :-\
2390
if ((RAW != '<') && (RAW != 0xD)) return(0);
2391
if ((ctxt->node->children == NULL) &&
2392
(RAW == '<') && (NXT(1) == '/')) return(0);
2394
lastChild = xmlGetLastChild(ctxt->node);
2395
if (lastChild == NULL) {
2396
if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2397
(ctxt->node->content != NULL)) return(0);
2398
} else if (xmlNodeIsText(lastChild))
2400
else if ((ctxt->node->children != NULL) &&
2401
(xmlNodeIsText(ctxt->node->children)))
2406
/************************************************************************
2408
* Extra stuff for namespace support *
2409
* Relates to http://www.w3.org/TR/WD-xml-names *
2411
************************************************************************/
2415
* @ctxt: an XML parser context
2416
* @name: the UTF8 encoded XML qualified name to split
2417
* @prefix: a xmlChar **
2419
* parse a UTF8 encoded XML qualified name string
2421
* [NS 5] QName ::= (Prefix ':')? LocalPart
2423
* [NS 6] Prefix ::= NCName
2425
* [NS 7] LocalPart ::= NCName
2427
* Returns the local part, and prefix is updated
2428
* to get the Prefix if any.
2432
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2433
xmlChar buf[XML_MAX_NAMELEN + 5];
2434
xmlChar *buffer = NULL;
2436
int max = XML_MAX_NAMELEN;
2437
xmlChar *ret = NULL;
2438
const xmlChar *cur = name;
2441
if (prefix == NULL) return(NULL);
2444
if (cur == NULL) return(NULL);
2446
#ifndef XML_XML_NAMESPACE
2447
/* xml: prefix is not really a namespace */
2448
if ((cur[0] == 'x') && (cur[1] == 'm') &&
2449
(cur[2] == 'l') && (cur[3] == ':'))
2450
return(xmlStrdup(name));
2453
/* nasty but well-formed */
2455
return(xmlStrdup(name));
2458
while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2464
* Okay someone managed to make a huge name, so he's ready to pay
2465
* for the processing speed.
2469
buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2470
if (buffer == NULL) {
2471
xmlErrMemory(ctxt, NULL);
2474
memcpy(buffer, buf, len);
2475
while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2476
if (len + 10 > max) {
2480
tmp = (xmlChar *) xmlRealloc(buffer,
2481
max * sizeof(xmlChar));
2484
xmlErrMemory(ctxt, NULL);
2495
if ((c == ':') && (*cur == 0)) {
2499
return(xmlStrdup(name));
2503
ret = xmlStrndup(buf, len);
2507
max = XML_MAX_NAMELEN;
2515
return(xmlStrndup(BAD_CAST "", 0));
2520
* Check that the first character is proper to start
2523
if (!(((c >= 0x61) && (c <= 0x7A)) ||
2524
((c >= 0x41) && (c <= 0x5A)) ||
2525
(c == '_') || (c == ':'))) {
2527
int first = CUR_SCHAR(cur, l);
2529
if (!IS_LETTER(first) && (first != '_')) {
2530
xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
2531
"Name %s is not XML Namespace compliant\n",
2537
while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2543
* Okay someone managed to make a huge name, so he's ready to pay
2544
* for the processing speed.
2548
buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2549
if (buffer == NULL) {
2550
xmlErrMemory(ctxt, NULL);
2553
memcpy(buffer, buf, len);
2554
while (c != 0) { /* tested bigname2.xml */
2555
if (len + 10 > max) {
2559
tmp = (xmlChar *) xmlRealloc(buffer,
2560
max * sizeof(xmlChar));
2562
xmlErrMemory(ctxt, NULL);
2575
ret = xmlStrndup(buf, len);
2584
/************************************************************************
2586
* The parser itself *
2587
* Relates to http://www.w3.org/TR/REC-xml *
2589
************************************************************************/
2591
static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
2592
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
2593
int *len, int *alloc, int normalize);
2597
* @ctxt: an XML parser context
2599
* parse an XML name.
2601
* [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2602
* CombiningChar | Extender
2604
* [5] Name ::= (Letter | '_' | ':') (NameChar)*
2606
* [6] Names ::= Name (#x20 Name)*
2608
* Returns the Name parsed or NULL
2612
xmlParseName(xmlParserCtxtPtr ctxt) {
2620
* Accelerator for simple ASCII names
2622
in = ctxt->input->cur;
2623
if (((*in >= 0x61) && (*in <= 0x7A)) ||
2624
((*in >= 0x41) && (*in <= 0x5A)) ||
2625
(*in == '_') || (*in == ':')) {
2627
while (((*in >= 0x61) && (*in <= 0x7A)) ||
2628
((*in >= 0x41) && (*in <= 0x5A)) ||
2629
((*in >= 0x30) && (*in <= 0x39)) ||
2630
(*in == '_') || (*in == '-') ||
2631
(*in == ':') || (*in == '.'))
2633
if ((*in > 0) && (*in < 0x80)) {
2634
count = in - ctxt->input->cur;
2635
ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
2636
ctxt->input->cur = in;
2637
ctxt->nbChars += count;
2638
ctxt->input->col += count;
2640
xmlErrMemory(ctxt, NULL);
2644
return(xmlParseNameComplex(ctxt));
2648
* xmlParseNameAndCompare:
2649
* @ctxt: an XML parser context
2651
* parse an XML name and compares for match
2652
* (specialized for endtag parsing)
2654
* Returns NULL for an illegal name, (xmlChar*) 1 for success
2655
* and the name for mismatch
2658
static const xmlChar *
2659
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
2660
register const xmlChar *cmp = other;
2661
register const xmlChar *in;
2666
in = ctxt->input->cur;
2667
while (*in != 0 && *in == *cmp) {
2672
if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
2674
ctxt->input->cur = in;
2675
return (const xmlChar*) 1;
2677
/* failure (or end of input buffer), check with full function */
2678
ret = xmlParseName (ctxt);
2679
/* strings coming from the dictionary, direct comparison possible */
2681
return (const xmlChar*) 1;
2686
static const xmlChar *
2687
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
2693
* Handler for more complex cases
2697
if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2698
(!IS_LETTER(c) && (c != '_') &&
2703
while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
2704
((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2705
(c == '.') || (c == '-') ||
2706
(c == '_') || (c == ':') ||
2707
(IS_COMBINING(c)) ||
2708
(IS_EXTENDER(c)))) {
2709
if (count++ > 100) {
2717
if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2718
return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
2719
return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
2723
* xmlParseStringName:
2724
* @ctxt: an XML parser context
2725
* @str: a pointer to the string pointer (IN/OUT)
2727
* parse an XML name.
2729
* [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2730
* CombiningChar | Extender
2732
* [5] Name ::= (Letter | '_' | ':') (NameChar)*
2734
* [6] Names ::= Name (#x20 Name)*
2736
* Returns the Name parsed or NULL. The @str pointer
2737
* is updated to the current location in the string.
2741
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2742
xmlChar buf[XML_MAX_NAMELEN + 5];
2743
const xmlChar *cur = *str;
2747
c = CUR_SCHAR(cur, l);
2748
if (!IS_LETTER(c) && (c != '_') &&
2753
while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2754
(c == '.') || (c == '-') ||
2755
(c == '_') || (c == ':') ||
2756
(IS_COMBINING(c)) ||
2758
COPY_BUF(l,buf,len,c);
2760
c = CUR_SCHAR(cur, l);
2761
if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2763
* Okay someone managed to make a huge name, so he's ready to pay
2764
* for the processing speed.
2769
buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2770
if (buffer == NULL) {
2771
xmlErrMemory(ctxt, NULL);
2774
memcpy(buffer, buf, len);
2775
while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2776
/* test bigentname.xml */
2777
(c == '.') || (c == '-') ||
2778
(c == '_') || (c == ':') ||
2779
(IS_COMBINING(c)) ||
2781
if (len + 10 > max) {
2784
tmp = (xmlChar *) xmlRealloc(buffer,
2785
max * sizeof(xmlChar));
2787
xmlErrMemory(ctxt, NULL);
2793
COPY_BUF(l,buffer,len,c);
2795
c = CUR_SCHAR(cur, l);
2803
return(xmlStrndup(buf, len));
2808
* @ctxt: an XML parser context
2810
* parse an XML Nmtoken.
2812
* [7] Nmtoken ::= (NameChar)+
2814
* [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
2816
* Returns the Nmtoken parsed or NULL
2820
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2821
xmlChar buf[XML_MAX_NAMELEN + 5];
2829
while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2830
(c == '.') || (c == '-') ||
2831
(c == '_') || (c == ':') ||
2832
(IS_COMBINING(c)) ||
2834
if (count++ > 100) {
2838
COPY_BUF(l,buf,len,c);
2841
if (len >= XML_MAX_NAMELEN) {
2843
* Okay someone managed to make a huge token, so he's ready to pay
2844
* for the processing speed.
2849
buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2850
if (buffer == NULL) {
2851
xmlErrMemory(ctxt, NULL);
2854
memcpy(buffer, buf, len);
2855
while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2856
(c == '.') || (c == '-') ||
2857
(c == '_') || (c == ':') ||
2858
(IS_COMBINING(c)) ||
2860
if (count++ > 100) {
2864
if (len + 10 > max) {
2868
tmp = (xmlChar *) xmlRealloc(buffer,
2869
max * sizeof(xmlChar));
2871
xmlErrMemory(ctxt, NULL);
2877
COPY_BUF(l,buffer,len,c);
2887
return(xmlStrndup(buf, len));
2891
* xmlParseEntityValue:
2892
* @ctxt: an XML parser context
2893
* @orig: if non-NULL store a copy of the original entity value
2895
* parse a value for ENTITY declarations
2897
* [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2898
* "'" ([^%&'] | PEReference | Reference)* "'"
2900
* Returns the EntityValue parsed with reference substituted or NULL
2904
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2905
xmlChar *buf = NULL;
2907
int size = XML_PARSER_BUFFER_SIZE;
2910
xmlChar *ret = NULL;
2911
const xmlChar *cur = NULL;
2912
xmlParserInputPtr input;
2914
if (RAW == '"') stop = '"';
2915
else if (RAW == '\'') stop = '\'';
2917
xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
2920
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
2922
xmlErrMemory(ctxt, NULL);
2927
* The content of the entity definition is copied in a buffer.
2930
ctxt->instate = XML_PARSER_ENTITY_VALUE;
2931
input = ctxt->input;
2936
* NOTE: 4.4.5 Included in Literal
2937
* When a parameter entity reference appears in a literal entity
2938
* value, ... a single or double quote character in the replacement
2939
* text is always treated as a normal data character and will not
2940
* terminate the literal.
2941
* In practice it means we stop the loop only when back at parsing
2942
* the initial entity and the quote is found
2944
while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2945
(ctxt->input != input))) {
2946
if (len + 5 >= size) {
2950
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2952
xmlErrMemory(ctxt, NULL);
2958
COPY_BUF(l,buf,len,c);
2961
* Pop-up of finished entities.
2963
while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2976
* Raise problem w.r.t. '&' and '%' being used in non-entities
2977
* reference constructs. Note Charref will be handled in
2978
* xmlStringDecodeEntities()
2981
while (*cur != 0) { /* non input consuming */
2982
if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2987
name = xmlParseStringName(ctxt, &cur);
2988
if ((name == NULL) || (*cur != ';')) {
2989
xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
2990
"EntityValue: '%c' forbidden except for entities references\n",
2993
if ((tmp == '%') && (ctxt->inSubset == 1) &&
2994
(ctxt->inputNr == 1)) {
2995
xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3006
* Then PEReference entities are substituted.
3009
xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3014
* NOTE: 4.4.7 Bypassed
3015
* When a general entity reference appears in the EntityValue in
3016
* an entity declaration, it is bypassed and left as is.
3017
* so XML_SUBSTITUTE_REF is not set here.
3019
ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3031
* xmlParseAttValueComplex:
3032
* @ctxt: an XML parser context
3033
* @attlen: the resulting attribute len
3034
* @normalize: whether to apply the inner normalization
3036
* parse a value for an attribute, this is the fallback function
3037
* of xmlParseAttValue() when the attribute parsing requires handling
3038
* of non-ASCII characters, or normalization compaction.
3040
* Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3043
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3045
xmlChar *buf = NULL;
3048
int c, l, in_space = 0;
3049
xmlChar *current = NULL;
3052
if (NXT(0) == '"') {
3053
ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3056
} else if (NXT(0) == '\'') {
3058
ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3061
xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3066
* allocate a translation buffer.
3068
buf_size = XML_PARSER_BUFFER_SIZE;
3069
buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
3070
if (buf == NULL) goto mem_error;
3073
* OK loop until we reach one of the ending char or a size limit.
3076
while ((NXT(0) != limit) && /* checked */
3081
if (NXT(1) == '#') {
3082
int val = xmlParseCharRef(ctxt);
3085
if (ctxt->replaceEntities) {
3086
if (len > buf_size - 10) {
3092
* The reparsing will be done in xmlStringGetNodeList()
3093
* called by the attribute() function in SAX.c
3095
if (len > buf_size - 10) {
3105
if (len > buf_size - 10) {
3108
len += xmlCopyChar(0, &buf[len], val);
3111
ent = xmlParseEntityRef(ctxt);
3112
if ((ent != NULL) &&
3113
(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3114
if (len > buf_size - 10) {
3117
if ((ctxt->replaceEntities == 0) &&
3118
(ent->content[0] == '&')) {
3125
buf[len++] = ent->content[0];
3127
} else if ((ent != NULL) &&
3128
(ctxt->replaceEntities != 0)) {
3131
if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3132
rep = xmlStringDecodeEntities(ctxt, ent->content,
3137
while (*current != 0) { /* non input consuming */
3138
buf[len++] = *current++;
3139
if (len > buf_size - 10) {
3146
if (len > buf_size - 10) {
3149
if (ent->content != NULL)
3150
buf[len++] = ent->content[0];
3152
} else if (ent != NULL) {
3153
int i = xmlStrlen(ent->name);
3154
const xmlChar *cur = ent->name;
3157
* This may look absurd but is needed to detect
3160
if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3161
(ent->content != NULL)) {
3163
rep = xmlStringDecodeEntities(ctxt, ent->content,
3164
XML_SUBSTITUTE_REF, 0, 0, 0);
3170
* Just output the reference
3173
if (len > buf_size - i - 10) {
3177
buf[len++] = *cur++;
3182
if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
3183
if ((len != 0) || (!normalize)) {
3184
if ((!normalize) || (!in_space)) {
3185
COPY_BUF(l,buf,len,0x20);
3186
if (len > buf_size - 10) {
3194
COPY_BUF(l,buf,len,c);
3195
if (len > buf_size - 10) {
3204
if ((in_space) && (normalize)) {
3205
while (buf[len - 1] == 0x20) len--;
3209
xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
3210
} else if (RAW != limit) {
3211
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3212
"AttValue: ' expected\n");
3215
if (attlen != NULL) *attlen = len;
3219
xmlErrMemory(ctxt, NULL);
3225
* @ctxt: an XML parser context
3227
* parse a value for an attribute
3228
* Note: the parser won't do substitution of entities here, this
3229
* will be handled later in xmlStringGetNodeList
3231
* [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3232
* "'" ([^<&'] | Reference)* "'"
3234
* 3.3.3 Attribute-Value Normalization:
3235
* Before the value of an attribute is passed to the application or
3236
* checked for validity, the XML processor must normalize it as follows:
3237
* - a character reference is processed by appending the referenced
3238
* character to the attribute value
3239
* - an entity reference is processed by recursively processing the
3240
* replacement text of the entity
3241
* - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3242
* appending #x20 to the normalized value, except that only a single
3243
* #x20 is appended for a "#xD#xA" sequence that is part of an external
3244
* parsed entity or the literal entity value of an internal parsed entity
3245
* - other characters are processed by appending them to the normalized value
3246
* If the declared value is not CDATA, then the XML processor must further
3247
* process the normalized attribute value by discarding any leading and
3248
* trailing space (#x20) characters, and by replacing sequences of space
3249
* (#x20) characters by a single space (#x20) character.
3250
* All attributes for which no declaration has been read should be treated
3251
* by a non-validating parser as if declared CDATA.
3253
* Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3258
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
3259
if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
3260
return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
3264
* xmlParseSystemLiteral:
3265
* @ctxt: an XML parser context
3267
* parse an XML Literal
3269
* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3271
* Returns the SystemLiteral parsed or NULL
3275
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3276
xmlChar *buf = NULL;
3278
int size = XML_PARSER_BUFFER_SIZE;
3281
int state = ctxt->instate;
3288
} else if (RAW == '\'') {
3292
xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
3296
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3298
xmlErrMemory(ctxt, NULL);
3301
ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3303
while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
3304
if (len + 5 >= size) {
3308
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3311
xmlErrMemory(ctxt, NULL);
3312
ctxt->instate = (xmlParserInputState) state;
3322
COPY_BUF(l,buf,len,cur);
3332
ctxt->instate = (xmlParserInputState) state;
3333
if (!IS_CHAR(cur)) {
3334
xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
3342
* xmlParsePubidLiteral:
3343
* @ctxt: an XML parser context
3345
* parse an XML public literal
3347
* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3349
* Returns the PubidLiteral parsed or NULL.
3353
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3354
xmlChar *buf = NULL;
3356
int size = XML_PARSER_BUFFER_SIZE;
3360
xmlParserInputState oldstate = ctxt->instate;
3366
} else if (RAW == '\'') {
3370
xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
3373
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3375
xmlErrMemory(ctxt, NULL);
3378
ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
3380
while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
3381
if (len + 1 >= size) {
3385
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3387
xmlErrMemory(ctxt, NULL);
3409
xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
3413
ctxt->instate = oldstate;
3417
void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
3420
* used for the test in the inner loop of the char data testing
3422
static const unsigned char test_char_data[256] = {
3423
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3424
0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
3425
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3426
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3427
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
3428
0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
3429
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
3430
0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
3431
0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
3432
0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
3433
0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
3434
0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
3435
0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
3436
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
3437
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
3438
0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
3439
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
3440
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3441
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3442
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3443
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3444
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3445
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3446
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3447
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3448
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3449
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3450
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3451
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3452
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3453
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3454
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
3459
* @ctxt: an XML parser context
3460
* @cdata: int indicating whether we are within a CDATA section
3462
* parse a CharData section.
3463
* if we are within a CDATA section ']]>' marks an end of section.
3465
* The right angle bracket (>) may be represented using the string "&gt;",
3466
* and must, for compatibility, be escaped using "&gt;" or a character
3467
* reference when it appears in the string "]]>" in content, when that
3468
* string is not marking the end of a CDATA section.
3470
* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3474
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
3477
int line = ctxt->input->line;
3478
int col = ctxt->input->col;
3484
* Accelerated common case where input doesn't need to be
3485
* modified before passing it to the handler.
3488
in = ctxt->input->cur;
3491
while (*in == 0x20) in++;
3494
ctxt->input->line++; ctxt->input->col = 1;
3496
} while (*in == 0xA);
3497
goto get_more_space;
3500
nbchar = in - ctxt->input->cur;
3502
const xmlChar *tmp = ctxt->input->cur;
3503
ctxt->input->cur = in;
3505
if ((ctxt->sax != NULL) &&
3506
(ctxt->sax->ignorableWhitespace !=
3507
ctxt->sax->characters)) {
3508
if (areBlanks(ctxt, tmp, nbchar, 1)) {
3509
if (ctxt->sax->ignorableWhitespace != NULL)
3510
ctxt->sax->ignorableWhitespace(ctxt->userData,
3513
if (ctxt->sax->characters != NULL)
3514
ctxt->sax->characters(ctxt->userData,
3516
if (*ctxt->space == -1)
3519
} else if ((ctxt->sax != NULL) &&
3520
(ctxt->sax->characters != NULL)) {
3521
ctxt->sax->characters(ctxt->userData,
3529
ccol = ctxt->input->col;
3530
while (test_char_data[*in]) {
3534
ctxt->input->col = ccol;
3537
ctxt->input->line++; ctxt->input->col = 1;
3539
} while (*in == 0xA);
3543
if ((in[1] == ']') && (in[2] == '>')) {
3544
xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
3545
ctxt->input->cur = in;
3552
nbchar = in - ctxt->input->cur;
3554
if ((ctxt->sax != NULL) &&
3555
(ctxt->sax->ignorableWhitespace !=
3556
ctxt->sax->characters) &&
3557
(IS_BLANK_CH(*ctxt->input->cur))) {
3558
const xmlChar *tmp = ctxt->input->cur;
3559
ctxt->input->cur = in;
3561
if (areBlanks(ctxt, tmp, nbchar, 0)) {
3562
if (ctxt->sax->ignorableWhitespace != NULL)
3563
ctxt->sax->ignorableWhitespace(ctxt->userData,
3566
if (ctxt->sax->characters != NULL)
3567
ctxt->sax->characters(ctxt->userData,
3569
if (*ctxt->space == -1)
3572
line = ctxt->input->line;
3573
col = ctxt->input->col;
3574
} else if (ctxt->sax != NULL) {
3575
if (ctxt->sax->characters != NULL)
3576
ctxt->sax->characters(ctxt->userData,
3577
ctxt->input->cur, nbchar);
3578
line = ctxt->input->line;
3579
col = ctxt->input->col;
3582
ctxt->input->cur = in;
3586
ctxt->input->cur = in;
3588
ctxt->input->line++; ctxt->input->col = 1;
3589
continue; /* while */
3601
in = ctxt->input->cur;
3602
} while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
3605
ctxt->input->line = line;
3606
ctxt->input->col = col;
3607
xmlParseCharDataComplex(ctxt, cdata);
3611
* xmlParseCharDataComplex:
3612
* @ctxt: an XML parser context
3613
* @cdata: int indicating whether we are within a CDATA section
3615
* parse a CharData section. This is the fallback function
3616
* of xmlParseCharData() when the parsing requires handling
3617
* of non-ASCII characters.
3620
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
3621
xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3629
while ((cur != '<') && /* checked */
3631
(IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
3632
if ((cur == ']') && (NXT(1) == ']') &&
3636
xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
3639
COPY_BUF(l,buf,nbchar,cur);
3640
if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
3644
* OK the segment is to be consumed as chars.
3646
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3647
if (areBlanks(ctxt, buf, nbchar, 0)) {
3648
if (ctxt->sax->ignorableWhitespace != NULL)
3649
ctxt->sax->ignorableWhitespace(ctxt->userData,
3652
if (ctxt->sax->characters != NULL)
3653
ctxt->sax->characters(ctxt->userData, buf, nbchar);
3654
if ((ctxt->sax->characters !=
3655
ctxt->sax->ignorableWhitespace) &&
3656
(*ctxt->space == -1))
3673
* OK the segment is to be consumed as chars.
3675
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3676
if (areBlanks(ctxt, buf, nbchar, 0)) {
3677
if (ctxt->sax->ignorableWhitespace != NULL)
3678
ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3680
if (ctxt->sax->characters != NULL)
3681
ctxt->sax->characters(ctxt->userData, buf, nbchar);
3682
if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
3683
(*ctxt->space == -1))
3688
if ((cur != 0) && (!IS_CHAR(cur))) {
3689
/* Generate the error and skip the offending character */
3690
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3691
"PCDATA invalid Char value %d\n",
3698
* xmlParseExternalID:
3699
* @ctxt: an XML parser context
3700
* @publicID: a xmlChar** receiving PubidLiteral
3701
* @strict: indicate whether we should restrict parsing to only
3702
* production [75], see NOTE below
3704
* Parse an External ID or a Public ID
3706
* NOTE: Productions [75] and [83] interact badly since [75] can generate
3707
* 'PUBLIC' S PubidLiteral S SystemLiteral
3709
* [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3710
* | 'PUBLIC' S PubidLiteral S SystemLiteral
3712
* [83] PublicID ::= 'PUBLIC' S PubidLiteral
3714
* Returns the SystemLiteral, and in the second
3715
* case publicID receives PubidLiteral, if strict is off
3716
* it is possible to return NULL and have publicID set.
3720
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3721
xmlChar *URI = NULL;
3726
if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
3728
if (!IS_BLANK_CH(CUR)) {
3729
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3730
"Space required after 'SYSTEM'\n");
3733
URI = xmlParseSystemLiteral(ctxt);
3735
xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
3737
} else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
3739
if (!IS_BLANK_CH(CUR)) {
3740
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3741
"Space required after 'PUBLIC'\n");
3744
*publicID = xmlParsePubidLiteral(ctxt);
3745
if (*publicID == NULL) {
3746
xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
3750
* We don't handle [83] so "S SystemLiteral" is required.
3752
if (!IS_BLANK_CH(CUR)) {
3753
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3754
"Space required after the Public Identifier\n");
3758
* We handle [83] so we return immediately, if
3759
* "S SystemLiteral" is not detected. From a purely parsing
3760
* point of view that's a nice mess.
3766
if (!IS_BLANK_CH(*ptr)) return(NULL);
3768
while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3769
if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3772
URI = xmlParseSystemLiteral(ctxt);
3774
xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
3781
* xmlParseCommentComplex:
3782
* @ctxt: an XML parser context
3783
* @buf: the already parsed part of the buffer
3784
* @len: number of bytes filled in the buffer
3785
* @size: allocated size of the buffer
3787
* Skip an XML (SGML) comment <!-- .... -->
3788
* The spec says that "For compatibility, the string "--" (double-hyphen)
3789
* must not occur within comments. "
3790
* This is the slow routine in case the accelerator for ascii didn't work
3792
* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3795
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
3799
xmlParserInputPtr input = ctxt->input;
3804
size = XML_PARSER_BUFFER_SIZE;
3805
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3807
xmlErrMemory(ctxt, NULL);
3813
goto not_terminated;
3817
goto not_terminated;
3821
goto not_terminated;
3822
while (IS_CHAR(cur) && /* checked */
3824
(r != '-') || (q != '-'))) {
3825
if ((r == '-') && (q == '-')) {
3826
xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
3828
if (len + 5 >= size) {
3831
new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3832
if (new_buf == NULL) {
3834
xmlErrMemory(ctxt, NULL);
3839
COPY_BUF(ql,buf,len,q);
3859
if (!IS_CHAR(cur)) {
3860
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3861
"Comment not terminated \n<!--%.50s\n", buf);
3864
if (input != ctxt->input) {
3865
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3866
"Comment doesn't start and stop in the same entity\n");
3869
if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3870
(!ctxt->disableSAX))
3871
ctxt->sax->comment(ctxt->userData, buf);
3876
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3877
"Comment not terminated\n", NULL);
3882
* @ctxt: an XML parser context
3884
* Skip an XML (SGML) comment <!-- .... -->
3885
* The spec says that "For compatibility, the string "--" (double-hyphen)
3886
* must not occur within comments. "
3888
* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3891
xmlParseComment(xmlParserCtxtPtr ctxt) {
3892
xmlChar *buf = NULL;
3893
int size = XML_PARSER_BUFFER_SIZE;
3895
xmlParserInputState state;
3897
int nbchar = 0, ccol;
3900
* Check that there is a comment right here.
3902
if ((RAW != '<') || (NXT(1) != '!') ||
3903
(NXT(2) != '-') || (NXT(3) != '-')) return;
3905
state = ctxt->instate;
3906
ctxt->instate = XML_PARSER_COMMENT;
3912
* Accelerated common case where input doesn't need to be
3913
* modified before passing it to the handler.
3915
in = ctxt->input->cur;
3919
ctxt->input->line++; ctxt->input->col = 1;
3921
} while (*in == 0xA);
3924
ccol = ctxt->input->col;
3925
while (((*in > '-') && (*in <= 0x7F)) ||
3926
((*in >= 0x20) && (*in < '-')) ||
3931
ctxt->input->col = ccol;
3934
ctxt->input->line++; ctxt->input->col = 1;
3936
} while (*in == 0xA);
3939
nbchar = in - ctxt->input->cur;
3941
* save current set of data
3944
if ((ctxt->sax != NULL) &&
3945
(ctxt->sax->comment != NULL)) {
3947
if ((*in == '-') && (in[1] == '-'))
3950
size = XML_PARSER_BUFFER_SIZE + nbchar;
3951
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3953
xmlErrMemory(ctxt, NULL);
3954
ctxt->instate = state;
3958
} else if (len + nbchar + 1 >= size) {
3960
size += len + nbchar + XML_PARSER_BUFFER_SIZE;
3961
new_buf = (xmlChar *) xmlRealloc(buf,
3962
size * sizeof(xmlChar));
3963
if (new_buf == NULL) {
3965
xmlErrMemory(ctxt, NULL);
3966
ctxt->instate = state;
3971
memcpy(&buf[len], ctxt->input->cur, nbchar);
3976
ctxt->input->cur = in;
3979
ctxt->input->line++; ctxt->input->col = 1;
3984
ctxt->input->cur = in;
3986
ctxt->input->line++; ctxt->input->col = 1;
3987
continue; /* while */
3993
in = ctxt->input->cur;
3998
if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3999
(!ctxt->disableSAX)) {
4001
ctxt->sax->comment(ctxt->userData, buf);
4003
ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4007
ctxt->instate = state;
4011
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4012
"Comment not terminated \n<!--%.50s\n",
4015
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4016
"Comment not terminated \n", NULL);
4024
} while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4025
xmlParseCommentComplex(ctxt, buf, len, size);
4026
ctxt->instate = state;
4033
* @ctxt: an XML parser context
4035
* parse the name of a PI
4037
* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4039
* Returns the PITarget name or NULL
4043
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
4044
const xmlChar *name;
4046
name = xmlParseName(ctxt);
4047
if ((name != NULL) &&
4048
((name[0] == 'x') || (name[0] == 'X')) &&
4049
((name[1] == 'm') || (name[1] == 'M')) &&
4050
((name[2] == 'l') || (name[2] == 'L'))) {
4052
if ((name[0] == 'x') && (name[1] == 'm') &&
4053
(name[2] == 'l') && (name[3] == 0)) {
4054
xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4055
"XML declaration allowed only at the start of the document\n");
4057
} else if (name[3] == 0) {
4058
xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
4062
if (xmlW3CPIs[i] == NULL) break;
4063
if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4066
xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4067
"xmlParsePITarget: invalid name prefix 'xml'\n",
4073
#ifdef LIBXML_CATALOG_ENABLED
4075
* xmlParseCatalogPI:
4076
* @ctxt: an XML parser context
4077
* @catalog: the PI value string
4079
* parse an XML Catalog Processing Instruction.
4081
* <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
4083
* Occurs only if allowed by the user and if happening in the Misc
4084
* part of the document before any doctype information
4085
* This will add the given catalog to the parsing context in order
4086
* to be used if there is a resolution need further down in the document
4090
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
4091
xmlChar *URL = NULL;
4092
const xmlChar *tmp, *base;
4096
while (IS_BLANK_CH(*tmp)) tmp++;
4097
if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
4100
while (IS_BLANK_CH(*tmp)) tmp++;
4105
while (IS_BLANK_CH(*tmp)) tmp++;
4107
if ((marker != '\'') && (marker != '"'))
4111
while ((*tmp != 0) && (*tmp != marker)) tmp++;
4114
URL = xmlStrndup(base, tmp - base);
4116
while (IS_BLANK_CH(*tmp)) tmp++;
4121
ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
4127
xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
4128
"Catalog PI syntax error: %s\n",
4137
* @ctxt: an XML parser context
4139
* parse an XML Processing Instruction.
4141
* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4143
* The processing is transferred to SAX once parsed.
4147
xmlParsePI(xmlParserCtxtPtr ctxt) {
4148
xmlChar *buf = NULL;
4150
int size = XML_PARSER_BUFFER_SIZE;
4152
const xmlChar *target;
4153
xmlParserInputState state;
4156
if ((RAW == '<') && (NXT(1) == '?')) {
4157
xmlParserInputPtr input = ctxt->input;
4158
state = ctxt->instate;
4159
ctxt->instate = XML_PARSER_PI;
4161
* this is a Processing Instruction.
4167
* Parse the target name and check for special support like
4170
target = xmlParsePITarget(ctxt);
4171
if (target != NULL) {
4172
if ((RAW == '?') && (NXT(1) == '>')) {
4173
if (input != ctxt->input) {
4174
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4175
"PI declaration doesn't start and stop in the same entity\n");
4182
if ((ctxt->sax) && (!ctxt->disableSAX) &&
4183
(ctxt->sax->processingInstruction != NULL))
4184
ctxt->sax->processingInstruction(ctxt->userData,
4186
ctxt->instate = state;
4189
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4191
xmlErrMemory(ctxt, NULL);
4192
ctxt->instate = state;
4196
if (!IS_BLANK(cur)) {
4197
xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4198
"ParsePI: PI %s space expected\n", target);
4202
while (IS_CHAR(cur) && /* checked */
4203
((cur != '?') || (NXT(1) != '>'))) {
4204
if (len + 5 >= size) {
4208
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4210
xmlErrMemory(ctxt, NULL);
4212
ctxt->instate = state;
4222
COPY_BUF(l,buf,len,cur);
4233
xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4234
"ParsePI: PI %s never end ...\n", target);
4236
if (input != ctxt->input) {
4237
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4238
"PI declaration doesn't start and stop in the same entity\n");
4242
#ifdef LIBXML_CATALOG_ENABLED
4243
if (((state == XML_PARSER_MISC) ||
4244
(state == XML_PARSER_START)) &&
4245
(xmlStrEqual(target, XML_CATALOG_PI))) {
4246
xmlCatalogAllow allow = xmlCatalogGetDefaults();
4247
if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4248
(allow == XML_CATA_ALLOW_ALL))
4249
xmlParseCatalogPI(ctxt, buf);
4257
if ((ctxt->sax) && (!ctxt->disableSAX) &&
4258
(ctxt->sax->processingInstruction != NULL))
4259
ctxt->sax->processingInstruction(ctxt->userData,
4264
xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
4266
ctxt->instate = state;
4271
* xmlParseNotationDecl:
4272
* @ctxt: an XML parser context
4274
* parse a notation declaration
4276
* [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4278
* Hence there are actually 3 choices:
4279
* 'PUBLIC' S PubidLiteral
4280
* 'PUBLIC' S PubidLiteral S SystemLiteral
4281
* and 'SYSTEM' S SystemLiteral
4283
* See the NOTE on xmlParseExternalID().
4287
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
4288
const xmlChar *name;
4292
if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
4293
xmlParserInputPtr input = ctxt->input;
4296
if (!IS_BLANK_CH(CUR)) {
4297
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4298
"Space required after '<!NOTATION'\n");
4303
name = xmlParseName(ctxt);
4305
xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
4308
if (!IS_BLANK_CH(CUR)) {
4309
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4310
"Space required after the NOTATION name'\n");
4318
Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4322
if (input != ctxt->input) {
4323
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4324
"Notation declaration doesn't start and stop in the same entity\n");
4327
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4328
(ctxt->sax->notationDecl != NULL))
4329
ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4331
xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
4333
if (Systemid != NULL) xmlFree(Systemid);
4334
if (Pubid != NULL) xmlFree(Pubid);
4339
* xmlParseEntityDecl:
4340
* @ctxt: an XML parser context
4342
* parse <!ENTITY declarations
4344
* [70] EntityDecl ::= GEDecl | PEDecl
4346
* [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4348
* [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4350
* [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4352
* [74] PEDef ::= EntityValue | ExternalID
4354
* [76] NDataDecl ::= S 'NDATA' S Name
4356
* [ VC: Notation Declared ]
4357
* The Name must match the declared name of a notation.
4361
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
/*
 * Parses one <!ENTITY declaration and reports it through SAX.
 * Four kinds are reported via entityDecl: internal and external
 * parameter entities, internal general entities and external parsed
 * general entities. An external general entity followed by NDATA is
 * reported through unparsedEntityDecl instead.
 * value, URI and literal are heap strings released at the end of this
 * function.
 */
4362
const xmlChar *name = NULL;
4363
xmlChar *value = NULL;
4364
xmlChar *URI = NULL, *literal = NULL;
4365
const xmlChar *ndata = NULL;
4366
int isParameter = 0;
4367
xmlChar *orig = NULL;
4370
/* GROW; done in the caller */
4371
if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
4372
/* Starting input, compared again once the declaration is finished. */
xmlParserInputPtr input = ctxt->input;
4375
skipped = SKIP_BLANKS;
4377
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4378
"Space required after '<!ENTITY'\n");
4383
skipped = SKIP_BLANKS;
4385
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4386
"Space required after '%'\n");
4391
name = xmlParseName(ctxt);
4393
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4394
"xmlParseEntityDecl: no name\n");
4397
skipped = SKIP_BLANKS;
4399
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4400
"Space required after the entity name\n");
4403
ctxt->instate = XML_PARSER_ENTITY_DECL;
4405
* handle the various case of definitions...
4408
/*
 * Parameter-entity forms come first: a quoted literal is an internal
 * entity (its raw text is returned through orig), anything else must be
 * an external identifier. The system identifier is run through
 * xmlParseURI to report invalid URIs and to reject URIs that carry a
 * fragment. The general-entity forms below follow the same pattern.
 */
if ((RAW == '"') || (RAW == '\'')) {
4409
value = xmlParseEntityValue(ctxt, &orig);
4411
if ((ctxt->sax != NULL) &&
4412
(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4413
ctxt->sax->entityDecl(ctxt->userData, name,
4414
XML_INTERNAL_PARAMETER_ENTITY,
4418
URI = xmlParseExternalID(ctxt, &literal, 1);
4419
if ((URI == NULL) && (literal == NULL)) {
4420
xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
4425
uri = xmlParseURI((const char *) URI);
4427
xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4428
"Invalid URI: %s\n", URI);
4430
* This really ought to be a well formedness error
4431
* but the XML Core WG decided otherwise c.f. issue
4432
* E26 of the XML erratas.
4435
if (uri->fragment != NULL) {
4437
* Okay this is foolish to block those but not
4440
xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
4442
if ((ctxt->sax != NULL) &&
4443
(!ctxt->disableSAX) &&
4444
(ctxt->sax->entityDecl != NULL))
4445
ctxt->sax->entityDecl(ctxt->userData, name,
4446
XML_EXTERNAL_PARAMETER_ENTITY,
4447
literal, URI, NULL);
4454
if ((RAW == '"') || (RAW == '\'')) {
4455
/* General entity with a literal value: internal general entity. */
value = xmlParseEntityValue(ctxt, &orig);
4456
if ((ctxt->sax != NULL) &&
4457
(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4458
ctxt->sax->entityDecl(ctxt->userData, name,
4459
XML_INTERNAL_GENERAL_ENTITY,
4462
* For expat compatibility in SAX mode.
4464
/*
 * When no document exists, or the existing one is the placeholder
 * created here earlier (its version equals SAX_COMPAT_MODE), the entity
 * is also recorded through xmlSAX2EntityDecl in a placeholder document
 * whose internal subset is a DTD named fake.
 * NOTE(review): myDoc is dereferenced shortly after xmlNewDoc; confirm a
 * failed allocation is handled.
 */
if ((ctxt->myDoc == NULL) ||
4465
(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4466
if (ctxt->myDoc == NULL) {
4467
ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4469
if (ctxt->myDoc->intSubset == NULL)
4470
ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4471
BAD_CAST "fake", NULL, NULL);
4473
xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4477
/* General entity without a literal: an external identifier is required;
 * URI receives the system identifier, literal the public one. */
URI = xmlParseExternalID(ctxt, &literal, 1);
4478
if ((URI == NULL) && (literal == NULL)) {
4479
xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
4484
/* Parsed only to validate the system identifier and look for a
 * fragment; the entity itself keeps the original string. */
uri = xmlParseURI((const char *)URI);
4486
xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4487
"Invalid URI: %s\n", URI);
4489
* This really ought to be a well formedness error
4490
* but the XML Core WG decided otherwise c.f. issue
4491
* E26 of the XML erratas.
4494
if (uri->fragment != NULL) {
4496
* Okay this is foolish to block those but not
4499
xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
4504
/* Anything other than the closing > must be preceded by a blank. */
if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
4505
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4506
"Space required before 'NDATA'\n");
4509
if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
4511
if (!IS_BLANK_CH(CUR)) {
4512
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4513
"Space required after 'NDATA'\n");
4516
/* NDATA present: the entity is unparsed and ndata names its notation. */
ndata = xmlParseName(ctxt);
4517
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4518
(ctxt->sax->unparsedEntityDecl != NULL))
4519
ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4520
literal, URI, ndata);
4522
if ((ctxt->sax != NULL) &&
4523
(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4524
ctxt->sax->entityDecl(ctxt->userData, name,
4525
XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4526
literal, URI, NULL);
4528
* For expat compatibility in SAX mode.
4529
* assuming the entity repalcement was asked for
4531
/* Same placeholder-document mechanism as for internal general entities,
 * but only when replaceEntities is set. */
if ((ctxt->replaceEntities != 0) &&
4532
((ctxt->myDoc == NULL) ||
4533
(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4534
if (ctxt->myDoc == NULL) {
4535
ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4538
if (ctxt->myDoc->intSubset == NULL)
4539
ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4540
BAD_CAST "fake", NULL, NULL);
4541
xmlSAX2EntityDecl(ctxt, name,
4542
XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4543
literal, URI, NULL);
4550
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
4551
"xmlParseEntityDecl: entity %s not terminated\n", name);
4553
if (input != ctxt->input) {
4554
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4555
"Entity declaration doesn't start and stop in the same entity\n");
4561
* Ugly mechanism to save the raw entity value.
4563
/*
 * The entity that was just declared is looked up again through the SAX
 * getParameterEntity or getEntity handler. xmlSAX2GetEntity is used as a
 * fallback only when userData is the parser context itself.
 * NOTE(review): orig was filled by xmlParseEntityValue; the statement
 * guarded by the cur->orig test is presumably where it is attached to
 * the entity -- confirm.
 */
xmlEntityPtr cur = NULL;
4566
if ((ctxt->sax != NULL) &&
4567
(ctxt->sax->getParameterEntity != NULL))
4568
cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4570
if ((ctxt->sax != NULL) &&
4571
(ctxt->sax->getEntity != NULL))
4572
cur = ctxt->sax->getEntity(ctxt->userData, name);
4573
if ((cur == NULL) && (ctxt->userData==ctxt)) {
4574
cur = xmlSAX2GetEntity(ctxt, name);
4578
if (cur->orig != NULL)
4585
/* Release the strings allocated while parsing this declaration. */
if (value != NULL) xmlFree(value);
4586
if (URI != NULL) xmlFree(URI);
4587
if (literal != NULL) xmlFree(literal);
4592
* xmlParseDefaultDecl:
4593
* @ctxt: an XML parser context
4594
* @value: Receive a possible fixed default value for the attribute
4596
* Parse an attribute default declaration
4598
* [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4600
* [ VC: Required Attribute ]
4601
* if the default declaration is the keyword #REQUIRED, then the
4602
* attribute must be specified for all elements of the type in the
4603
* attribute-list declaration.
4605
* [ VC: Attribute Default Legal ]
4606
* The declared default value must meet the lexical constraints of
4607
* the declared attribute type c.f. xmlValidateAttributeDecl()
4609
* [ VC: Fixed Attribute Default ]
4610
* if an attribute has a default value declared with the #FIXED
4611
* keyword, instances of that attribute must match the default value.
4613
* [ WFC: No < in Attribute Values ]
4614
* handled in xmlParseAttValue()
4616
* returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4617
* or XML_ATTRIBUTE_FIXED.
4621
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
/*
 * Recognizes the #REQUIRED and #IMPLIED keywords, which return
 * immediately, and the optional #FIXED keyword; in every other case an
 * attribute value is parsed with xmlParseAttValue.
 */
4626
if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
4628
return(XML_ATTRIBUTE_REQUIRED);
4630
if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
4632
return(XML_ATTRIBUTE_IMPLIED);
4634
/* Default kind when a bare attribute value follows without #FIXED. */
val = XML_ATTRIBUTE_NONE;
4635
if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
4637
val = XML_ATTRIBUTE_FIXED;
4638
if (!IS_BLANK_CH(CUR)) {
4639
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4640
"Space required after '#FIXED'\n");
4644
ret = xmlParseAttValue(ctxt);
4645
/* NOTE(review): presumably xmlParseAttValue changes instate, hence the
 * explicit return to the DTD state here -- confirm. */
ctxt->instate = XML_PARSER_DTD;
4647
/* The error code reported is the one already stored in ctxt->errNo. */
xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
4648
"Attribute default value declaration error\n");
4655
* xmlParseNotationType:
4656
* @ctxt: an XML parser context
4658
* parse a Notation attribute type.
4660
* Note: the leading 'NOTATION' S part has already been parsed...
4662
* [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4664
* [ VC: Notation Attributes ]
4665
* Values of this type must match one of the notation names included
4666
* in the declaration; all notation names in the declaration must be declared.
4668
* Returns: the notation attribute tree built while parsing
4672
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
/*
 * Builds an enumeration list with one node per notation name; names are
 * read in a loop that repeats while the next character is |.
 * ret is the head of the list, last its current tail.
 */
4673
const xmlChar *name;
4674
xmlEnumerationPtr ret = NULL, last = NULL, cur;
4677
xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
4684
name = xmlParseName(ctxt);
4686
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4687
"Name expected in NOTATION declaration\n");
4690
cur = xmlCreateEnumeration(name);
4691
/* No node could be created: return whatever was built so far. */
if (cur == NULL) return(ret);
4692
/* First node becomes both head and tail of the list. */
if (last == NULL) ret = last = cur;
4698
} while (RAW == '|');
4700
xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
4701
/* NOTE(review): only last is released here, and only when it is not the
 * head node; confirm how the rest of the list is released on this
 * error path. */
if ((last != NULL) && (last != ret))
4702
xmlFreeEnumeration(last);
4710
* xmlParseEnumerationType:
4711
* @ctxt: an XML parser context
4713
* parse an Enumeration attribute type.
4715
* [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4717
* [ VC: Enumeration ]
4718
* Values of this type must match one of the Nmtoken tokens in
4721
* Returns: the enumeration attribute tree built while parsing
4725
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
/*
 * Builds an enumeration list with one node per Nmtoken; tokens are read
 * in a loop that repeats while the next character is |.
 * ret is the head of the list, last its current tail.
 */
4727
xmlEnumerationPtr ret = NULL, last = NULL, cur;
4730
xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
4737
name = xmlParseNmtoken(ctxt);
4739
xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
4742
/* NOTE(review): name comes from xmlParseNmtoken; confirm whether it has
 * to be freed after the node is created. */
cur = xmlCreateEnumeration(name);
4744
/* No node could be created: return whatever was built so far. */
if (cur == NULL) return(ret);
4745
/* First node becomes both head and tail of the list. */
if (last == NULL) ret = last = cur;
4751
} while (RAW == '|');
4753
xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
4761
* xmlParseEnumeratedType:
4762
* @ctxt: an XML parser context
4763
* @tree: the enumeration tree built while parsing
4765
* parse an Enumerated attribute type.
4767
* [57] EnumeratedType ::= NotationType | Enumeration
4769
* [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4772
* Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4776
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
/*
 * Dispatches on the NOTATION keyword: when present the notation names
 * are parsed, otherwise a plain enumeration is parsed. The resulting
 * list is stored in *tree; 0 is returned when the sub-parser produced
 * no list.
 */
4777
if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
4779
if (!IS_BLANK_CH(CUR)) {
4780
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4781
"Space required after 'NOTATION'\n");
4785
*tree = xmlParseNotationType(ctxt);
4786
if (*tree == NULL) return(0);
4787
return(XML_ATTRIBUTE_NOTATION);
4789
/* No NOTATION keyword: the input must be a parenthesized enumeration. */
*tree = xmlParseEnumerationType(ctxt);
4790
if (*tree == NULL) return(0);
4791
return(XML_ATTRIBUTE_ENUMERATION);
4795
* xmlParseAttributeType:
4796
* @ctxt: an XML parser context
4797
* @tree: the enumeration tree built while parsing
4799
* parse the type (AttType) of one attribute in an attribute list declaration
4801
* [54] AttType ::= StringType | TokenizedType | EnumeratedType
4803
* [55] StringType ::= 'CDATA'
4805
* [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4806
* 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4808
* Validity constraints for attribute values syntax are checked in
4809
* xmlValidateAttributeValue()
4812
* Values of type ID must match the Name production. A name must not
4813
* appear more than once in an XML document as a value of this type;
4814
* i.e., ID values must uniquely identify the elements which bear them.
4816
* [ VC: One ID per Element Type ]
4817
* No element type may have more than one ID attribute specified.
4819
* [ VC: ID Attribute Default ]
4820
* An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4823
* Values of type IDREF must match the Name production, and values
4824
* of type IDREFS must match Names; each IDREF Name must match the value
4825
* of an ID attribute on some element in the XML document; i.e. IDREF
4826
* values must match the value of some ID attribute.
4828
* [ VC: Entity Name ]
4829
* Values of type ENTITY must match the Name production, values
4830
* of type ENTITIES must match Names; each Entity Name must match the
4831
* name of an unparsed entity declared in the DTD.
4833
* [ VC: Name Token ]
4834
* Values of type NMTOKEN must match the Nmtoken production; values
4835
* of type NMTOKENS must match Nmtokens.
4837
* Returns the attribute type
4840
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
/*
 * Matches the attribute type keyword at the current position and returns
 * the corresponding XML_ATTRIBUTE_xxx value. The order of the tests
 * matters where one keyword is a prefix of another: IDREFS is tried
 * before IDREF, IDREF before ID, and NMTOKENS before NMTOKEN.
 * Anything that is not a keyword is handed to xmlParseEnumeratedType,
 * which is the only path that uses tree.
 */
4842
if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
4844
return(XML_ATTRIBUTE_CDATA);
4845
} else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
4847
return(XML_ATTRIBUTE_IDREFS);
4848
} else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
4850
return(XML_ATTRIBUTE_IDREF);
4851
} else if ((RAW == 'I') && (NXT(1) == 'D')) {
4853
return(XML_ATTRIBUTE_ID);
4854
} else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
4856
return(XML_ATTRIBUTE_ENTITY);
4857
} else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
4859
return(XML_ATTRIBUTE_ENTITIES);
4860
} else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
4862
return(XML_ATTRIBUTE_NMTOKENS);
4863
} else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
4865
return(XML_ATTRIBUTE_NMTOKEN);
4867
return(xmlParseEnumeratedType(ctxt, tree));
4871
* xmlParseAttributeListDecl:
4872
* @ctxt: an XML parser context
4874
* Parse the attribute list declaration for an element
4876
* [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4878
* [53] AttDef ::= S Name S AttType S DefaultDecl
4882
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
/*
 * Parses one <!ATTLIST declaration: the element name, then one
 * attribute definition (name, type, default) per loop iteration until
 * the closing > is reached. Each definition is reported through the SAX
 * attributeDecl callback.
 */
4883
const xmlChar *elemName;
4884
const xmlChar *attrName;
4885
xmlEnumerationPtr tree;
4887
if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
4888
/* Starting input, compared again after the loop. */
xmlParserInputPtr input = ctxt->input;
4891
if (!IS_BLANK_CH(CUR)) {
4892
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4893
"Space required after '<!ATTLIST'\n");
4896
elemName = xmlParseName(ctxt);
4897
if (elemName == NULL) {
4898
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4899
"ATTLIST: no name for Element\n");
4904
while (RAW != '>') {
4905
/* Position at the start of the iteration, used below to detect an
 * iteration that consumed no input. */
const xmlChar *check = CUR_PTR;
4908
xmlChar *defaultValue = NULL;
4912
attrName = xmlParseName(ctxt);
4913
if (attrName == NULL) {
4914
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4915
"ATTLIST: no name for Attribute\n");
4919
if (!IS_BLANK_CH(CUR)) {
4920
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4921
"Space required after the attribute name\n");
4926
/* tree receives the enumeration list for enumerated types. */
type = xmlParseAttributeType(ctxt, &tree);
4932
if (!IS_BLANK_CH(CUR)) {
4933
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4934
"Space required after the attribute type\n");
4936
xmlFreeEnumeration(tree);
4941
def = xmlParseDefaultDecl(ctxt, &defaultValue);
4943
/* From here on every error path releases both defaultValue and tree. */
if (defaultValue != NULL)
4944
xmlFree(defaultValue);
4946
xmlFreeEnumeration(tree);
4952
if (!IS_BLANK_CH(CUR)) {
4953
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4954
"Space required after the attribute default value\n");
4955
if (defaultValue != NULL)
4956
xmlFree(defaultValue);
4958
xmlFreeEnumeration(tree);
4963
/* Nothing was consumed during this iteration: report an internal error
 * instead of looping on the same position. */
if (check == CUR_PTR) {
4964
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4965
"in xmlParseAttributeListDecl\n");
4966
if (defaultValue != NULL)
4967
xmlFree(defaultValue);
4969
xmlFreeEnumeration(tree);
4972
/* tree is handed to the callback when one is installed and SAX is
 * enabled; otherwise it is released here. defaultValue is always
 * released by this function at the end of the iteration. */
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4973
(ctxt->sax->attributeDecl != NULL))
4974
ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4975
type, def, defaultValue, tree);
4976
else if (tree != NULL)
4977
xmlFreeEnumeration(tree);
4979
/* SAX2 only: record attributes that carry a default value (neither
 * #IMPLIED nor #REQUIRED) and attributes whose type is not CDATA. */
if ((ctxt->sax2) && (defaultValue != NULL) &&
4980
(def != XML_ATTRIBUTE_IMPLIED) &&
4981
(def != XML_ATTRIBUTE_REQUIRED)) {
4982
xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4984
if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4985
xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4987
if (defaultValue != NULL)
4988
xmlFree(defaultValue);
4992
if (input != ctxt->input) {
4993
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4994
"Attribute list declaration doesn't start and stop in the same entity\n");
5002
* xmlParseElementMixedContentDecl:
5003
* @ctxt: an XML parser context
5004
* @inputchk: the input used for the current entity, needed for boundary checks
5006
* parse the declaration for a Mixed Element content
5007
* The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5009
* [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5010
* '(' S? '#PCDATA' S? ')'
5012
* [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5014
* [ VC: No Duplicate Types ]
5015
* The same name must not appear more than once in a single
5016
* mixed-content declaration.
5018
* returns: the list of the xmlElementContentPtr describing the element choices
5020
xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
/*
 * Parses a mixed content model, which must start with #PCDATA.
 * A PCDATA node is created first; each following |-separated element
 * name adds an OR node to the tree. inputchk is the id of the input the
 * group was opened in and is compared with the current input id when
 * the group closes (only while validating).
 */
5022
xmlElementContentPtr ret = NULL, cur = NULL, n;
5023
const xmlChar *elem = NULL;
5026
if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5031
if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5032
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5033
"Element content declaration doesn't start and stop in the same entity\n",
5037
ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5039
ret->ocur = XML_ELEMENT_CONTENT_MULT;
5044
if ((RAW == '(') || (RAW == '|')) {
5045
ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5046
if (ret == NULL) return(NULL);
5048
while (RAW == '|') {
5051
/* NOTE(review): ret is overwritten before the NULL test, so on failure
 * the nodes still reachable through cur are not released by the return
 * that follows -- confirm whether they are freed elsewhere. */
ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5052
if (ret == NULL) return(NULL);
5058
/* NOTE(review): same concern here, the tree rooted at ret is not
 * released by the return that follows a failed allocation. */
n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5059
if (n == NULL) return(NULL);
5060
n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5069
elem = xmlParseName(ctxt);
5071
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5072
"xmlParseElementMixedContentDecl : Name expected\n");
5073
xmlFreeDocElementContent(ctxt->myDoc, cur);
5079
/* A list of names must be closed by )* ; the last parsed name is then
 * attached as the second child of the current OR node. */
if ((RAW == ')') && (NXT(1) == '*')) {
5081
cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
5082
XML_ELEMENT_CONTENT_ELEMENT);
5083
if (cur->c2 != NULL)
5084
cur->c2->parent = cur;
5086
ret->ocur = XML_ELEMENT_CONTENT_MULT;
5087
if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5088
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5089
"Element content declaration doesn't start and stop in the same entity\n",
5094
xmlFreeDocElementContent(ctxt->myDoc, ret);
5095
xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
5100
xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
5106
* xmlParseElementChildrenContentDecl:
5107
* @ctxt: an XML parser context
5108
* @inputchk: the input used for the current entity, needed for boundary checks
5110
* parse the declaration for an element children content model
5111
* The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5114
* [47] children ::= (choice | seq) ('?' | '*' | '+')?
5116
* [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5118
* [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5120
* [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5122
* [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5123
* TODO Parameter-entity replacement text must be properly nested
5124
* with parenthesized groups. That is to say, if either of the
5125
* opening or closing parentheses in a choice, seq, or Mixed
5126
* construct is contained in the replacement text for a parameter
5127
* entity, both must be contained in the same replacement text. For
5128
* interoperability, if a parameter-entity reference appears in a
5129
* choice, seq, or Mixed construct, its replacement text should not
5130
* be empty, and neither the first nor last non-blank character of
5131
* the replacement text should be a connector (| or ,).
5133
* Returns the tree of xmlElementContentPtr describing the element
5136
xmlElementContentPtr
xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
/*
 * Parses one parenthesized group of a children content model and returns
 * the tree built for it. Each item is either a nested group, handled by
 * a recursive call, or an element name; items are linked under SEQ
 * nodes when separated by , and under OR nodes when separated by |.
 * inputchk is the id of the input the group was opened in; it is
 * compared with the current input id when the group closes (only while
 * validating).
 * NOTE(review): the recursive calls take no depth argument; confirm that
 * nesting depth is bounded elsewhere.
 */
5138
xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
5139
const xmlChar *elem;
5145
/* Id of the current input, passed down as inputchk of the nested group. */
int inputid = ctxt->input->id;
5147
/* Recurse on first child */
5150
cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
5154
elem = xmlParseName(ctxt);
5156
xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
5159
cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5161
xmlErrMemory(ctxt, NULL);
5166
/* Occurrence suffix of the first item; ONCE when none is present. */
cur->ocur = XML_ELEMENT_CONTENT_OPT;
5168
} else if (RAW == '*') {
5169
cur->ocur = XML_ELEMENT_CONTENT_MULT;
5171
} else if (RAW == '+') {
5172
cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5175
cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5181
while (RAW != ')') {
5183
* Each loop we parse one separator and one element.
5186
/* type remembers the first separator seen in this group; a different
 * separator later in the same group is an error. */
if (type == 0) type = CUR;
5189
* Detect "Name | Name , Name" error
5191
else if (type != CUR) {
5192
xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
5193
"xmlParseElementChildrenContentDecl : '%c' expected\n",
5195
/* last is released on its own only when it is a different node from
 * ret, because ret is released right after. */
if ((last != NULL) && (last != ret))
5196
xmlFreeDocElementContent(ctxt->myDoc, last);
5198
xmlFreeDocElementContent(ctxt->myDoc, ret);
5203
op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
5205
if ((last != NULL) && (last != ret))
5206
xmlFreeDocElementContent(ctxt->myDoc, last);
5207
xmlFreeDocElementContent(ctxt->myDoc, ret);
5225
} else if (RAW == '|') {
5226
if (type == 0) type = CUR;
5229
* Detect "Name , Name | Name" error
5231
else if (type != CUR) {
5232
xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
5233
"xmlParseElementChildrenContentDecl : '%c' expected\n",
5235
if ((last != NULL) && (last != ret))
5236
xmlFreeDocElementContent(ctxt->myDoc, last);
5238
xmlFreeDocElementContent(ctxt->myDoc, ret);
5243
op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5245
if ((last != NULL) && (last != ret))
5246
xmlFreeDocElementContent(ctxt->myDoc, last);
5248
xmlFreeDocElementContent(ctxt->myDoc, ret);
5267
xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
5269
xmlFreeDocElementContent(ctxt->myDoc, ret);
5276
int inputid = ctxt->input->id;
5277
/* Recurse on second child */
5280
last = xmlParseElementChildrenContentDecl(ctxt, inputid);
5283
elem = xmlParseName(ctxt);
5285
xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
5287
xmlFreeDocElementContent(ctxt->myDoc, ret);
5290
last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5292
/* Occurrence suffix of the item just parsed; ONCE when none is present. */
last->ocur = XML_ELEMENT_CONTENT_OPT;
5294
} else if (RAW == '*') {
5295
last->ocur = XML_ELEMENT_CONTENT_MULT;
5297
} else if (RAW == '+') {
5298
last->ocur = XML_ELEMENT_CONTENT_PLUS;
5301
last->ocur = XML_ELEMENT_CONTENT_ONCE;
5307
if ((cur != NULL) && (last != NULL)) {
5312
if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5313
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5314
"Element content declaration doesn't start and stop in the same entity\n",
5320
/* Suffix applied to the whole group is merged with an occurrence that
 * ret already carries: PLUS or MULT become MULT, anything else OPT.
 * NOTE(review): presumably the branch for the ? suffix -- confirm. */
if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5321
(ret->ocur == XML_ELEMENT_CONTENT_MULT))
5322
ret->ocur = XML_ELEMENT_CONTENT_MULT;
5324
ret->ocur = XML_ELEMENT_CONTENT_OPT;
5327
} else if (RAW == '*') {
5329
ret->ocur = XML_ELEMENT_CONTENT_MULT;
5332
* Some normalization:
5333
* (a | b* | c?)* == (a | b | c)*
5335
/* Walk the chain of OR nodes and reset OPT or MULT children to ONCE,
 * since the repetition is already expressed on the group. */
while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
5336
if ((cur->c1 != NULL) &&
5337
((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5338
(cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5339
cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5340
if ((cur->c2 != NULL) &&
5341
((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5342
(cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5343
cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5348
} else if (RAW == '+') {
5352
/* A group that is already OPT or MULT becomes MULT under +, any other
 * group becomes PLUS. */
if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5353
(ret->ocur == XML_ELEMENT_CONTENT_MULT))
5354
ret->ocur = XML_ELEMENT_CONTENT_MULT;
5356
ret->ocur = XML_ELEMENT_CONTENT_PLUS;
5358
* Some normalization:
5359
* (a | b*)+ == (a | b)*
5360
* (a | b?)+ == (a | b)*
5362
while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
5363
if ((cur->c1 != NULL) &&
5364
((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5365
(cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5366
cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5369
if ((cur->c2 != NULL) &&
5370
((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5371
(cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5372
cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5378
/* NOTE(review): presumably applied only when one of the children was
 * reset above, matching the normalization rules listed -- confirm. */
ret->ocur = XML_ELEMENT_CONTENT_MULT;
5386
* xmlParseElementContentDecl:
5387
* @ctxt: an XML parser context
5388
* @name: the name of the element being defined.
5389
* @result: the Element Content pointer will be stored here if any
5391
* parse the declaration for an Element content either Mixed or Children,
5392
* the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5394
* [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5396
* returns: the type of element content XML_ELEMENT_TYPE_xxx
5400
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
5401
xmlElementContentPtr *result) {
/*
 * Chooses between the two content model parsers: a model starting with
 * #PCDATA is parsed as mixed content, anything else as element
 * children. name is only used in the error message.
 */
5403
xmlElementContentPtr tree = NULL;
5404
/* Id of the input the opening parenthesis was found in; both
 * sub-parsers receive it as their inputchk argument. */
int inputid = ctxt->input->id;
5410
xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
5411
"xmlParseElementContentDecl : %s '(' expected\n", name);
5417
if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5418
tree = xmlParseElementMixedContentDecl(ctxt, inputid);
5419
res = XML_ELEMENT_TYPE_MIXED;
5421
tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
5422
res = XML_ELEMENT_TYPE_ELEMENT;
5430
* xmlParseElementDecl:
5431
* @ctxt: an XML parser context
5433
* parse an Element declaration.
5435
* [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5437
* [ VC: Unique Element Type Declaration ]
5438
* No element type may be declared more than once
5440
* Returns the type of the element, or -1 in case of error
5443
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
/*
 * Parses one <!ELEMENT declaration: the element name followed by EMPTY,
 * ANY or a parenthesized content model, and reports it through the SAX
 * elementDecl callback. The content tree, when one was built, is either
 * adopted by the callback or released here.
 */
5444
const xmlChar *name;
5446
xmlElementContentPtr content = NULL;
5448
/* GROW; done in the caller */
5449
if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
5450
/* Starting input, compared again before the callback is invoked. */
xmlParserInputPtr input = ctxt->input;
5453
if (!IS_BLANK_CH(CUR)) {
5454
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5455
"Space required after 'ELEMENT'\n");
5458
name = xmlParseName(ctxt);
5460
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5461
"xmlParseElementDecl: no name for Element\n");
5464
/* Loops while the current input is exhausted and nested inputs remain.
 * NOTE(review): presumably the body pops the finished input -- confirm. */
while ((RAW == 0) && (ctxt->inputNr > 1))
5466
if (!IS_BLANK_CH(CUR)) {
5467
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5468
"Space required after the element name\n");
5471
if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
5474
* Element must always be empty.
5476
ret = XML_ELEMENT_TYPE_EMPTY;
5477
} else if ((RAW == 'A') && (NXT(1) == 'N') &&
5481
* Element is a generic container.
5483
ret = XML_ELEMENT_TYPE_ANY;
5484
} else if (RAW == '(') {
5485
/* content receives the tree describing the content model. */
ret = xmlParseElementContentDecl(ctxt, name, &content);
5488
* [ WFC: PEs in Internal Subset ] error handling.
5490
/* A % found here while in the internal subset and in the top-level
 * input gets a dedicated error; other input gets the generic one. */
if ((RAW == '%') && (ctxt->external == 0) &&
5491
(ctxt->inputNr == 1)) {
5492
xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
5493
"PEReference: forbidden within markup decl in internal subset\n");
5495
xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
5496
"xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5503
* Pop-up of finished entities.
5505
while ((RAW == 0) && (ctxt->inputNr > 1))
5510
xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
5511
/* Missing closing >: the content tree is released before giving up. */
if (content != NULL) {
5512
xmlFreeDocElementContent(ctxt->myDoc, content);
5515
if (input != ctxt->input) {
5516
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5517
"Element declaration doesn't start and stop in the same entity\n");
5521
/* content->parent is cleared before the callback and inspected after
 * it: a parent that is still NULL means the tree was not adopted and
 * has to be released here. */
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5522
(ctxt->sax->elementDecl != NULL)) {
5523
if (content != NULL)
5524
content->parent = NULL;
5525
ctxt->sax->elementDecl(ctxt->userData, name, ret,
5527
if ((content != NULL) && (content->parent == NULL)) {
5529
* this is a trick: if xmlAddElementDecl is called,
5530
* instead of copying the full tree it is plugged directly
5531
* if called from the parser. Avoid duplicating the
5532
* interfaces or change the API/ABI
5534
xmlFreeDocElementContent(ctxt->myDoc, content);
5536
} else if (content != NULL) {
5537
/* No callback was invoked, so nothing could adopt the tree. */
xmlFreeDocElementContent(ctxt->myDoc, content);
5545
* xmlParseConditionalSections
5546
* @ctxt: an XML parser context
5548
* [61] conditionalSect ::= includeSect | ignoreSect
5549
* [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5550
* [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5551
* [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5552
* [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5556
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
/*
 * Handles one conditional section. An INCLUDE section has its content
 * parsed as declarations, with nested conditional sections handled by a
 * recursive call. An IGNORE section is skipped while tracking nesting
 * with a depth counter. Any other keyword is reported as
 * XML_ERR_CONDSEC_INVALID_KEYWORD.
 */
5559
if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
5563
xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
5567
/* Optional trace output, emitted only when xmlParserDebugEntities is
 * set. */
if (xmlParserDebugEntities) {
5568
if ((ctxt->input != NULL) && (ctxt->input->filename))
5569
xmlGenericError(xmlGenericErrorContext,
5570
"%s(%d): ", ctxt->input->filename,
5572
xmlGenericError(xmlGenericErrorContext,
5573
"Entering INCLUDE Conditional Section\n");
5576
while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5578
/* Position and consumed count at the start of the iteration; if neither
 * changed by the end of it, the loop is not making progress. */
const xmlChar *check = CUR_PTR;
5579
unsigned int cons = ctxt->input->consumed;
5581
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5582
xmlParseConditionalSections(ctxt);
5583
} else if (IS_BLANK_CH(CUR)) {
5585
} else if (RAW == '%') {
5586
xmlParsePEReference(ctxt);
5588
xmlParseMarkupDecl(ctxt);
5591
* Pop-up of finished entities.
5593
while ((RAW == 0) && (ctxt->inputNr > 1))
5596
if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5597
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
5601
if (xmlParserDebugEntities) {
5602
if ((ctxt->input != NULL) && (ctxt->input->filename))
5603
xmlGenericError(xmlGenericErrorContext,
5604
"%s(%d): ", ctxt->input->filename,
5606
xmlGenericError(xmlGenericErrorContext,
5607
"Leaving INCLUDE Conditional Section\n");
5610
} else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
5612
xmlParserInputState instate;
5618
xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
5622
if (xmlParserDebugEntities) {
5623
if ((ctxt->input != NULL) && (ctxt->input->filename))
5624
xmlGenericError(xmlGenericErrorContext,
5625
"%s(%d): ", ctxt->input->filename,
5627
xmlGenericError(xmlGenericErrorContext,
5628
"Entering IGNORE Conditional Section\n");
5632
* Parse up to the end of the conditional section
5633
* But disable SAX event generating DTD building in the meantime
5635
/* disableSAX and instate are saved here and restored after the skip
 * loop. SAX is left enabled when the parser runs in recovery mode. */
state = ctxt->disableSAX;
5636
instate = ctxt->instate;
5637
if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5638
ctxt->instate = XML_PARSER_IGNORE;
5640
/* The loop ends once depth drops below zero or the input is exhausted. */
while ((depth >= 0) && (RAW != 0)) {
5641
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5646
if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5647
/* The terminator that brings depth below zero is not skipped here. */
if (--depth >= 0) SKIP(3);
5654
ctxt->disableSAX = state;
5655
ctxt->instate = instate;
5657
if (xmlParserDebugEntities) {
5658
if ((ctxt->input != NULL) && (ctxt->input->filename))
5659
xmlGenericError(xmlGenericErrorContext,
5660
"%s(%d): ", ctxt->input->filename,
5662
xmlGenericError(xmlGenericErrorContext,
5663
"Leaving IGNORE Conditional Section\n");
5667
xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
5674
xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
5681
* xmlParseMarkupDecl:
5682
* @ctxt: an XML parser context
5684
* parse Markup declarations
5686
* [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5687
* NotationDecl | PI | Comment
5689
* [ VC: Proper Declaration/PE Nesting ]
5690
* Parameter-entity replacement text must be properly nested with
5691
* markup declarations. That is to say, if either the first character
5692
* or the last character of a markup declaration (markupdecl above) is
5693
* contained in the replacement text for a parameter-entity reference,
5694
* both must be contained in the same replacement text.
5696
* [ WFC: PEs in Internal Subset ]
5697
* In the internal DTD subset, parameter-entity references can occur
5698
* only where markup declarations can occur, not within markup declarations.
5699
* (This does not apply to references that occur in external parameter
5700
* entities or to the external subset.)
5703
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
/*
 * Dispatches on the characters following the opening < to the parser
 * for element, entity, attribute list and notation declarations or
 * comments. It then handles parameter entity references and conditional
 * sections where they are allowed, and finally puts the parser state
 * back to XML_PARSER_DTD.
 */
5706
if (NXT(1) == '!') {
5710
xmlParseElementDecl(ctxt);
5711
else if (NXT(3) == 'N')
5712
xmlParseEntityDecl(ctxt);
5715
xmlParseAttributeListDecl(ctxt);
5718
xmlParseNotationDecl(ctxt);
5721
xmlParseComment(ctxt);
5724
/* there is an error but it will be detected later */
5727
} else if (NXT(1) == '?') {
5732
* This is only for internal subset. On external entities,
5733
* the replacement is done before parsing stage
5735
/* external == 0 with a single input on the stack means the parser is
 * reading the internal subset directly, not a parameter entity. */
if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5736
xmlParsePEReference(ctxt);
5739
* Conditional sections are allowed from entities included
5740
* by PE References in the internal subset.
5742
/* More than one input on the stack: the text being read comes from an
 * entity that was pushed on top of the internal subset. */
if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5743
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5744
xmlParseConditionalSections(ctxt);
5748
ctxt->instate = XML_PARSER_DTD;
5753
* @ctxt: an XML parser context
5755
* parse an XML declaration header for external entities
5757
* [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5759
* Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5763
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
/*
 * Parses the text declaration at the start of an external entity: the
 * <?xml keyword, an optional version, the encoding declaration and the
 * closing ?> marker. The version string is stored on the current input.
 */
5765
const xmlChar *encoding;
5768
* We know that '<?xml' is here.
5770
/* The keyword must be followed by a blank to count as a declaration. */
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
5773
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
5777
if (!IS_BLANK_CH(CUR)) {
5778
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5779
"Space needed after '<?xml'\n");
5784
* We may have the VersionInfo here.
5786
/* When no version is given, a copy of XML_DEFAULT_VERSION is used.
 * NOTE(review): confirm that a NULL result from xmlCharStrdup is
 * acceptable for ctxt->input->version. */
version = xmlParseVersionInfo(ctxt);
5787
if (version == NULL)
5788
version = xmlCharStrdup(XML_DEFAULT_VERSION);
5790
if (!IS_BLANK_CH(CUR)) {
5791
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5792
"Space needed here\n");
5795
ctxt->input->version = version;
5798
* We must have the encoding declaration
5800
encoding = xmlParseEncodingDecl(ctxt);
5801
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5803
* The XML REC instructs us to stop parsing right here
5807
/* A missing encoding is reported only when no earlier error is
 * recorded in ctxt->errNo. */
if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5808
xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5809
"Missing encoding in text declaration\n");
5813
if ((RAW == '?') && (NXT(1) == '>')) {
5815
} else if (RAW == '>') {
5816
/* Deprecated old WD ... */
5817
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
5820
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
5821
/* NOTE(review): presumably advances to the next > so that parsing can
 * resume after the malformed declaration -- confirm. */
MOVETO_ENDTAG(CUR_PTR);
5827
* xmlParseExternalSubset:
5828
* @ctxt: an XML parser context
5829
* @ExternalID: the external identifier
5830
* @SystemID: the system identifier (or URL)
5832
* parse Markup declarations from an external subset
5834
* [30] extSubset ::= textDecl? extSubsetDecl
5836
* [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5839
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5840
const xmlChar *SystemID) {
/*
 * Parse the declarations of an external subset: an optional text
 * declaration followed by markup declarations, conditional sections,
 * PE references and blanks, until none of them starts at the current
 * position.
 * ctxt: the parser context; ExternalID / SystemID: identifiers handed to
 * xmlCreateIntSubset() when the document has no internal subset yet.
 */
5841
xmlDetectSAX2(ctxt);
5843
/* The subset may open with a text declaration. */
if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
5844
xmlParseTextDecl(ctxt);
5845
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5847
* The XML REC instructs us to stop parsing right here
5849
ctxt->instate = XML_PARSER_EOF;
5853
/* Create a document if the context has none, then an internal subset if that document lacks one. */
if (ctxt->myDoc == NULL) {
5854
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5856
if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5857
xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5859
ctxt->instate = XML_PARSER_DTD;
5861
while (((RAW == '<') && (NXT(1) == '?')) ||
5862
((RAW == '<') && (NXT(1) == '!')) ||
5863
(RAW == '%') || IS_BLANK_CH(CUR)) {
5864
/* Snapshot of the position, compared at the end of the pass to detect a pass that consumed nothing. */
const xmlChar *check = CUR_PTR;
5865
/* NOTE(review): cons is unsigned int; confirm this matches the declared type of input->consumed. */
unsigned int cons = ctxt->input->consumed;
5868
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5869
xmlParseConditionalSections(ctxt);
5870
} else if (IS_BLANK_CH(CUR)) {
5872
} else if (RAW == '%') {
5873
xmlParsePEReference(ctxt);
5875
xmlParseMarkupDecl(ctxt);
5878
* Pop-up of finished entities.
5880
while ((RAW == 0) && (ctxt->inputNr > 1))
5883
/* Neither the pointer nor the consumed counter moved: report the subset as not finished. */
if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5884
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
5890
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
5896
* xmlParseReference:
5897
* @ctxt: an XML parser context
5899
* parse and handle entity references in content, depending on the SAX
5900
* interface, this may end up in a call to characters() if this is a
5901
* CharRef, a predefined entity, if there is no reference() callback,
5902
* or if the parser was asked to switch to that mode.
5904
* [67] Reference ::= EntityRef | CharRef
5907
xmlParseReference(xmlParserCtxtPtr ctxt) {
/*
 * Handle one reference met in content. A character reference is passed
 * to the characters() or reference() SAX callback. An entity reference
 * is resolved with xmlParseEntityRef(); on first use the entity content
 * is parsed, and the resulting nodes are attached to the entity and/or
 * to the current node of the tree being built.
 * ctxt: the parser context positioned on the ampersand.
 */
5910
if (RAW != '&') return;
5912
/* A hash sign after the ampersand marks a character reference. */
if (NXT(1) == '#') {
5916
int value = xmlParseCharRef(ctxt);
5918
if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5920
* So we are using non-UTF-8 buffers
5921
* Check that the char fit on 8bits, if not
5922
* generate a CharRef.
5924
if (value <= 0xFF) {
5927
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5928
(!ctxt->disableSAX))
5929
ctxt->sax->characters(ctxt->userData, out, 1);
5931
/* Rebuild the reference text in hexadecimal or decimal form and hand it to reference(). */
if ((hex == 'x') || (hex == 'X'))
5932
snprintf((char *)out, sizeof(out), "#x%X", value);
5934
snprintf((char *)out, sizeof(out), "#%d", value);
5935
if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5936
(!ctxt->disableSAX))
5937
ctxt->sax->reference(ctxt->userData, out);
5941
* Just encode the value in UTF-8
5943
COPY_BUF(0 ,out, i, value);
5945
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5946
(!ctxt->disableSAX))
5947
ctxt->sax->characters(ctxt->userData, out, i);
5952
/* Entity reference: resolve the entity; nothing more is done when none is returned. */
ent = xmlParseEntityRef(ctxt);
5953
if (ent == NULL) return;
5954
if (!ctxt->wellFormed)
5956
/* Keep the checked flag as it was before this reference; it is tested again further down. */
was_checked = ent->checked;
5957
if ((ent->name != NULL) &&
5958
(ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5959
xmlNodePtr list = NULL;
5960
xmlParserErrors ret = XML_ERR_OK;
5964
* The first reference to the entity trigger a parsing phase
5965
* where the ent->children is filled with the result from
5968
if (ent->checked == 0) {
5971
value = ent->content;
5974
* Check that this entity is well formed
5976
if ((value != NULL) && (value[0] != 0) &&
5977
(value[1] == 0) && (value[0] == '<') &&
5978
(xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5980
* DONE: get definite answer on this !!!
5981
* Lots of entity decls are used to declare a single
5984
* Which seems to be valid since
5985
* 2.4: The ampersand character (&) and the left angle
5986
* bracket (<) may appear in their literal form only
5987
* when used ... They are also legal within the literal
5988
* entity value of an internal entity declaration;i
5989
* see "4.3.2 Well-Formed Parsed Entities".
5990
* IMHO 2.4 and 4.3.2 are directly in contradiction.
5991
* Looking at the OASIS test suite and James Clark
5992
* tests, this is broken. However the XML REC uses
5993
* it. Is the XML REC not well-formed ????
5994
* This is a hack to avoid this problem
5996
* ANSWER: since lt gt amp .. are already defined,
5997
* this is a redefinition and hence the fact that the
5998
* content is not well balanced is not a Wf error, this
5999
* is lousy but acceptable.
6001
list = xmlNewDocText(ctxt->myDoc, value);
6003
if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6004
(ent->children == NULL)) {
6005
ent->children = list;
6008
list->parent = (xmlNodePtr) ent;
6010
xmlFreeNodeList(list);
6012
} else if (list != NULL) {
6013
xmlFreeNodeList(list);
6017
* 4.3.2: An internal general parsed entity is well-formed
6018
* if its replacement text matches the production labeled
6024
* This is a bit hackish but this seems the best
6025
* way to make sure both SAX and DOM entity support
6028
if (ctxt->userData == ctxt)
6031
user_data = ctxt->userData;
6033
/* Internal entities are parsed from their content string, external parsed entities from URI and ExternalID. */
if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6035
ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6036
value, user_data, &list);
6038
} else if (ent->etype ==
6039
XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6041
ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6042
ctxt->sax, user_data, ctxt->depth,
6043
ent->URI, ent->ExternalID, &list);
6046
ret = XML_ERR_ENTITY_PE_INTERNAL;
6047
xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6048
"invalid entity type found\n", NULL);
6050
if (ret == XML_ERR_ENTITY_LOOP) {
6051
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6053
/* Parsing succeeded and produced nodes: they become the entity children when it has none yet. */
} else if ((ret == XML_ERR_OK) && (list != NULL)) {
6054
if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6055
(ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6056
(ent->children == NULL)) {
6057
ent->children = list;
6058
if (ctxt->replaceEntities) {
6060
* Prune it directly in the generated document
6061
* except for single text nodes.
6063
if (((list->type == XML_TEXT_NODE) &&
6064
(list->next == NULL)) ||
6065
(ctxt->parseMode == XML_PARSE_READER)) {
6066
list->parent = (xmlNodePtr) ent;
6071
while (list != NULL) {
6072
list->parent = (xmlNodePtr) ctxt->node;
6073
list->doc = ctxt->myDoc;
6074
if (list->next == NULL)
6078
list = ent->children;
6079
#ifdef LIBXML_LEGACY_ENABLED
6080
if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6081
xmlAddEntityReference(ent, list, NULL);
6082
#endif /* LIBXML_LEGACY_ENABLED */
6086
while (list != NULL) {
6087
list->parent = (xmlNodePtr) ent;
6088
if (list->next == NULL)
6094
xmlFreeNodeList(list);
6097
/* Any result other than success or the undeclared-entity warning is raised as a fatal error. */
} else if ((ret != XML_ERR_OK) &&
6098
(ret != XML_WAR_UNDECLARED_ENTITY)) {
6099
xmlFatalErr(ctxt, ret, NULL);
6100
} else if (list != NULL) {
6101
xmlFreeNodeList(list);
6108
if (ent->children == NULL) {
6110
* Probably running in SAX mode and the callbacks don't
6111
* build the entity content. So unless we already went
6112
* though parsing for first checking go though the entity
6113
* content to generate callbacks associated to the entity
6115
if (was_checked == 1) {
6118
* This is a bit hackish but this seems the best
6119
* way to make sure both SAX and DOM entity support
6122
if (ctxt->userData == ctxt)
6125
user_data = ctxt->userData;
6127
if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6129
ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6130
ent->content, user_data, NULL);
6132
} else if (ent->etype ==
6133
XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6135
ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6136
ctxt->sax, user_data, ctxt->depth,
6137
ent->URI, ent->ExternalID, NULL);
6140
ret = XML_ERR_ENTITY_PE_INTERNAL;
6141
xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6142
"invalid entity type found\n", NULL);
6144
if (ret == XML_ERR_ENTITY_LOOP) {
6145
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6149
if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6150
(ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6152
* Entity reference callback comes second, it's somewhat
6153
* superfluous but a compatibility to historical behaviour
6155
ctxt->sax->reference(ctxt->userData, ent->name);
6159
/* Entities are not being replaced: pass the reference itself to the SAX reference() callback. */
if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6160
(ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6164
ctxt->sax->reference(ctxt->userData, ent->name);
6167
if ((ctxt->replaceEntities) || (ent->children == NULL)) {
6169
* There is a problem on the handling of _private for entities
6170
* (bug 155816): Should we copy the content of the field from
6171
* the entity (possibly overwriting some value set by the user
6172
* when a copy is created), should we leave it alone, or should
6173
* we try to take care of different situations? The problem
6174
* is exacerbated by the usage of this field by the xmlReader.
6175
* To fix this bug, we look at _private on the created node
6176
* and, if it's NULL, we copy in whatever was in the entity.
6177
* If it's not NULL we leave it alone. This is somewhat of a
6178
* hack - maybe we should have further tests to determine
6181
if ((ctxt->node != NULL) && (ent->children != NULL)) {
6183
* Seems we are generating the DOM content, do
6184
* a simple tree copy for all references except the first
6185
* In the first occurrence list contains the replacement.
6186
* progressive == 2 means we are operating on the Reader
6187
* and since nodes are discarded we must copy all the time.
6189
if (((list == NULL) && (ent->owner == 0)) ||
6190
(ctxt->parseMode == XML_PARSE_READER)) {
6191
xmlNodePtr nw = NULL, cur, firstChild = NULL;
6194
* when operating on a reader, the entities definitions
6195
* are always owning the entities subtree.
6196
if (ctxt->parseMode == XML_PARSE_READER)
6200
cur = ent->children;
6201
while (cur != NULL) {
6202
/* NOTE(review): nw is dereferenced just below; confirm a NULL check on the copy guards it. */
nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6204
if (nw->_private == NULL)
6205
nw->_private = cur->_private;
6206
if (firstChild == NULL){
6209
nw = xmlAddChild(ctxt->node, nw);
6211
if (cur == ent->last) {
6213
* needed to detect some strange empty
6214
* node cases in the reader tests
6216
if ((ctxt->parseMode == XML_PARSE_READER) &&
6218
(nw->type == XML_ELEMENT_NODE) &&
6219
(nw->children == NULL))
6226
#ifdef LIBXML_LEGACY_ENABLED
6227
if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6228
xmlAddEntityReference(ent, firstChild, nw);
6229
#endif /* LIBXML_LEGACY_ENABLED */
6230
} else if (list == NULL) {
6231
xmlNodePtr nw = NULL, cur, next, last,
6234
* Copy the entity child list and make it the new
6235
* entity child list. The goal is to make sure any
6236
* ID or REF referenced will be the one from the
6237
* document content and not the entity copy.
6239
cur = ent->children;
6240
ent->children = NULL;
6243
while (cur != NULL) {
6247
nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6249
if (nw->_private == NULL)
6250
nw->_private = cur->_private;
6251
if (firstChild == NULL){
6254
/* The copy is added under the entity; the original node is added under the current node. */
xmlAddChild((xmlNodePtr) ent, nw);
6255
xmlAddChild(ctxt->node, cur);
6262
#ifdef LIBXML_LEGACY_ENABLED
6263
if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6264
xmlAddEntityReference(ent, firstChild, nw);
6265
#endif /* LIBXML_LEGACY_ENABLED */
6267
const xmlChar *nbktext;
6270
* the name change is to avoid coalescing of the
6271
* node with a possible previous text one which
6272
* would make ent->children a dangling pointer
6274
nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6276
if (ent->children->type == XML_TEXT_NODE)
6277
ent->children->name = nbktext;
6278
if ((ent->last != ent->children) &&
6279
(ent->last->type == XML_TEXT_NODE))
6280
ent->last->name = nbktext;
6281
xmlAddChildList(ctxt->node, ent->children);
6285
* This is to avoid a nasty side effect, see
6286
* characters() in SAX.c
6295
if (val == NULL) return;
6297
* inline the entity.
6299
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6300
(!ctxt->disableSAX))
6301
ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6307
* xmlParseEntityRef:
6308
* @ctxt: an XML parser context
6310
* parse ENTITY references declarations
6312
* [68] EntityRef ::= '&' Name ';'
6314
* [ WFC: Entity Declared ]
6315
* In a document without any DTD, a document with only an internal DTD
6316
* subset which contains no parameter entity references, or a document
6317
* with "standalone='yes'", the Name given in the entity reference
6318
* must match that in an entity declaration, except that well-formed
6319
* documents need not declare any of the following entities: amp, lt,
6320
* gt, apos, quot. The declaration of a parameter entity must precede
6321
* any reference to it. Similarly, the declaration of a general entity
6322
* must precede any reference to it which appears in a default value in an
6323
* attribute-list declaration. Note that if entities are declared in the
6324
* external subset or in external parameter entities, a non-validating
6325
* processor is not obligated to read and process their declarations;
6326
* for such documents, the rule that an entity must be declared is a
6327
* well-formedness constraint only if standalone='yes'.
6329
* [ WFC: Parsed Entity ]
6330
* An entity reference must not contain the name of an unparsed entity
6332
* Returns the xmlEntityPtr if found, or NULL otherwise.
6335
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
/*
 * Parse the name of a general entity reference and look the entity up:
 * first through the SAX getEntity() callback, then among the predefined
 * entities, then through xmlSAX2GetEntity(). The well-formedness
 * constraints quoted below are checked on the result and violations are
 * reported through the error helpers.
 * ctxt: the parser context positioned on the reference.
 */
6336
const xmlChar *name;
6337
xmlEntityPtr ent = NULL;
6343
name = xmlParseName(ctxt);
6345
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6346
"xmlParseEntityRef: no name\n");
6351
* Ask first SAX for entity resolution, otherwise try the
6354
if (ctxt->sax != NULL) {
6355
if (ctxt->sax->getEntity != NULL)
6356
ent = ctxt->sax->getEntity(ctxt->userData, name);
6357
/* Predefined entities are only consulted while wellFormed is still 1. */
if ((ctxt->wellFormed == 1 ) && (ent == NULL))
6358
ent = xmlGetPredefinedEntity(name);
6359
/* Last lookup, used only when the context is its own user data. */
if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6360
(ctxt->userData==ctxt)) {
6361
ent = xmlSAX2GetEntity(ctxt, name);
6365
* [ WFC: Entity Declared ]
6366
* In a document without any DTD, a document with only an
6367
* internal DTD subset which contains no parameter entity
6368
* references, or a document with "standalone='yes'", the
6369
* Name given in the entity reference must match that in an
6370
* entity declaration, except that well-formed documents
6371
* need not declare any of the following entities: amp, lt,
6373
* The declaration of a parameter entity must precede any
6375
* Similarly, the declaration of a general entity must
6376
* precede any reference to it which appears in a default
6377
* value in an attribute-list declaration. Note that if
6378
* entities are declared in the external subset or in
6379
* external parameter entities, a non-validating processor
6380
* is not obligated to read and process their declarations;
6381
* for such documents, the rule that an entity must be
6382
* declared is a well-formedness constraint only if
6386
/* Standalone, or neither external subset nor PE references: fatal error; otherwise reported with the warning code. */
if ((ctxt->standalone == 1) ||
6387
((ctxt->hasExternalSubset == 0) &&
6388
(ctxt->hasPErefs == 0))) {
6389
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6390
"Entity '%s' not defined\n", name);
6392
xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
6393
"Entity '%s' not defined\n", name);
6394
/* NOTE(review): this reference() call does not test ctxt->disableSAX, unlike the calls in xmlParseReference - confirm it is intended. */
if ((ctxt->inSubset == 0) &&
6395
(ctxt->sax != NULL) &&
6396
(ctxt->sax->reference != NULL)) {
6397
ctxt->sax->reference(ctxt->userData, name);
6404
* [ WFC: Parsed Entity ]
6405
* An entity reference must not contain the name of an
6408
else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
6409
xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
6410
"Entity reference to unparsed entity %s\n", name);
6414
* [ WFC: No External Entity References ]
6415
* Attribute values cannot contain direct or indirect
6416
* entity references to external entities.
6418
else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6419
(ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6420
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6421
"Attribute references external entity '%s'\n", name);
6424
* [ WFC: No < in Attribute Values ]
6425
* The replacement text of any entity referred to directly or
6426
* indirectly in an attribute value (other than "<") must
6429
else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6431
(!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6432
(ent->content != NULL) &&
6433
(xmlStrchr(ent->content, '<'))) {
6434
xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6435
"'<' in entity '%s' is not allowed in attributes values\n", name);
6439
* Internal check, no parameter entities here ...
6442
switch (ent->etype) {
6443
case XML_INTERNAL_PARAMETER_ENTITY:
6444
case XML_EXTERNAL_PARAMETER_ENTITY:
6445
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6446
"Attempt to reference the parameter entity '%s'\n",
6455
* [ WFC: No Recursion ]
6456
* A parsed entity must not contain a recursive reference
6457
* to itself, either directly or indirectly.
6458
* Done somewhere else
6462
xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6470
* xmlParseStringEntityRef:
6471
* @ctxt: an XML parser context
6472
* @str: a pointer to an index in the string
6474
* parse ENTITY references declarations, but this version parses it from
6477
* [68] EntityRef ::= '&' Name ';'
6479
* [ WFC: Entity Declared ]
6480
* In a document without any DTD, a document with only an internal DTD
6481
* subset which contains no parameter entity references, or a document
6482
* with "standalone='yes'", the Name given in the entity reference
6483
* must match that in an entity declaration, except that well-formed
6484
* documents need not declare any of the following entities: amp, lt,
6485
* gt, apos, quot. The declaration of a parameter entity must precede
6486
* any reference to it. Similarly, the declaration of a general entity
6487
* must precede any reference to it which appears in a default value in an
6488
* attribute-list declaration. Note that if entities are declared in the
6489
* external subset or in external parameter entities, a non-validating
6490
* processor is not obligated to read and process their declarations;
6491
* for such documents, the rule that an entity must be declared is a
6492
* well-formedness constraint only if standalone='yes'.
6494
* [ WFC: Parsed Entity ]
6495
* An entity reference must not contain the name of an unparsed entity
6497
* Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6498
* is updated to the current location in the string.
6501
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
/*
 * String-based counterpart of xmlParseEntityRef(): the entity name is
 * read with xmlParseStringName() from the string designated by str
 * instead of the parser input. The entity is looked up through SAX
 * getEntity(), the predefined entities and xmlSAX2GetEntity(), and the
 * same well-formedness constraints are checked.
 * ctxt: the parser context; str: address of the current string position.
 */
6505
xmlEntityPtr ent = NULL;
6507
/* Guard against a missing pointer or a missing string. */
if ((str == NULL) || (*str == NULL))
6514
name = xmlParseStringName(ctxt, &ptr);
6516
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6517
"xmlParseStringEntityRef: no name\n");
6522
* Ask first SAX for entity resolution, otherwise try the
6525
if (ctxt->sax != NULL) {
6526
if (ctxt->sax->getEntity != NULL)
6527
ent = ctxt->sax->getEntity(ctxt->userData, name);
6529
ent = xmlGetPredefinedEntity(name);
6530
/* Last lookup, used only when the context is its own user data. */
if ((ent == NULL) && (ctxt->userData==ctxt)) {
6531
ent = xmlSAX2GetEntity(ctxt, name);
6535
* [ WFC: Entity Declared ]
6536
* In a document without any DTD, a document with only an
6537
* internal DTD subset which contains no parameter entity
6538
* references, or a document with "standalone='yes'", the
6539
* Name given in the entity reference must match that in an
6540
* entity declaration, except that well-formed documents
6541
* need not declare any of the following entities: amp, lt,
6543
* The declaration of a parameter entity must precede any
6545
* Similarly, the declaration of a general entity must
6546
* precede any reference to it which appears in a default
6547
* value in an attribute-list declaration. Note that if
6548
* entities are declared in the external subset or in
6549
* external parameter entities, a non-validating processor
6550
* is not obligated to read and process their declarations;
6551
* for such documents, the rule that an entity must be
6552
* declared is a well-formedness constraint only if
6556
/* Standalone, or neither external subset nor PE references: fatal error; otherwise reported with the warning code. */
if ((ctxt->standalone == 1) ||
6557
((ctxt->hasExternalSubset == 0) &&
6558
(ctxt->hasPErefs == 0))) {
6559
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6560
"Entity '%s' not defined\n", name);
6562
xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
6563
"Entity '%s' not defined\n",
6566
/* TODO ? check regressions ctxt->valid = 0; */
6570
* [ WFC: Parsed Entity ]
6571
* An entity reference must not contain the name of an
6574
else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
6575
xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
6576
"Entity reference to unparsed entity %s\n", name);
6580
* [ WFC: No External Entity References ]
6581
* Attribute values cannot contain direct or indirect
6582
* entity references to external entities.
6584
else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6585
(ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6586
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6587
"Attribute references external entity '%s'\n", name);
6590
* [ WFC: No < in Attribute Values ]
6591
* The replacement text of any entity referred to directly or
6592
* indirectly in an attribute value (other than "<") must
6595
else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6597
(!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6598
(ent->content != NULL) &&
6599
(xmlStrchr(ent->content, '<'))) {
6600
xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6601
"'<' in entity '%s' is not allowed in attributes values\n",
6606
* Internal check, no parameter entities here ...
6609
switch (ent->etype) {
6610
case XML_INTERNAL_PARAMETER_ENTITY:
6611
case XML_EXTERNAL_PARAMETER_ENTITY:
6612
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6613
"Attempt to reference the parameter entity '%s'\n",
6622
* [ WFC: No Recursion ]
6623
* A parsed entity must not contain a recursive reference
6624
* to itself, either directly or indirectly.
6625
* Done somewhere else
6629
xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6639
* xmlParsePEReference:
6640
* @ctxt: an XML parser context
6642
* parse PEReference declarations
6643
* The entity content is handled directly by pushing its content as
6644
* a new input stream.
6646
* [69] PEReference ::= '%' Name ';'
6648
* [ WFC: No Recursion ]
6649
* A parsed entity must not contain a recursive
6650
* reference to itself, either directly or indirectly.
6652
* [ WFC: Entity Declared ]
6653
* In a document without any DTD, a document with only an internal DTD
6654
* subset which contains no parameter entity references, or a document
6655
* with "standalone='yes'", ... ... The declaration of a parameter
6656
* entity must precede any reference to it...
6658
* [ VC: Entity Declared ]
6659
* In a document with an external subset or external parameter entities
6660
* with "standalone='no'", ... ... The declaration of a parameter entity
6661
* must precede any reference to it...
6664
* Parameter-entity references may only appear in the DTD.
6665
* NOTE: misleading but this is handled.
6668
xmlParsePEReference(xmlParserCtxtPtr ctxt)
/*
 * Parse a parameter-entity reference from the parser input. The entity
 * is looked up through the SAX getParameterEntity() callback; when it is
 * a parameter entity, an input stream for it is pushed onto the context
 * so that its content is parsed next.
 * ctxt: the parser context positioned on the reference.
 */
6670
const xmlChar *name;
6671
xmlEntityPtr entity = NULL;
6672
xmlParserInputPtr input;
6676
name = xmlParseName(ctxt);
6678
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6679
"xmlParsePEReference: no name\n");
6683
/* The lookup goes through the SAX callback, when one is installed. */
if ((ctxt->sax != NULL) &&
6684
(ctxt->sax->getParameterEntity != NULL))
6685
entity = ctxt->sax->getParameterEntity(ctxt->userData,
6687
if (entity == NULL) {
6689
* [ WFC: Entity Declared ]
6690
* In a document without any DTD, a document with only an
6691
* internal DTD subset which contains no parameter entity
6692
* references, or a document with "standalone='yes'", ...
6693
* ... The declaration of a parameter entity must precede
6694
* any reference to it...
6696
if ((ctxt->standalone == 1) ||
6697
((ctxt->hasExternalSubset == 0) &&
6698
(ctxt->hasPErefs == 0))) {
6699
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6700
"PEReference: %%%s; not found\n",
6704
* [ VC: Entity Declared ]
6705
* In a document with an external subset or external
6706
* parameter entities with "standalone='no'", ...
6707
* ... The declaration of a parameter entity must
6708
* precede any reference to it...
6710
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6711
"PEReference: %%%s; not found\n",
6717
* Internal checking in case the entity quest barfed
6719
/* An entity of any other type is reported with a warning. */
if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6720
(entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6721
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6722
"Internal: %%%s; is not a parameter entity\n",
6724
/* The free hook of the current input is not deallocblankswrapper: push a blanks-wrapper stream for the entity. */
} else if (ctxt->input->free != deallocblankswrapper) {
6726
xmlNewBlanksWrapperInputStream(ctxt, entity);
6727
xmlPushInput(ctxt, input);
6731
* handle the extra spaces added before and after
6732
* c.f. http://www.w3.org/TR/REC-xml#as-PE
6734
input = xmlNewEntityInputStream(ctxt, entity);
6735
xmlPushInput(ctxt, input);
6736
/* An external parameter entity may start with its own text declaration. */
if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6737
(CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
6738
(IS_BLANK_CH(NXT(5)))) {
6739
xmlParseTextDecl(ctxt);
6741
XML_ERR_UNSUPPORTED_ENCODING) {
6743
* The XML REC instructs us to stop parsing
6746
ctxt->instate = XML_PARSER_EOF;
6752
/* Record on the context that a PE reference was met. */
ctxt->hasPErefs = 1;
6754
xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6761
* xmlParseStringPEReference:
6762
* @ctxt: an XML parser context
6763
* @str: a pointer to an index in the string
6765
* parse PEReference declarations
6767
* [69] PEReference ::= '%' Name ';'
6769
* [ WFC: No Recursion ]
6770
* A parsed entity must not contain a recursive
6771
* reference to itself, either directly or indirectly.
6773
* [ WFC: Entity Declared ]
6774
* In a document without any DTD, a document with only an internal DTD
6775
* subset which contains no parameter entity references, or a document
6776
* with "standalone='yes'", ... ... The declaration of a parameter
6777
* entity must precede any reference to it...
6779
* [ VC: Entity Declared ]
6780
* In a document with an external subset or external parameter entities
6781
* with "standalone='no'", ... ... The declaration of a parameter entity
6782
* must precede any reference to it...
6785
* Parameter-entity references may only appear in the DTD.
6786
* NOTE: misleading but this is handled.
6788
* Returns the string of the entity content.
6789
* str is updated to the current value of the index
6792
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
/*
 * String-based counterpart of xmlParsePEReference(): the entity name is
 * read with xmlParseStringName() from the string designated by str, and
 * the entity is looked up through the SAX getParameterEntity() callback.
 * No input stream is pushed on the lines visible here.
 * ctxt: the parser context; str: address of the current string position.
 */
6796
xmlEntityPtr entity = NULL;
6798
/* Nothing to parse without a string: return NULL at once. */
if ((str == NULL) || (*str == NULL)) return(NULL);
6804
name = xmlParseStringName(ctxt, &ptr);
6806
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6807
"xmlParseStringPEReference: no name\n");
6813
/* The lookup goes through the SAX callback, when one is installed. */
if ((ctxt->sax != NULL) &&
6814
(ctxt->sax->getParameterEntity != NULL))
6815
entity = ctxt->sax->getParameterEntity(ctxt->userData,
6817
if (entity == NULL) {
6819
* [ WFC: Entity Declared ]
6820
* In a document without any DTD, a document with only an
6821
* internal DTD subset which contains no parameter entity
6822
* references, or a document with "standalone='yes'", ...
6823
* ... The declaration of a parameter entity must precede
6824
* any reference to it...
6826
if ((ctxt->standalone == 1) ||
6827
((ctxt->hasExternalSubset == 0) &&
6828
(ctxt->hasPErefs == 0))) {
6829
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6830
"PEReference: %%%s; not found\n", name);
6833
* [ VC: Entity Declared ]
6834
* In a document with an external subset or external
6835
* parameter entities with "standalone='no'", ...
6836
* ... The declaration of a parameter entity must
6837
* precede any reference to it...
6839
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6840
"PEReference: %%%s; not found\n",
6846
* Internal checking in case the entity quest barfed
6848
/* An entity of any other type is reported with a warning. */
if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6849
(entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6850
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6851
"%%%s; is not a parameter entity\n",
6855
/* Record on the context that a PE reference was met. */
ctxt->hasPErefs = 1;
6857
xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6867
* xmlParseDocTypeDecl:
6868
* @ctxt: an XML parser context
6870
* parse a DOCTYPE declaration
6872
* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6873
* ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6875
* [ VC: Root Element Type ]
6876
* The Name in the document type declaration must match the element
6877
* type of the root element.
6881
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
/*
 * Parse the head of a DOCTYPE declaration: the document type name and
 * the optional external identifiers. The values are stored on the
 * context and passed to the SAX internalSubset() callback. Declarations
 * of the internal subset itself are left to xmlParseInternalSubset().
 * ctxt: the parser context positioned on the declaration.
 */
6882
const xmlChar *name = NULL;
6883
xmlChar *ExternalID = NULL;
6884
xmlChar *URI = NULL;
6887
* We know that '<!DOCTYPE' has been detected.
6894
* Parse the DOCTYPE name.
6896
name = xmlParseName(ctxt);
6898
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6899
"xmlParseDocTypeDecl : no DOCTYPE name !\n");
6901
/* The DOCTYPE name is kept on the context. */
ctxt->intSubName = name;
6906
* Check for SystemID and ExternalID
6908
URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6910
/* Either identifier being present flags the document as having an external subset. */
if ((URI != NULL) || (ExternalID != NULL)) {
6911
ctxt->hasExternalSubset = 1;
6913
/* NOTE(review): URI lands in extSubURI and ExternalID in extSubSystem; confirm the field naming is as intended. */
ctxt->extSubURI = URI;
6914
ctxt->extSubSystem = ExternalID;
6919
* Create and update the internal subset.
6921
if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6922
(!ctxt->disableSAX))
6923
ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6926
* Is there any internal subset declarations ?
6927
* they are handled separately in xmlParseInternalSubset()
6933
* We should be at the end of the DOCTYPE declaration.
6936
xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
6942
* xmlParseInternalSubset:
6943
* @ctxt: an XML parser context
6945
* parse the internal subset declaration
6947
* [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6951
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
/*
 * Parse the internal subset of a DOCTYPE declaration: a loop over markup
 * declarations and PE references that runs until the closing square
 * bracket, with a guard against passes that consume no input.
 * ctxt: the parser context positioned inside the DOCTYPE declaration.
 */
6953
* Is there any DTD definition ?
6956
ctxt->instate = XML_PARSER_DTD;
6959
* Parse the succession of Markup declarations and
6961
* Subsequence (markupdecl | PEReference | S)*
6963
while (RAW != ']') {
6964
/* Snapshot of the position, compared at the end of the pass to detect a pass that consumed nothing. */
const xmlChar *check = CUR_PTR;
6965
/* NOTE(review): cons is unsigned int; confirm this matches the declared type of input->consumed. */
unsigned int cons = ctxt->input->consumed;
6968
xmlParseMarkupDecl(ctxt);
6969
xmlParsePEReference(ctxt);
6972
* Pop-up of finished entities.
6974
while ((RAW == 0) && (ctxt->inputNr > 1))
6977
/* Neither the pointer nor the consumed counter moved: report an internal error. */
if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6978
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6979
"xmlParseInternalSubset: error detected in Markup declaration\n");
6990
* We should be at the end of the DOCTYPE declaration.
6993
xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
6998
#ifdef LIBXML_SAX1_ENABLED
7000
* xmlParseAttribute:
7001
* @ctxt: an XML parser context
7002
* @value: a xmlChar ** used to store the value of the attribute
7004
* parse an attribute
7006
* [41] Attribute ::= Name Eq AttValue
7008
* [ WFC: No External Entity References ]
7009
* Attribute values cannot contain direct or indirect entity references
7010
* to external entities.
7012
* [ WFC: No < in Attribute Values ]
7013
* The replacement text of any entity referred to directly or indirectly in
7014
* an attribute value (other than "<") must not contain a <.
7016
* [ VC: Attribute Value Type ]
7017
* The attribute must have been declared; the value must be of the type
7020
* [25] Eq ::= S? '=' S?
7024
* [NS 11] Attribute ::= QName Eq AttValue
7026
* Also the case QName == xmlns:??? is handled independently as a namespace
7029
* Returns the attribute name, and the value in *value.
7033
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
7034
const xmlChar *name;
7039
name = xmlParseName(ctxt);
7041
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7042
"error parsing attribute name\n");
7053
val = xmlParseAttValue(ctxt);
7054
ctxt->instate = XML_PARSER_CONTENT;
7056
xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
7057
"Specification mandate value for attribute %s\n", name);
7062
* Check that xml:lang conforms to the specification
7063
* No longer registered as an error, just generate a warning now
7064
* since this was deprecated in XML second edition
7066
if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7067
if (!xmlCheckLanguageID(val)) {
7068
xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7069
"Malformed value for xml:lang : %s\n",
7075
* Check that xml:space conforms to the specification
7077
if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7078
if (xmlStrEqual(val, BAD_CAST "default"))
7080
else if (xmlStrEqual(val, BAD_CAST "preserve"))
7083
xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
7084
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7095
* @ctxt: an XML parser context
7097
* parse a start of tag either for rule element or
7098
* EmptyElement. In both cases we don't parse the tag closing chars.
7100
* [40] STag ::= '<' Name (S Attribute)* S? '>'
7102
* [ WFC: Unique Att Spec ]
7103
* No attribute name may appear more than once in the same start-tag or
7104
* empty-element tag.
7106
* [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7108
* [ WFC: Unique Att Spec ]
7109
* No attribute name may appear more than once in the same start-tag or
7110
* empty-element tag.
7114
* [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7116
* [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7118
* Returns the element name parsed
7122
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
7123
const xmlChar *name;
7124
const xmlChar *attname;
7126
const xmlChar **atts = ctxt->atts;
7128
int maxatts = ctxt->maxatts;
7131
if (RAW != '<') return(NULL);
7134
name = xmlParseName(ctxt);
7136
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7137
"xmlParseStartTag: invalid element name\n");
7142
* Now parse the attributes, it ends up with the ending
7149
while ((RAW != '>') &&
7150
((RAW != '/') || (NXT(1) != '>')) &&
7151
(IS_BYTE_CHAR(RAW))) {
7152
const xmlChar *q = CUR_PTR;
7153
unsigned int cons = ctxt->input->consumed;
7155
attname = xmlParseAttribute(ctxt, &attvalue);
7156
if ((attname != NULL) && (attvalue != NULL)) {
7158
* [ WFC: Unique Att Spec ]
7159
* No attribute name may appear more than once in the same
7160
* start-tag or empty-element tag.
7162
for (i = 0; i < nbatts;i += 2) {
7163
if (xmlStrEqual(atts[i], attname)) {
7164
xmlErrAttributeDup(ctxt, NULL, attname);
7170
* Add the pair to atts
7173
maxatts = 22; /* allow for 10 attrs by default */
7174
atts = (const xmlChar **)
7175
xmlMalloc(maxatts * sizeof(xmlChar *));
7177
xmlErrMemory(ctxt, NULL);
7178
if (attvalue != NULL)
7183
ctxt->maxatts = maxatts;
7184
} else if (nbatts + 4 > maxatts) {
7188
n = (const xmlChar **) xmlRealloc((void *) atts,
7189
maxatts * sizeof(const xmlChar *));
7191
xmlErrMemory(ctxt, NULL);
7192
if (attvalue != NULL)
7198
ctxt->maxatts = maxatts;
7200
atts[nbatts++] = attname;
7201
atts[nbatts++] = attvalue;
7202
atts[nbatts] = NULL;
7203
atts[nbatts + 1] = NULL;
7205
if (attvalue != NULL)
7212
if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7214
if (!IS_BLANK_CH(RAW)) {
7215
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7216
"attributes construct error\n");
7219
if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7220
(attname == NULL) && (attvalue == NULL)) {
7221
xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7222
"xmlParseStartTag: problem parsing attributes\n");
7230
* SAX: Start of Element !
7232
if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
7233
(!ctxt->disableSAX)) {
7235
ctxt->sax->startElement(ctxt->userData, name, atts);
7237
ctxt->sax->startElement(ctxt->userData, name, NULL);
7241
/* Free only the content strings */
7242
for (i = 1;i < nbatts;i+=2)
7243
if (atts[i] != NULL)
7244
xmlFree((xmlChar *) atts[i]);
7251
* @ctxt: an XML parser context
7252
* @line: line of the start tag
7253
* @nsNr: number of namespaces on the start tag
7255
* parse an end of tag
7257
* [42] ETag ::= '</' Name S? '>'
7261
* [NS 9] ETag ::= '</' QName S? '>'
7265
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
7266
const xmlChar *name;
7269
if ((RAW != '<') || (NXT(1) != '/')) {
7270
xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
7271
"xmlParseEndTag: '</' not found\n");
7276
name = xmlParseNameAndCompare(ctxt,ctxt->name);
7279
* We should definitely be at the ending "S? '>'" part
7283
if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
7284
xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
7289
* [ WFC: Element Type Match ]
7290
* The Name in an element's end-tag must match the element type in the
7294
if (name != (xmlChar*)1) {
7295
if (name == NULL) name = BAD_CAST "unparseable";
7296
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
7297
"Opening and ending tag mismatch: %s line %d and %s\n",
7298
ctxt->name, line, name);
7304
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7305
(!ctxt->disableSAX))
7306
ctxt->sax->endElement(ctxt->userData, ctxt->name);
7315
* @ctxt: an XML parser context
7317
* parse an end of tag
7319
* [42] ETag ::= '</' Name S? '>'
7323
* [NS 9] ETag ::= '</' QName S? '>'
7327
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
7328
xmlParseEndTag1(ctxt, 0);
7330
#endif /* LIBXML_SAX1_ENABLED */
7332
/************************************************************************
7334
* SAX 2 specific operations *
7336
************************************************************************/
7338
static const xmlChar *
7339
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7345
* Handler for more complex cases
7349
if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
7350
(!IS_LETTER(c) && (c != '_'))) {
7354
while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
7355
((IS_LETTER(c)) || (IS_DIGIT(c)) ||
7356
(c == '.') || (c == '-') || (c == '_') ||
7357
(IS_COMBINING(c)) ||
7358
(IS_EXTENDER(c)))) {
7359
if (count++ > 100) {
7367
return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7372
* @ctxt: an XML parser context
7373
* @prefix: the prefix to lookup
7375
* Lookup the namespace name for the @prefix (which can be NULL)
7376
* The prefix must come from the @ctxt->dict dictionary
7378
* Returns the namespace name or NULL if not bound
7380
static const xmlChar *
7381
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7384
if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
7385
for (i = ctxt->nsNr - 2;i >= 0;i-=2)
7386
if (ctxt->nsTab[i] == prefix) {
7387
if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7389
return(ctxt->nsTab[i + 1]);
7396
* @ctxt: an XML parser context
7397
* @len: length of the string parsed
7399
* parse an XML name.
7401
* [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7402
* CombiningChar | Extender
7404
* [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7406
* Returns the Name parsed or NULL
7409
static const xmlChar *
7410
xmlParseNCName(xmlParserCtxtPtr ctxt) {
7416
* Accelerator for simple ASCII names
7418
in = ctxt->input->cur;
7419
if (((*in >= 0x61) && (*in <= 0x7A)) ||
7420
((*in >= 0x41) && (*in <= 0x5A)) ||
7423
while (((*in >= 0x61) && (*in <= 0x7A)) ||
7424
((*in >= 0x41) && (*in <= 0x5A)) ||
7425
((*in >= 0x30) && (*in <= 0x39)) ||
7426
(*in == '_') || (*in == '-') ||
7429
if ((*in > 0) && (*in < 0x80)) {
7430
count = in - ctxt->input->cur;
7431
ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7432
ctxt->input->cur = in;
7433
ctxt->nbChars += count;
7434
ctxt->input->col += count;
7436
xmlErrMemory(ctxt, NULL);
7441
return(xmlParseNCNameComplex(ctxt));
7446
* @ctxt: an XML parser context
7447
* @prefix: pointer to store the prefix part
7449
* parse an XML Namespace QName
7451
* [6] QName ::= (Prefix ':')? LocalPart
7452
* [7] Prefix ::= NCName
7453
* [8] LocalPart ::= NCName
7455
* Returns the Name parsed or NULL
7458
static const xmlChar *
7459
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7460
const xmlChar *l, *p;
7464
l = xmlParseNCName(ctxt);
7467
l = xmlParseName(ctxt);
7469
xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7470
"Failed to parse QName '%s'\n", l, NULL, NULL);
7480
l = xmlParseNCName(ctxt);
7484
xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7485
"Failed to parse QName '%s:'\n", p, NULL, NULL);
7486
tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7487
p = xmlDictLookup(ctxt->dict, tmp, -1);
7488
if (tmp != NULL) xmlFree(tmp);
7495
xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7496
"Failed to parse QName '%s:%s:'\n", p, l, NULL);
7498
tmp = (xmlChar *) xmlParseName(ctxt);
7500
tmp = xmlBuildQName(tmp, l, NULL, 0);
7501
l = xmlDictLookup(ctxt->dict, tmp, -1);
7502
if (tmp != NULL) xmlFree(tmp);
7506
tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7507
l = xmlDictLookup(ctxt->dict, tmp, -1);
7508
if (tmp != NULL) xmlFree(tmp);
7519
* xmlParseQNameAndCompare:
7520
* @ctxt: an XML parser context
7521
* @name: the localname
7522
* @prefix: the prefix, if any.
7524
* parse an XML name and compares for match
7525
* (specialized for endtag parsing)
7527
* Returns NULL for an illegal name, (xmlChar*) 1 for success
7528
* and the name for mismatch
7531
static const xmlChar *
7532
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7533
xmlChar const *prefix) {
7534
const xmlChar *cmp = name;
7537
const xmlChar *prefix2;
7539
if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7542
in = ctxt->input->cur;
7545
while (*in != 0 && *in == *cmp) {
7549
if ((*cmp == 0) && (*in == ':')) {
7552
while (*in != 0 && *in == *cmp) {
7556
if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
7558
ctxt->input->cur = in;
7559
return((const xmlChar*) 1);
7563
* all strings come from the dictionary, equality can be done directly
7565
ret = xmlParseQName (ctxt, &prefix2);
7566
if ((ret == name) && (prefix == prefix2))
7567
return((const xmlChar*) 1);
7572
* xmlParseAttValueInternal:
7573
* @ctxt: an XML parser context
7574
* @len: attribute len result
7575
* @alloc: whether the attribute was reallocated as a new string
7576
* @normalize: if 1 then further non-CDATA normalization must be done
7578
* parse a value for an attribute.
7579
* NOTE: if no normalization is needed, the routine will return pointers
7580
* directly from the data buffer.
7582
* 3.3.3 Attribute-Value Normalization:
7583
* Before the value of an attribute is passed to the application or
7584
* checked for validity, the XML processor must normalize it as follows:
7585
* - a character reference is processed by appending the referenced
7586
* character to the attribute value
7587
* - an entity reference is processed by recursively processing the
7588
* replacement text of the entity
7589
* - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7590
* appending #x20 to the normalized value, except that only a single
7591
* #x20 is appended for a "#xD#xA" sequence that is part of an external
7592
* parsed entity or the literal entity value of an internal parsed entity
7593
* - other characters are processed by appending them to the normalized value
7594
* If the declared value is not CDATA, then the XML processor must further
7595
* process the normalized attribute value by discarding any leading and
7596
* trailing space (#x20) characters, and by replacing sequences of space
7597
* (#x20) characters by a single space (#x20) character.
7598
* All attributes for which no declaration has been read should be treated
7599
* by a non-validating parser as if declared CDATA.
7601
* Returns the AttValue parsed or NULL. The value has to be freed by the
7602
* caller if it was copied, this can be detected by val[*len] == 0.
7606
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7610
const xmlChar *in = NULL, *start, *end, *last;
7611
xmlChar *ret = NULL;
7614
in = (xmlChar *) CUR_PTR;
7615
if (*in != '"' && *in != '\'') {
7616
xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
7619
ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
7622
* try to handle in this routine the most common case where no
7623
* allocation of a new string is required and where content is
7627
end = ctxt->input->end;
7630
const xmlChar *oldbase = ctxt->input->base;
7632
if (oldbase != ctxt->input->base) {
7633
long delta = ctxt->input->base - oldbase;
7634
start = start + delta;
7637
end = ctxt->input->end;
7641
* Skip any leading spaces
7643
while ((in < end) && (*in != limit) &&
7644
((*in == 0x20) || (*in == 0x9) ||
7645
(*in == 0xA) || (*in == 0xD))) {
7649
const xmlChar *oldbase = ctxt->input->base;
7651
if (oldbase != ctxt->input->base) {
7652
long delta = ctxt->input->base - oldbase;
7653
start = start + delta;
7656
end = ctxt->input->end;
7659
while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7660
(*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7661
if ((*in++ == 0x20) && (*in == 0x20)) break;
7663
const xmlChar *oldbase = ctxt->input->base;
7665
if (oldbase != ctxt->input->base) {
7666
long delta = ctxt->input->base - oldbase;
7667
start = start + delta;
7670
end = ctxt->input->end;
7675
* skip the trailing blanks
7677
while ((last[-1] == 0x20) && (last > start)) last--;
7678
while ((in < end) && (*in != limit) &&
7679
((*in == 0x20) || (*in == 0x9) ||
7680
(*in == 0xA) || (*in == 0xD))) {
7683
const xmlChar *oldbase = ctxt->input->base;
7685
if (oldbase != ctxt->input->base) {
7686
long delta = ctxt->input->base - oldbase;
7687
start = start + delta;
7689
last = last + delta;
7691
end = ctxt->input->end;
7694
if (*in != limit) goto need_complex;
7696
while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7697
(*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7700
const xmlChar *oldbase = ctxt->input->base;
7702
if (oldbase != ctxt->input->base) {
7703
long delta = ctxt->input->base - oldbase;
7704
start = start + delta;
7707
end = ctxt->input->end;
7711
if (*in != limit) goto need_complex;
7715
*len = last - start;
7716
ret = (xmlChar *) start;
7718
if (alloc) *alloc = 1;
7719
ret = xmlStrndup(start, last - start);
7722
if (alloc) *alloc = 0;
7725
if (alloc) *alloc = 1;
7726
return xmlParseAttValueComplex(ctxt, len, normalize);
7730
* xmlParseAttribute2:
7731
* @ctxt: an XML parser context
7732
* @pref: the element prefix
7733
* @elem: the element name
7734
* @prefix: a xmlChar ** used to store the value of the attribute prefix
7735
* @value: a xmlChar ** used to store the value of the attribute
7736
* @len: an int * to save the length of the attribute
7737
* @alloc: an int * to indicate if the attribute was allocated
7739
* parse an attribute in the new SAX2 framework.
7741
* Returns the attribute name, and the value in *value.
7744
static const xmlChar *
7745
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7746
const xmlChar *pref, const xmlChar *elem,
7747
const xmlChar **prefix, xmlChar **value,
7748
int *len, int *alloc) {
7749
const xmlChar *name;
7750
xmlChar *val, *internal_val = NULL;
7755
name = xmlParseQName(ctxt, prefix);
7757
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7758
"error parsing attribute name\n");
7763
* get the type if needed
7765
if (ctxt->attsSpecial != NULL) {
7768
type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7769
pref, elem, *prefix, name);
7770
if (type != 0) normalize = 1;
7780
val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
7781
ctxt->instate = XML_PARSER_CONTENT;
7783
xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
7784
"Specification mandate value for attribute %s\n", name);
7788
if (*prefix == ctxt->str_xml) {
7790
* Check that xml:lang conforms to the specification
7791
* No longer registered as an error, just generate a warning now
7792
* since this was deprecated in XML second edition
7794
if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
7795
internal_val = xmlStrndup(val, *len);
7796
if (!xmlCheckLanguageID(internal_val)) {
7797
xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7798
"Malformed value for xml:lang : %s\n",
7799
internal_val, NULL);
7804
* Check that xml:space conforms to the specification
7806
if (xmlStrEqual(name, BAD_CAST "space")) {
7807
internal_val = xmlStrndup(val, *len);
7808
if (xmlStrEqual(internal_val, BAD_CAST "default"))
7810
else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
7813
xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
7814
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7815
internal_val, NULL);
7819
xmlFree(internal_val);
7828
* xmlParseStartTag2:
7829
* @ctxt: an XML parser context
7831
* parse a start of tag either for rule element or
7832
* EmptyElement. In both cases we don't parse the tag closing chars.
7833
* This routine is called when running SAX2 parsing
7835
* [40] STag ::= '<' Name (S Attribute)* S? '>'
7837
* [ WFC: Unique Att Spec ]
7838
* No attribute name may appear more than once in the same start-tag or
7839
* empty-element tag.
7841
* [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7843
* [ WFC: Unique Att Spec ]
7844
* No attribute name may appear more than once in the same start-tag or
7845
* empty-element tag.
7849
* [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7851
* [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7853
* Returns the element name parsed
7856
static const xmlChar *
7857
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
7858
const xmlChar **URI, int *tlen) {
7859
const xmlChar *localname;
7860
const xmlChar *prefix;
7861
const xmlChar *attname;
7862
const xmlChar *aprefix;
7863
const xmlChar *nsname;
7865
const xmlChar **atts = ctxt->atts;
7866
int maxatts = ctxt->maxatts;
7867
int nratts, nbatts, nbdef;
7868
int i, j, nbNs, attval, oldline, oldcol;
7869
const xmlChar *base;
7871
int nsNr = ctxt->nsNr;
7873
if (RAW != '<') return(NULL);
7877
* NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7878
* point since the attribute values may be stored as pointers to
7879
* the buffer and calling SHRINK would destroy them !
7880
* The Shrinking is only possible once the full set of attribute
7881
* callbacks have been done.
7885
base = ctxt->input->base;
7886
cur = ctxt->input->cur - ctxt->input->base;
7887
oldline = ctxt->input->line;
7888
oldcol = ctxt->input->col;
7894
/* Forget any namespaces added during an earlier parse of this element. */
7897
localname = xmlParseQName(ctxt, &prefix);
7898
if (localname == NULL) {
7899
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7900
"StartTag: invalid element name\n");
7903
*tlen = ctxt->input->cur - ctxt->input->base - cur;
7906
* Now parse the attributes, it ends up with the ending
7912
if (ctxt->input->base != base) goto base_changed;
7914
while ((RAW != '>') &&
7915
((RAW != '/') || (NXT(1) != '>')) &&
7916
(IS_BYTE_CHAR(RAW))) {
7917
const xmlChar *q = CUR_PTR;
7918
unsigned int cons = ctxt->input->consumed;
7919
int len = -1, alloc = 0;
7921
attname = xmlParseAttribute2(ctxt, prefix, localname,
7922
&aprefix, &attvalue, &len, &alloc);
7923
if (ctxt->input->base != base) {
7924
if ((attvalue != NULL) && (alloc != 0))
7929
if ((attname != NULL) && (attvalue != NULL)) {
7930
if (len < 0) len = xmlStrlen(attvalue);
7931
if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7932
const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7936
uri = xmlParseURI((const char *) URL);
7938
xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7939
"xmlns: %s not a valid URI\n",
7942
if (uri->scheme == NULL) {
7943
xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7944
"xmlns: URI %s is not absolute\n",
7951
* check that it's not a defined namespace
7953
for (j = 1;j <= nbNs;j++)
7954
if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7957
xmlErrAttributeDup(ctxt, NULL, attname);
7959
if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
7960
if (alloc != 0) xmlFree(attvalue);
7964
if (aprefix == ctxt->str_xmlns) {
7965
const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7968
if (attname == ctxt->str_xml) {
7969
if (URL != ctxt->str_xml_ns) {
7970
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7971
"xml namespace prefix mapped to wrong URI\n",
7975
* Do not keep a namespace definition node
7977
if (alloc != 0) xmlFree(attvalue);
7981
uri = xmlParseURI((const char *) URL);
7983
xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7984
"xmlns:%s: '%s' is not a valid URI\n",
7987
if ((ctxt->pedantic) && (uri->scheme == NULL)) {
7988
xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7989
"xmlns:%s: URI %s is not absolute\n",
7996
* check that it's not a defined namespace
7998
for (j = 1;j <= nbNs;j++)
7999
if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8002
xmlErrAttributeDup(ctxt, aprefix, attname);
8004
if (nsPush(ctxt, attname, URL) > 0) nbNs++;
8005
if (alloc != 0) xmlFree(attvalue);
8007
if (ctxt->input->base != base) goto base_changed;
8012
* Add the pair to atts
8014
if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8015
if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
8016
if (attvalue[len] == 0)
8020
maxatts = ctxt->maxatts;
8023
ctxt->attallocs[nratts++] = alloc;
8024
atts[nbatts++] = attname;
8025
atts[nbatts++] = aprefix;
8026
atts[nbatts++] = NULL; /* the URI will be fetched later */
8027
atts[nbatts++] = attvalue;
8029
atts[nbatts++] = attvalue;
8031
* tag if some deallocation is needed
8033
if (alloc != 0) attval = 1;
8035
if ((attvalue != NULL) && (attvalue[len] == 0))
8042
if (ctxt->input->base != base) goto base_changed;
8043
if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8045
if (!IS_BLANK_CH(RAW)) {
8046
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8047
"attributes construct error\n");
8051
if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8052
(attname == NULL) && (attvalue == NULL)) {
8053
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8054
"xmlParseStartTag: problem parsing attributes\n");
8058
if (ctxt->input->base != base) goto base_changed;
8062
* The attributes defaulting
8064
if (ctxt->attsDefault != NULL) {
8065
xmlDefAttrsPtr defaults;
8067
defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8068
if (defaults != NULL) {
8069
for (i = 0;i < defaults->nbAttrs;i++) {
8070
attname = defaults->values[4 * i];
8071
aprefix = defaults->values[4 * i + 1];
8074
* special work for namespaces defaulted defs
8076
if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8078
* check that it's not a defined namespace
8080
for (j = 1;j <= nbNs;j++)
8081
if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8083
if (j <= nbNs) continue;
8085
nsname = xmlGetNamespace(ctxt, NULL);
8086
if (nsname != defaults->values[4 * i + 2]) {
8087
if (nsPush(ctxt, NULL,
8088
defaults->values[4 * i + 2]) > 0)
8091
} else if (aprefix == ctxt->str_xmlns) {
8093
* check that it's not a defined namespace
8095
for (j = 1;j <= nbNs;j++)
8096
if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8098
if (j <= nbNs) continue;
8100
nsname = xmlGetNamespace(ctxt, attname);
8101
if (nsname != defaults->values[2]) {
8102
if (nsPush(ctxt, attname,
8103
defaults->values[4 * i + 2]) > 0)
8108
* check that it's not a defined attribute
8110
for (j = 0;j < nbatts;j+=5) {
8111
if ((attname == atts[j]) && (aprefix == atts[j+1]))
8114
if (j < nbatts) continue;
8116
if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8117
if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
8120
maxatts = ctxt->maxatts;
8123
atts[nbatts++] = attname;
8124
atts[nbatts++] = aprefix;
8125
if (aprefix == NULL)
8126
atts[nbatts++] = NULL;
8128
atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8129
atts[nbatts++] = defaults->values[4 * i + 2];
8130
atts[nbatts++] = defaults->values[4 * i + 3];
8138
* The attributes checkings
8140
for (i = 0; i < nbatts;i += 5) {
8142
* The default namespace does not apply to attribute names.
8144
if (atts[i + 1] != NULL) {
8145
nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8146
if (nsname == NULL) {
8147
xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8148
"Namespace prefix %s for %s on %s is not defined\n",
8149
atts[i + 1], atts[i], localname);
8151
atts[i + 2] = nsname;
8155
* [ WFC: Unique Att Spec ]
8156
* No attribute name may appear more than once in the same
8157
* start-tag or empty-element tag.
8158
* As extended by the Namespace in XML REC.
8160
for (j = 0; j < i;j += 5) {
8161
if (atts[i] == atts[j]) {
8162
if (atts[i+1] == atts[j+1]) {
8163
xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8166
if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8167
xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8168
"Namespaced Attribute %s in '%s' redefined\n",
8169
atts[i], nsname, NULL);
8176
nsname = xmlGetNamespace(ctxt, prefix);
8177
if ((prefix != NULL) && (nsname == NULL)) {
8178
xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8179
"Namespace prefix %s on %s is not defined\n",
8180
prefix, localname, NULL);
8186
* SAX: Start of Element !
8188
if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8189
(!ctxt->disableSAX)) {
8191
ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8192
nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8193
nbatts / 5, nbdef, atts);
8195
ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8196
nsname, 0, NULL, nbatts / 5, nbdef, atts);
8200
* Free up attribute allocated strings if needed
8203
for (i = 3,j = 0; j < nratts;i += 5,j++)
8204
if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8205
xmlFree((xmlChar *) atts[i]);
8212
* the attribute strings are valid iff the base didn't change
8215
for (i = 3,j = 0; j < nratts;i += 5,j++)
8216
if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8217
xmlFree((xmlChar *) atts[i]);
8219
ctxt->input->cur = ctxt->input->base + cur;
8220
ctxt->input->line = oldline;
8221
ctxt->input->col = oldcol;
8222
if (ctxt->wellFormed == 1) {
8230
* @ctxt: an XML parser context
8231
* @line: line of the start tag
8232
* @nsNr: number of namespaces on the start tag
8234
* parse an end of tag
8236
* [42] ETag ::= '</' Name S? '>'
8240
* [NS 9] ETag ::= '</' QName S? '>'
8244
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
8245
const xmlChar *URI, int line, int nsNr, int tlen) {
8246
const xmlChar *name;
8249
if ((RAW != '<') || (NXT(1) != '/')) {
8250
xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
8255
if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
8256
if (ctxt->input->cur[tlen] == '>') {
8257
ctxt->input->cur += tlen + 1;
8260
ctxt->input->cur += tlen;
8264
name = xmlParseNameAndCompare(ctxt, ctxt->name);
8266
name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8270
* We should definitely be at the ending "S? '>'" part
8274
if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8275
xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8280
* [ WFC: Element Type Match ]
8281
* The Name in an element's end-tag must match the element type in the
8285
if (name != (xmlChar*)1) {
8286
if (name == NULL) name = BAD_CAST "unparseable";
8287
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8288
"Opening and ending tag mismatch: %s line %d and %s\n",
8289
ctxt->name, line, name);
8296
if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8297
(!ctxt->disableSAX))
8298
ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8308
* @ctxt: an XML parser context
8310
* Parse escaped pure raw content.
8312
* [18] CDSect ::= CDStart CData CDEnd
8314
* [19] CDStart ::= '<![CDATA['
8316
* [20] CData ::= (Char* - (Char* ']]>' Char*))
8318
* [21] CDEnd ::= ']]>'
8321
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8322
xmlChar *buf = NULL;
8324
int size = XML_PARSER_BUFFER_SIZE;
8330
/* Check 2.6.0 was NXT(0) not RAW */
8331
if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
8336
ctxt->instate = XML_PARSER_CDATA_SECTION;
8339
xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
8340
ctxt->instate = XML_PARSER_CONTENT;
8346
xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
8347
ctxt->instate = XML_PARSER_CONTENT;
8352
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
8354
xmlErrMemory(ctxt, NULL);
8357
while (IS_CHAR(cur) &&
8358
((r != ']') || (s != ']') || (cur != '>'))) {
8359
if (len + 5 >= size) {
8363
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8366
xmlErrMemory(ctxt, NULL);
8371
COPY_BUF(rl,buf,len,r);
8385
ctxt->instate = XML_PARSER_CONTENT;
8387
xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
8388
"CData section not finished\n%.50s\n", buf);
8395
* OK the buffer is to be consumed as cdata.
8397
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8398
if (ctxt->sax->cdataBlock != NULL)
8399
ctxt->sax->cdataBlock(ctxt->userData, buf, len);
8400
else if (ctxt->sax->characters != NULL)
8401
ctxt->sax->characters(ctxt->userData, buf, len);
8408
* @ctxt: an XML parser context
8412
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8416
xmlParseContent(xmlParserCtxtPtr ctxt) {
8418
while ((RAW != 0) &&
8419
((RAW != '<') || (NXT(1) != '/')) &&
8420
(ctxt->instate != XML_PARSER_EOF)) {
8421
const xmlChar *test = CUR_PTR;
8422
unsigned int cons = ctxt->input->consumed;
8423
const xmlChar *cur = ctxt->input->cur;
8426
* First case : a Processing Instruction.
8428
if ((*cur == '<') && (cur[1] == '?')) {
8433
* Second case : a CDSection
8435
/* 2.6.0 test was *cur not RAW */
8436
else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
8437
xmlParseCDSect(ctxt);
8441
* Third case : a comment
8443
else if ((*cur == '<') && (NXT(1) == '!') &&
8444
(NXT(2) == '-') && (NXT(3) == '-')) {
8445
xmlParseComment(ctxt);
8446
ctxt->instate = XML_PARSER_CONTENT;
8450
* Fourth case : a sub-element.
8452
else if (*cur == '<') {
8453
xmlParseElement(ctxt);
8457
* Fifth case : a reference. If it has not been resolved,
8458
* parsing returns its Name, create the node
8461
else if (*cur == '&') {
8462
xmlParseReference(ctxt);
8466
* Last case, text. Note that References are handled directly.
8469
xmlParseCharData(ctxt, 0);
8474
* Pop-up of finished entities.
8476
while ((RAW == 0) && (ctxt->inputNr > 1))
8480
if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
8481
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8482
"detected an error in element content\n");
8483
ctxt->instate = XML_PARSER_EOF;
8491
* @ctxt: an XML parser context
8493
* parse an XML element, this is highly recursive
8495
* [39] element ::= EmptyElemTag | STag content ETag
8497
* [ WFC: Element Type Match ]
8498
* The Name in an element's end-tag must match the element type in the
8504
xmlParseElement(xmlParserCtxtPtr ctxt) {
8505
const xmlChar *name;
8506
const xmlChar *prefix;
8508
xmlParserNodeInfo node_info;
8511
int nsNr = ctxt->nsNr;
8513
if ((unsigned int) ctxt->nameNr > xmlParserMaxDepth) {
8514
xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
8515
"Excessive depth in document: change xmlParserMaxDepth = %d\n",
8517
ctxt->instate = XML_PARSER_EOF;
8521
/* Capture start position */
8522
if (ctxt->record_info) {
8523
node_info.begin_pos = ctxt->input->consumed +
8524
(CUR_PTR - ctxt->input->base);
8525
node_info.begin_line = ctxt->input->line;
8528
if (ctxt->spaceNr == 0)
8529
spacePush(ctxt, -1);
8530
else if (*ctxt->space == -2)
8531
spacePush(ctxt, -1);
8533
spacePush(ctxt, *ctxt->space);
8535
line = ctxt->input->line;
8536
#ifdef LIBXML_SAX1_ENABLED
8538
#endif /* LIBXML_SAX1_ENABLED */
8539
name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
8540
#ifdef LIBXML_SAX1_ENABLED
8542
name = xmlParseStartTag(ctxt);
8543
#endif /* LIBXML_SAX1_ENABLED */
8548
namePush(ctxt, name);
8551
#ifdef LIBXML_VALID_ENABLED
8553
* [ VC: Root Element Type ]
8554
* The Name in the document type declaration must match the element
8555
* type of the root element.
8557
if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8558
ctxt->node && (ctxt->node == ctxt->myDoc->children))
8559
ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8560
#endif /* LIBXML_VALID_ENABLED */
8563
* Check for an Empty Element.
8565
if ((RAW == '/') && (NXT(1) == '>')) {
8568
if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8569
(!ctxt->disableSAX))
8570
ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
8571
#ifdef LIBXML_SAX1_ENABLED
8573
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8574
(!ctxt->disableSAX))
8575
ctxt->sax->endElement(ctxt->userData, name);
8576
#endif /* LIBXML_SAX1_ENABLED */
8580
if (nsNr != ctxt->nsNr)
8581
nsPop(ctxt, ctxt->nsNr - nsNr);
8582
if ( ret != NULL && ctxt->record_info ) {
8583
node_info.end_pos = ctxt->input->consumed +
8584
(CUR_PTR - ctxt->input->base);
8585
node_info.end_line = ctxt->input->line;
8586
node_info.node = ret;
8587
xmlParserAddNodeInfo(ctxt, &node_info);
8594
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8595
"Couldn't find end of Start Tag %s line %d\n",
8599
* end of parsing of this node.
8604
if (nsNr != ctxt->nsNr)
8605
nsPop(ctxt, ctxt->nsNr - nsNr);
8608
* Capture end position and add node
8610
if ( ret != NULL && ctxt->record_info ) {
8611
node_info.end_pos = ctxt->input->consumed +
8612
(CUR_PTR - ctxt->input->base);
8613
node_info.end_line = ctxt->input->line;
8614
node_info.node = ret;
8615
xmlParserAddNodeInfo(ctxt, &node_info);
8621
* Parse the content of the element:
8623
xmlParseContent(ctxt);
8624
if (!IS_BYTE_CHAR(RAW)) {
8625
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
8626
"Premature end of data in tag %s line %d\n",
8630
* end of parsing of this node.
8635
if (nsNr != ctxt->nsNr)
8636
nsPop(ctxt, ctxt->nsNr - nsNr);
8641
* parse the end of tag: '</' should be here.
8644
xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
8647
#ifdef LIBXML_SAX1_ENABLED
8649
xmlParseEndTag1(ctxt, line);
8650
#endif /* LIBXML_SAX1_ENABLED */
8653
* Capture end position and add node
8655
if ( ret != NULL && ctxt->record_info ) {
8656
node_info.end_pos = ctxt->input->consumed +
8657
(CUR_PTR - ctxt->input->base);
8658
node_info.end_line = ctxt->input->line;
8659
node_info.node = ret;
8660
xmlParserAddNodeInfo(ctxt, &node_info);
8665
* xmlParseVersionNum:
8666
* @ctxt: an XML parser context
8668
* parse the XML version value.
8670
* [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8672
* Returns the string giving the XML version number, or NULL
8675
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8676
xmlChar *buf = NULL;
8681
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
8683
xmlErrMemory(ctxt, NULL);
8687
while (((cur >= 'a') && (cur <= 'z')) ||
8688
((cur >= 'A') && (cur <= 'Z')) ||
8689
((cur >= '0') && (cur <= '9')) ||
8690
(cur == '_') || (cur == '.') ||
8691
(cur == ':') || (cur == '-')) {
8692
if (len + 1 >= size) {
8696
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8698
xmlErrMemory(ctxt, NULL);
8712
* xmlParseVersionInfo:
8713
* @ctxt: an XML parser context
8715
* parse the XML version.
8717
* [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8719
* [25] Eq ::= S? '=' S?
8721
* Returns the version string, e.g. "1.0"
8725
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8726
xmlChar *version = NULL;
8728
if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
8732
xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
8739
version = xmlParseVersionNum(ctxt);
8741
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
8744
} else if (RAW == '\''){
8746
version = xmlParseVersionNum(ctxt);
8748
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
8752
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
8760
* @ctxt: an XML parser context
8762
* parse the XML encoding name
8764
* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8766
* Returns the encoding name value or NULL
8769
xmlParseEncName(xmlParserCtxtPtr ctxt) {
8770
xmlChar *buf = NULL;
8776
if (((cur >= 'a') && (cur <= 'z')) ||
8777
((cur >= 'A') && (cur <= 'Z'))) {
8778
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
8780
xmlErrMemory(ctxt, NULL);
8787
while (((cur >= 'a') && (cur <= 'z')) ||
8788
((cur >= 'A') && (cur <= 'Z')) ||
8789
((cur >= '0') && (cur <= '9')) ||
8790
(cur == '.') || (cur == '_') ||
8792
if (len + 1 >= size) {
8796
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8798
xmlErrMemory(ctxt, NULL);
8815
xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
8821
* xmlParseEncodingDecl:
8822
* @ctxt: an XML parser context
8824
* parse the XML encoding declaration
8826
* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8828
* This sets up the conversion filters.
8830
* Returns the encoding value or NULL
8834
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8835
xmlChar *encoding = NULL;
8838
if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
8842
xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
8849
encoding = xmlParseEncName(ctxt);
8851
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
8854
} else if (RAW == '\''){
8856
encoding = xmlParseEncName(ctxt);
8858
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
8862
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
8865
* UTF-16 encoding switch has already taken place at this stage,
8866
* moreover the little-endian/big-endian selection is already done
8868
if ((encoding != NULL) &&
8869
((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8870
(!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
8871
if (ctxt->encoding != NULL)
8872
xmlFree((xmlChar *) ctxt->encoding);
8873
ctxt->encoding = encoding;
8876
* UTF-8 encoding is handled natively
8878
else if ((encoding != NULL) &&
8879
((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8880
(!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
8881
if (ctxt->encoding != NULL)
8882
xmlFree((xmlChar *) ctxt->encoding);
8883
ctxt->encoding = encoding;
8885
else if (encoding != NULL) {
8886
xmlCharEncodingHandlerPtr handler;
8888
if (ctxt->input->encoding != NULL)
8889
xmlFree((xmlChar *) ctxt->input->encoding);
8890
ctxt->input->encoding = encoding;
8892
handler = xmlFindCharEncodingHandler((const char *) encoding);
8893
if (handler != NULL) {
8894
xmlSwitchToEncoding(ctxt, handler);
8896
xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
8897
"Unsupported encoding %s\n", encoding);
8907
* @ctxt: an XML parser context
8909
* parse the XML standalone declaration
8911
* [32] SDDecl ::= S 'standalone' Eq
8912
* (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8914
* [ VC: Standalone Document Declaration ]
8915
* TODO The standalone document declaration must have the value "no"
8916
* if any external markup declarations contain declarations of:
8917
* - attributes with default values, if elements to which these
8918
* attributes apply appear in the document without specifications
8919
* of values for these attributes, or
8920
* - entities (other than amp, lt, gt, apos, quot), if references
8921
* to those entities appear in the document, or
8922
* - attributes with values subject to normalization, where the
8923
* attribute appears in the document with a value which will change
8924
* as a result of normalization, or
8925
* - element types with element content, if white space occurs directly
8926
* within any instance of those types.
8928
* Returns 1 if standalone, 0 otherwise
8932
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8933
int standalone = -1;
8936
if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
8940
xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
8947
if ((RAW == 'n') && (NXT(1) == 'o')) {
8950
} else if ((RAW == 'y') && (NXT(1) == 'e') &&
8955
xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
8958
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
8961
} else if (RAW == '"'){
8963
if ((RAW == 'n') && (NXT(1) == 'o')) {
8966
} else if ((RAW == 'y') && (NXT(1) == 'e') &&
8971
xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
8974
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
8978
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
8986
* @ctxt: an XML parser context
8988
* parse an XML declaration header
8990
* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8994
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8998
* This value for standalone indicates that the document has an
8999
* XML declaration but it does not have a standalone attribute.
9000
* It will be overwritten later if a standalone attribute is found.
9002
ctxt->input->standalone = -2;
9005
* We know that '<?xml' is here.
9009
if (!IS_BLANK_CH(RAW)) {
9010
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9011
"Blank needed after '<?xml'\n");
9016
* We must have the VersionInfo here.
9018
version = xmlParseVersionInfo(ctxt);
9019
if (version == NULL) {
9020
xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
9022
if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9024
* TODO: Blueberry should be detected here
9026
xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9027
"Unsupported version '%s'\n",
9030
if (ctxt->version != NULL)
9031
xmlFree((void *) ctxt->version);
9032
ctxt->version = version;
9036
* We may have the encoding declaration
9038
if (!IS_BLANK_CH(RAW)) {
9039
if ((RAW == '?') && (NXT(1) == '>')) {
9043
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
9045
xmlParseEncodingDecl(ctxt);
9046
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9048
* The XML REC instructs us to stop parsing right here
9054
* We may have the standalone status.
9056
if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
9057
if ((RAW == '?') && (NXT(1) == '>')) {
9061
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
9064
ctxt->input->standalone = xmlParseSDDecl(ctxt);
9067
if ((RAW == '?') && (NXT(1) == '>')) {
9069
} else if (RAW == '>') {
9070
/* Deprecated old WD ... */
9071
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
9074
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
9075
MOVETO_ENDTAG(CUR_PTR);
9082
* @ctxt: an XML parser context
9084
* parse an XML Misc* optional field.
9086
* [27] Misc ::= Comment | PI | S
9090
xmlParseMisc(xmlParserCtxtPtr ctxt) {
9091
while (((RAW == '<') && (NXT(1) == '?')) ||
9092
(CMP4(CUR_PTR, '<', '!', '-', '-')) ||
9094
if ((RAW == '<') && (NXT(1) == '?')) {
9096
} else if (IS_BLANK_CH(CUR)) {
9099
xmlParseComment(ctxt);
9105
* @ctxt: an XML parser context
9107
* parse an XML document (and build a tree if using the standard SAX
9110
* [1] document ::= prolog element Misc*
9112
* [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9114
* Returns 0, -1 in case of error. the parser context is augmented
9115
* as a result of the parsing.
9119
xmlParseDocument(xmlParserCtxtPtr ctxt) {
9121
xmlCharEncoding enc;
9125
if ((ctxt == NULL) || (ctxt->input == NULL))
9131
* SAX: detecting the level.
9133
xmlDetectSAX2(ctxt);
9136
* SAX: beginning of the document processing.
9138
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9139
ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9141
if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9142
((ctxt->input->end - ctxt->input->cur) >= 4)) {
9144
* Get the 4 first bytes and decode the charset
9145
* if enc != XML_CHAR_ENCODING_NONE
9146
* plug some encoding conversion routines.
9152
enc = xmlDetectCharEncoding(&start[0], 4);
9153
if (enc != XML_CHAR_ENCODING_NONE) {
9154
xmlSwitchEncoding(ctxt, enc);
9160
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
9164
* Check for the XMLDecl in the Prolog.
9167
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
9170
* Note that we will switch encoding on the fly.
9172
xmlParseXMLDecl(ctxt);
9173
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9175
* The XML REC instructs us to stop parsing right here
9179
ctxt->standalone = ctxt->input->standalone;
9182
ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9184
if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9185
ctxt->sax->startDocument(ctxt->userData);
9188
* The Misc part of the Prolog
9194
* Then possibly doc type declaration(s) and more Misc
9195
* (doctypedecl Misc*)?
9198
if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
9201
xmlParseDocTypeDecl(ctxt);
9203
ctxt->instate = XML_PARSER_DTD;
9204
xmlParseInternalSubset(ctxt);
9208
* Create and update the external subset.
9211
if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9212
(!ctxt->disableSAX))
9213
ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9214
ctxt->extSubSystem, ctxt->extSubURI);
9218
ctxt->instate = XML_PARSER_PROLOG;
9223
* Time to start parsing the tree itself
9227
xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9228
"Start tag expected, '<' not found\n");
9230
ctxt->instate = XML_PARSER_CONTENT;
9231
xmlParseElement(ctxt);
9232
ctxt->instate = XML_PARSER_EPILOG;
9236
* The Misc part at the end
9241
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
9243
ctxt->instate = XML_PARSER_EOF;
9247
* SAX: end of the document processing.
9249
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9250
ctxt->sax->endDocument(ctxt->userData);
9253
* Remove locally kept entity definitions if the tree was not built
9255
if ((ctxt->myDoc != NULL) &&
9256
(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9257
xmlFreeDoc(ctxt->myDoc);
9261
if (! ctxt->wellFormed) {
9269
* xmlParseExtParsedEnt:
9270
* @ctxt: an XML parser context
9272
* parse a general parsed entity
9273
* An external general parsed entity is well-formed if it matches the
9274
* production labeled extParsedEnt.
9276
* [78] extParsedEnt ::= TextDecl? content
9278
* Returns 0, -1 in case of error. the parser context is augmented
9279
* as a result of the parsing.
9283
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9285
xmlCharEncoding enc;
9287
if ((ctxt == NULL) || (ctxt->input == NULL))
9290
xmlDefaultSAXHandlerInit();
9292
xmlDetectSAX2(ctxt);
9297
* SAX: beginning of the document processing.
9299
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9300
ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9303
* Get the 4 first bytes and decode the charset
9304
* if enc != XML_CHAR_ENCODING_NONE
9305
* plug some encoding conversion routines.
9307
if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9312
enc = xmlDetectCharEncoding(start, 4);
9313
if (enc != XML_CHAR_ENCODING_NONE) {
9314
xmlSwitchEncoding(ctxt, enc);
9320
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
9324
* Check for the XMLDecl in the Prolog.
9327
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
9330
* Note that we will switch encoding on the fly.
9332
xmlParseXMLDecl(ctxt);
9333
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9335
* The XML REC instructs us to stop parsing right here
9341
ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9343
if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9344
ctxt->sax->startDocument(ctxt->userData);
9347
* Doing validity checking on chunk doesn't make sense
9349
ctxt->instate = XML_PARSER_CONTENT;
9351
ctxt->loadsubset = 0;
9354
xmlParseContent(ctxt);
9356
if ((RAW == '<') && (NXT(1) == '/')) {
9357
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
9358
} else if (RAW != 0) {
9359
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
9363
* SAX: end of the document processing.
9365
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9366
ctxt->sax->endDocument(ctxt->userData);
9368
if (! ctxt->wellFormed) return(-1);
9372
#ifdef LIBXML_PUSH_ENABLED
9373
/************************************************************************
9375
* Progressive parsing interfaces *
9377
************************************************************************/
9380
* xmlParseLookupSequence:
9381
* @ctxt: an XML parser context
9382
* @first: the first char to lookup
9383
* @next: the next char to lookup or zero
9384
* @third: the next char to lookup or zero
9386
* Try to find if a sequence (first, next, third) or just (first next) or
9387
* (first) is available in the input stream.
9388
* This function has a side effect of (possibly) incrementing ctxt->checkIndex
9389
* to avoid rescanning sequences of bytes, it DOES change the state of the
9390
* parser, do not use liberally.
9392
* Returns the index to the current parsing point if the full sequence
9393
* is available, -1 otherwise.
9396
xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9397
xmlChar next, xmlChar third) {
9399
xmlParserInputPtr in;
9403
if (in == NULL) return(-1);
9404
base = in->cur - in->base;
9405
if (base < 0) return(-1);
9406
if (ctxt->checkIndex > base)
9407
base = ctxt->checkIndex;
9408
if (in->buf == NULL) {
9412
buf = in->buf->buffer->content;
9413
len = in->buf->buffer->use;
9415
/* take into account the sequence length */
9416
if (third) len -= 2;
9417
else if (next) len --;
9418
for (;base < len;base++) {
9419
if (buf[base] == first) {
9421
if ((buf[base + 1] != next) ||
9422
(buf[base + 2] != third)) continue;
9423
} else if (next != 0) {
9424
if (buf[base + 1] != next) continue;
9426
ctxt->checkIndex = 0;
9429
xmlGenericError(xmlGenericErrorContext,
9430
"PP: lookup '%c' found at %d\n",
9432
else if (third == 0)
9433
xmlGenericError(xmlGenericErrorContext,
9434
"PP: lookup '%c%c' found at %d\n",
9437
xmlGenericError(xmlGenericErrorContext,
9438
"PP: lookup '%c%c%c' found at %d\n",
9439
first, next, third, base);
9441
return(base - (in->cur - in->base));
9444
ctxt->checkIndex = base;
9447
xmlGenericError(xmlGenericErrorContext,
9448
"PP: lookup '%c' failed\n", first);
9449
else if (third == 0)
9450
xmlGenericError(xmlGenericErrorContext,
9451
"PP: lookup '%c%c' failed\n", first, next);
9453
xmlGenericError(xmlGenericErrorContext,
9454
"PP: lookup '%c%c%c' failed\n", first, next, third);
9461
* @ctxt: an XML parser context
9462
* @lastlt: pointer to store the last '<' from the input
9463
* @lastgt: pointer to store the last '>' from the input
9465
* Lookup the last < and > in the current chunk
9468
xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9469
const xmlChar **lastgt) {
9472
if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9473
xmlGenericError(xmlGenericErrorContext,
9474
"Internal error: xmlParseGetLasts\n");
9477
if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
9478
tmp = ctxt->input->end;
9480
while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
9481
if (tmp < ctxt->input->base) {
9487
while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9490
while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9491
if (tmp < ctxt->input->end) tmp++;
9492
} else if (*tmp == '"') {
9494
while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9495
if (tmp < ctxt->input->end) tmp++;
9499
if (tmp < ctxt->input->end)
9504
while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9505
if (tmp >= ctxt->input->base)
9517
* xmlCheckCdataPush:
9518
* @utf: pointer to the block of characters
9519
* @len: length of the block in bytes
9521
* Check that the block of characters is okay as CData content [20]
9523
* Returns the number of bytes to pass if okay, a negative index where an
9524
* UTF-8 error occurred otherwise
9527
xmlCheckCdataPush(const xmlChar *utf, int len) {
9532
if ((utf == NULL) || (len <= 0))
9535
for (ix = 0; ix < len;) { /* string is 0-terminated */
9537
if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
9540
else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9544
} else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9545
if (ix + 2 > len) return(ix);
9546
if ((utf[ix+1] & 0xc0 ) != 0x80)
9548
codepoint = (utf[ix] & 0x1f) << 6;
9549
codepoint |= utf[ix+1] & 0x3f;
9550
if (!xmlIsCharQ(codepoint))
9553
} else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9554
if (ix + 3 > len) return(ix);
9555
if (((utf[ix+1] & 0xc0) != 0x80) ||
9556
((utf[ix+2] & 0xc0) != 0x80))
9558
codepoint = (utf[ix] & 0xf) << 12;
9559
codepoint |= (utf[ix+1] & 0x3f) << 6;
9560
codepoint |= utf[ix+2] & 0x3f;
9561
if (!xmlIsCharQ(codepoint))
9564
} else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9565
if (ix + 4 > len) return(ix);
9566
if (((utf[ix+1] & 0xc0) != 0x80) ||
9567
((utf[ix+2] & 0xc0) != 0x80) ||
9568
((utf[ix+3] & 0xc0) != 0x80))
9570
codepoint = (utf[ix] & 0x7) << 18;
9571
codepoint |= (utf[ix+1] & 0x3f) << 12;
9572
codepoint |= (utf[ix+2] & 0x3f) << 6;
9573
codepoint |= utf[ix+3] & 0x3f;
9574
if (!xmlIsCharQ(codepoint))
9577
} else /* unknown encoding */
9584
* xmlParseTryOrFinish:
9585
* @ctxt: an XML parser context
9586
* @terminate: last chunk indicator
9588
* Try to progress on parsing
9590
* Returns zero if no parsing was possible
9593
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9597
const xmlChar *lastlt, *lastgt;
9599
if (ctxt->input == NULL)
9603
switch (ctxt->instate) {
9604
case XML_PARSER_EOF:
9605
xmlGenericError(xmlGenericErrorContext,
9606
"PP: try EOF\n"); break;
9607
case XML_PARSER_START:
9608
xmlGenericError(xmlGenericErrorContext,
9609
"PP: try START\n"); break;
9610
case XML_PARSER_MISC:
9611
xmlGenericError(xmlGenericErrorContext,
9612
"PP: try MISC\n");break;
9613
case XML_PARSER_COMMENT:
9614
xmlGenericError(xmlGenericErrorContext,
9615
"PP: try COMMENT\n");break;
9616
case XML_PARSER_PROLOG:
9617
xmlGenericError(xmlGenericErrorContext,
9618
"PP: try PROLOG\n");break;
9619
case XML_PARSER_START_TAG:
9620
xmlGenericError(xmlGenericErrorContext,
9621
"PP: try START_TAG\n");break;
9622
case XML_PARSER_CONTENT:
9623
xmlGenericError(xmlGenericErrorContext,
9624
"PP: try CONTENT\n");break;
9625
case XML_PARSER_CDATA_SECTION:
9626
xmlGenericError(xmlGenericErrorContext,
9627
"PP: try CDATA_SECTION\n");break;
9628
case XML_PARSER_END_TAG:
9629
xmlGenericError(xmlGenericErrorContext,
9630
"PP: try END_TAG\n");break;
9631
case XML_PARSER_ENTITY_DECL:
9632
xmlGenericError(xmlGenericErrorContext,
9633
"PP: try ENTITY_DECL\n");break;
9634
case XML_PARSER_ENTITY_VALUE:
9635
xmlGenericError(xmlGenericErrorContext,
9636
"PP: try ENTITY_VALUE\n");break;
9637
case XML_PARSER_ATTRIBUTE_VALUE:
9638
xmlGenericError(xmlGenericErrorContext,
9639
"PP: try ATTRIBUTE_VALUE\n");break;
9640
case XML_PARSER_DTD:
9641
xmlGenericError(xmlGenericErrorContext,
9642
"PP: try DTD\n");break;
9643
case XML_PARSER_EPILOG:
9644
xmlGenericError(xmlGenericErrorContext,
9645
"PP: try EPILOG\n");break;
9647
xmlGenericError(xmlGenericErrorContext,
9648
"PP: try PI\n");break;
9649
case XML_PARSER_IGNORE:
9650
xmlGenericError(xmlGenericErrorContext,
9651
"PP: try IGNORE\n");break;
9655
if ((ctxt->input != NULL) &&
9656
(ctxt->input->cur - ctxt->input->base > 4096)) {
9658
ctxt->checkIndex = 0;
9660
xmlParseGetLasts(ctxt, &lastlt, &lastgt);
9663
if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9668
* Pop-up of finished entities.
9670
while ((RAW == 0) && (ctxt->inputNr > 1))
9673
if (ctxt->input == NULL) break;
9674
if (ctxt->input->buf == NULL)
9675
avail = ctxt->input->length -
9676
(ctxt->input->cur - ctxt->input->base);
9679
* If we are operating on converted input, try to flush
9680
* remaining chars to avoid them stalling in the non-converted
9683
if ((ctxt->input->buf->raw != NULL) &&
9684
(ctxt->input->buf->raw->use > 0)) {
9685
int base = ctxt->input->base -
9686
ctxt->input->buf->buffer->content;
9687
int current = ctxt->input->cur - ctxt->input->base;
9689
xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9690
ctxt->input->base = ctxt->input->buf->buffer->content + base;
9691
ctxt->input->cur = ctxt->input->base + current;
9693
&ctxt->input->buf->buffer->content[
9694
ctxt->input->buf->buffer->use];
9696
avail = ctxt->input->buf->buffer->use -
9697
(ctxt->input->cur - ctxt->input->base);
9701
switch (ctxt->instate) {
9702
case XML_PARSER_EOF:
9704
* Document parsing is done !
9707
case XML_PARSER_START:
9708
if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9710
xmlCharEncoding enc;
9713
* Very first chars read from the document flow.
9719
* Get the 4 first bytes and decode the charset
9720
* if enc != XML_CHAR_ENCODING_NONE
9721
* plug some encoding conversion routines,
9722
* else xmlSwitchEncoding will set to (default)
9729
enc = xmlDetectCharEncoding(start, 4);
9730
xmlSwitchEncoding(ctxt, enc);
9736
cur = ctxt->input->cur[0];
9737
next = ctxt->input->cur[1];
9739
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9740
ctxt->sax->setDocumentLocator(ctxt->userData,
9741
&xmlDefaultSAXLocator);
9742
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
9743
ctxt->instate = XML_PARSER_EOF;
9745
xmlGenericError(xmlGenericErrorContext,
9746
"PP: entering EOF\n");
9748
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9749
ctxt->sax->endDocument(ctxt->userData);
9752
if ((cur == '<') && (next == '?')) {
9753
/* PI or XML decl */
9754
if (avail < 5) return(ret);
9756
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9758
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9759
ctxt->sax->setDocumentLocator(ctxt->userData,
9760
&xmlDefaultSAXLocator);
9761
if ((ctxt->input->cur[2] == 'x') &&
9762
(ctxt->input->cur[3] == 'm') &&
9763
(ctxt->input->cur[4] == 'l') &&
9764
(IS_BLANK_CH(ctxt->input->cur[5]))) {
9767
xmlGenericError(xmlGenericErrorContext,
9768
"PP: Parsing XML Decl\n");
9770
xmlParseXMLDecl(ctxt);
9771
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9773
* The XML REC instructs us to stop parsing right
9776
ctxt->instate = XML_PARSER_EOF;
9779
ctxt->standalone = ctxt->input->standalone;
9780
if ((ctxt->encoding == NULL) &&
9781
(ctxt->input->encoding != NULL))
9782
ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9783
if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9784
(!ctxt->disableSAX))
9785
ctxt->sax->startDocument(ctxt->userData);
9786
ctxt->instate = XML_PARSER_MISC;
9788
xmlGenericError(xmlGenericErrorContext,
9789
"PP: entering MISC\n");
9792
ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9793
if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9794
(!ctxt->disableSAX))
9795
ctxt->sax->startDocument(ctxt->userData);
9796
ctxt->instate = XML_PARSER_MISC;
9798
xmlGenericError(xmlGenericErrorContext,
9799
"PP: entering MISC\n");
9803
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9804
ctxt->sax->setDocumentLocator(ctxt->userData,
9805
&xmlDefaultSAXLocator);
9806
ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9807
if (ctxt->version == NULL) {
9808
xmlErrMemory(ctxt, NULL);
9811
if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9812
(!ctxt->disableSAX))
9813
ctxt->sax->startDocument(ctxt->userData);
9814
ctxt->instate = XML_PARSER_MISC;
9816
xmlGenericError(xmlGenericErrorContext,
9817
"PP: entering MISC\n");
9821
case XML_PARSER_START_TAG: {
9822
const xmlChar *name;
9823
const xmlChar *prefix;
9825
int nsNr = ctxt->nsNr;
9827
if ((avail < 2) && (ctxt->inputNr == 1))
9829
cur = ctxt->input->cur[0];
9831
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
9832
ctxt->instate = XML_PARSER_EOF;
9833
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9834
ctxt->sax->endDocument(ctxt->userData);
9838
if (ctxt->progressive) {
9839
/* > can be found unescaped in attribute values */
9840
if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
9842
} else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9846
if (ctxt->spaceNr == 0)
9847
spacePush(ctxt, -1);
9848
else if (*ctxt->space == -2)
9849
spacePush(ctxt, -1);
9851
spacePush(ctxt, *ctxt->space);
9852
#ifdef LIBXML_SAX1_ENABLED
9854
#endif /* LIBXML_SAX1_ENABLED */
9855
name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9856
#ifdef LIBXML_SAX1_ENABLED
9858
name = xmlParseStartTag(ctxt);
9859
#endif /* LIBXML_SAX1_ENABLED */
9862
ctxt->instate = XML_PARSER_EOF;
9863
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9864
ctxt->sax->endDocument(ctxt->userData);
9867
#ifdef LIBXML_VALID_ENABLED
9869
* [ VC: Root Element Type ]
9870
* The Name in the document type declaration must match
9871
* the element type of the root element.
9873
if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9874
ctxt->node && (ctxt->node == ctxt->myDoc->children))
9875
ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9876
#endif /* LIBXML_VALID_ENABLED */
9879
* Check for an Empty Element.
9881
if ((RAW == '/') && (NXT(1) == '>')) {
9885
if ((ctxt->sax != NULL) &&
9886
(ctxt->sax->endElementNs != NULL) &&
9887
(!ctxt->disableSAX))
9888
ctxt->sax->endElementNs(ctxt->userData, name,
9890
if (ctxt->nsNr - nsNr > 0)
9891
nsPop(ctxt, ctxt->nsNr - nsNr);
9892
#ifdef LIBXML_SAX1_ENABLED
9894
if ((ctxt->sax != NULL) &&
9895
(ctxt->sax->endElement != NULL) &&
9896
(!ctxt->disableSAX))
9897
ctxt->sax->endElement(ctxt->userData, name);
9898
#endif /* LIBXML_SAX1_ENABLED */
9901
if (ctxt->nameNr == 0) {
9902
ctxt->instate = XML_PARSER_EPILOG;
9904
ctxt->instate = XML_PARSER_CONTENT;
9911
xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
9912
"Couldn't find end of Start Tag %s\n",
9918
nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
9919
#ifdef LIBXML_SAX1_ENABLED
9921
namePush(ctxt, name);
9922
#endif /* LIBXML_SAX1_ENABLED */
9924
ctxt->instate = XML_PARSER_CONTENT;
9927
case XML_PARSER_CONTENT: {
9928
const xmlChar *test;
9930
if ((avail < 2) && (ctxt->inputNr == 1))
9932
cur = ctxt->input->cur[0];
9933
next = ctxt->input->cur[1];
9936
cons = ctxt->input->consumed;
9937
if ((cur == '<') && (next == '/')) {
9938
ctxt->instate = XML_PARSER_END_TAG;
9940
} else if ((cur == '<') && (next == '?')) {
9942
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9945
} else if ((cur == '<') && (next != '!')) {
9946
ctxt->instate = XML_PARSER_START_TAG;
9948
} else if ((cur == '<') && (next == '!') &&
9949
(ctxt->input->cur[2] == '-') &&
9950
(ctxt->input->cur[3] == '-')) {
9955
ctxt->input->cur += 4;
9956
term = xmlParseLookupSequence(ctxt, '-', '-', '>');
9957
ctxt->input->cur -= 4;
9958
if ((!terminate) && (term < 0))
9960
xmlParseComment(ctxt);
9961
ctxt->instate = XML_PARSER_CONTENT;
9962
} else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9963
(ctxt->input->cur[2] == '[') &&
9964
(ctxt->input->cur[3] == 'C') &&
9965
(ctxt->input->cur[4] == 'D') &&
9966
(ctxt->input->cur[5] == 'A') &&
9967
(ctxt->input->cur[6] == 'T') &&
9968
(ctxt->input->cur[7] == 'A') &&
9969
(ctxt->input->cur[8] == '[')) {
9971
ctxt->instate = XML_PARSER_CDATA_SECTION;
9973
} else if ((cur == '<') && (next == '!') &&
9976
} else if (cur == '&') {
9978
(xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9980
xmlParseReference(ctxt);
9982
/* TODO Avoid the extra copy, handle directly !!! */
9984
* Goal of the following test is:
9985
* - minimize calls to the SAX 'character' callback
9986
* when they are mergeable
9987
* - handle a problem for isBlank when we only parse
9988
* a sequence of blank chars and the next one is
9989
* not available to check against '<' presence.
9990
* - tries to homogenize the differences in SAX
9991
* callbacks between the push and pull versions
9994
if ((ctxt->inputNr == 1) &&
9995
(avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9997
if (ctxt->progressive) {
9998
if ((lastlt == NULL) ||
9999
(ctxt->input->cur > lastlt))
10001
} else if (xmlParseLookupSequence(ctxt,
10007
ctxt->checkIndex = 0;
10008
xmlParseCharData(ctxt, 0);
10011
* Pop-up of finished entities.
10013
while ((RAW == 0) && (ctxt->inputNr > 1))
10015
if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
10016
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10017
"detected an error in element content\n");
10018
ctxt->instate = XML_PARSER_EOF;
10023
case XML_PARSER_END_TAG:
10027
if (ctxt->progressive) {
10028
/* > can be found unescaped in attribute values */
10029
if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
10031
} else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10036
xmlParseEndTag2(ctxt,
10037
(void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10038
(void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
10039
(int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
10042
#ifdef LIBXML_SAX1_ENABLED
10044
xmlParseEndTag1(ctxt, 0);
10045
#endif /* LIBXML_SAX1_ENABLED */
10046
if (ctxt->nameNr == 0) {
10047
ctxt->instate = XML_PARSER_EPILOG;
10049
ctxt->instate = XML_PARSER_CONTENT;
10052
case XML_PARSER_CDATA_SECTION: {
10054
* The Push mode needs to have the SAX callback for
10055
* cdataBlock merge back contiguous callbacks.
10059
base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10061
if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
10064
tmp = xmlCheckCdataPush(ctxt->input->cur,
10065
XML_PARSER_BIG_BUFFER_SIZE);
10068
ctxt->input->cur += tmp;
10069
goto encoding_error;
10071
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10072
if (ctxt->sax->cdataBlock != NULL)
10073
ctxt->sax->cdataBlock(ctxt->userData,
10074
ctxt->input->cur, tmp);
10075
else if (ctxt->sax->characters != NULL)
10076
ctxt->sax->characters(ctxt->userData,
10077
ctxt->input->cur, tmp);
10080
ctxt->checkIndex = 0;
10086
tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10087
if ((tmp < 0) || (tmp != base)) {
10089
ctxt->input->cur += tmp;
10090
goto encoding_error;
10092
if ((ctxt->sax != NULL) && (base > 0) &&
10093
(!ctxt->disableSAX)) {
10094
if (ctxt->sax->cdataBlock != NULL)
10095
ctxt->sax->cdataBlock(ctxt->userData,
10096
ctxt->input->cur, base);
10097
else if (ctxt->sax->characters != NULL)
10098
ctxt->sax->characters(ctxt->userData,
10099
ctxt->input->cur, base);
10102
ctxt->checkIndex = 0;
10103
ctxt->instate = XML_PARSER_CONTENT;
10105
xmlGenericError(xmlGenericErrorContext,
10106
"PP: entering CONTENT\n");
10111
case XML_PARSER_MISC:
10113
if (ctxt->input->buf == NULL)
10114
avail = ctxt->input->length -
10115
(ctxt->input->cur - ctxt->input->base);
10117
avail = ctxt->input->buf->buffer->use -
10118
(ctxt->input->cur - ctxt->input->base);
10121
cur = ctxt->input->cur[0];
10122
next = ctxt->input->cur[1];
10123
if ((cur == '<') && (next == '?')) {
10124
if ((!terminate) &&
10125
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10128
xmlGenericError(xmlGenericErrorContext,
10129
"PP: Parsing PI\n");
10132
} else if ((cur == '<') && (next == '!') &&
10133
(ctxt->input->cur[2] == '-') &&
10134
(ctxt->input->cur[3] == '-')) {
10135
if ((!terminate) &&
10136
(xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10139
xmlGenericError(xmlGenericErrorContext,
10140
"PP: Parsing Comment\n");
10142
xmlParseComment(ctxt);
10143
ctxt->instate = XML_PARSER_MISC;
10144
} else if ((cur == '<') && (next == '!') &&
10145
(ctxt->input->cur[2] == 'D') &&
10146
(ctxt->input->cur[3] == 'O') &&
10147
(ctxt->input->cur[4] == 'C') &&
10148
(ctxt->input->cur[5] == 'T') &&
10149
(ctxt->input->cur[6] == 'Y') &&
10150
(ctxt->input->cur[7] == 'P') &&
10151
(ctxt->input->cur[8] == 'E')) {
10152
if ((!terminate) &&
10153
(xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10156
xmlGenericError(xmlGenericErrorContext,
10157
"PP: Parsing internal subset\n");
10159
ctxt->inSubset = 1;
10160
xmlParseDocTypeDecl(ctxt);
10162
ctxt->instate = XML_PARSER_DTD;
10164
xmlGenericError(xmlGenericErrorContext,
10165
"PP: entering DTD\n");
10169
* Create and update the external subset.
10171
ctxt->inSubset = 2;
10172
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10173
(ctxt->sax->externalSubset != NULL))
10174
ctxt->sax->externalSubset(ctxt->userData,
10175
ctxt->intSubName, ctxt->extSubSystem,
10177
ctxt->inSubset = 0;
10178
ctxt->instate = XML_PARSER_PROLOG;
10180
xmlGenericError(xmlGenericErrorContext,
10181
"PP: entering PROLOG\n");
10184
} else if ((cur == '<') && (next == '!') &&
10188
ctxt->instate = XML_PARSER_START_TAG;
10189
ctxt->progressive = 1;
10190
xmlParseGetLasts(ctxt, &lastlt, &lastgt);
10192
xmlGenericError(xmlGenericErrorContext,
10193
"PP: entering START_TAG\n");
10197
case XML_PARSER_PROLOG:
10199
if (ctxt->input->buf == NULL)
10200
avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10202
avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10205
cur = ctxt->input->cur[0];
10206
next = ctxt->input->cur[1];
10207
if ((cur == '<') && (next == '?')) {
10208
if ((!terminate) &&
10209
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10212
xmlGenericError(xmlGenericErrorContext,
10213
"PP: Parsing PI\n");
10216
} else if ((cur == '<') && (next == '!') &&
10217
(ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10218
if ((!terminate) &&
10219
(xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10222
xmlGenericError(xmlGenericErrorContext,
10223
"PP: Parsing Comment\n");
10225
xmlParseComment(ctxt);
10226
ctxt->instate = XML_PARSER_PROLOG;
10227
} else if ((cur == '<') && (next == '!') &&
10231
ctxt->instate = XML_PARSER_START_TAG;
10232
if (ctxt->progressive == 0)
10233
ctxt->progressive = 1;
10234
xmlParseGetLasts(ctxt, &lastlt, &lastgt);
10236
xmlGenericError(xmlGenericErrorContext,
10237
"PP: entering START_TAG\n");
10241
case XML_PARSER_EPILOG:
10243
if (ctxt->input->buf == NULL)
10244
avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10246
avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10249
cur = ctxt->input->cur[0];
10250
next = ctxt->input->cur[1];
10251
if ((cur == '<') && (next == '?')) {
10252
if ((!terminate) &&
10253
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10256
xmlGenericError(xmlGenericErrorContext,
10257
"PP: Parsing PI\n");
10260
ctxt->instate = XML_PARSER_EPILOG;
10261
} else if ((cur == '<') && (next == '!') &&
10262
(ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10263
if ((!terminate) &&
10264
(xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10267
xmlGenericError(xmlGenericErrorContext,
10268
"PP: Parsing Comment\n");
10270
xmlParseComment(ctxt);
10271
ctxt->instate = XML_PARSER_EPILOG;
10272
} else if ((cur == '<') && (next == '!') &&
10276
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10277
ctxt->instate = XML_PARSER_EOF;
10279
xmlGenericError(xmlGenericErrorContext,
10280
"PP: entering EOF\n");
10282
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10283
ctxt->sax->endDocument(ctxt->userData);
10287
case XML_PARSER_DTD: {
10289
* Sorry but progressive parsing of the internal subset
10290
* is not expected to be supported. We first check that
10291
* the full content of the internal subset is available and
10292
* the parsing is launched only at that point.
10293
* Internal subset ends up with "']' S? '>'" in an unescaped
10294
* section and not in a ']]>' sequence which are conditional
10295
* sections (whoever argued to keep that crap in XML deserves
10296
* a place in hell !).
10302
base = ctxt->input->cur - ctxt->input->base;
10303
if (base < 0) return(0);
10304
if (ctxt->checkIndex > base)
10305
base = ctxt->checkIndex;
10306
buf = ctxt->input->buf->buffer->content;
10307
for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10310
if (buf[base] == quote)
10314
if ((quote == 0) && (buf[base] == '<')) {
10316
/* special handling of comments */
10317
if (((unsigned int) base + 4 <
10318
ctxt->input->buf->buffer->use) &&
10319
(buf[base + 1] == '!') &&
10320
(buf[base + 2] == '-') &&
10321
(buf[base + 3] == '-')) {
10322
for (;(unsigned int) base + 3 <
10323
ctxt->input->buf->buffer->use; base++) {
10324
if ((buf[base] == '-') &&
10325
(buf[base + 1] == '-') &&
10326
(buf[base + 2] == '>')) {
10334
fprintf(stderr, "unfinished comment\n");
10341
if (buf[base] == '"') {
10345
if (buf[base] == '\'') {
10349
if (buf[base] == ']') {
10351
fprintf(stderr, "%c%c%c%c: ", buf[base],
10352
buf[base + 1], buf[base + 2], buf[base + 3]);
10354
if ((unsigned int) base +1 >=
10355
ctxt->input->buf->buffer->use)
10357
if (buf[base + 1] == ']') {
10358
/* conditional crap, skip both ']' ! */
10363
(unsigned int) base + i < ctxt->input->buf->buffer->use;
10365
if (buf[base + i] == '>') {
10367
fprintf(stderr, "found\n");
10369
goto found_end_int_subset;
10371
if (!IS_BLANK_CH(buf[base + i])) {
10373
fprintf(stderr, "not found\n");
10375
goto not_end_of_int_subset;
10379
fprintf(stderr, "end of stream\n");
10384
not_end_of_int_subset:
10385
continue; /* for */
10388
* We didn't find the end of the Internal subset
10392
xmlGenericError(xmlGenericErrorContext,
10393
"PP: lookup of int subset end filed\n");
10397
found_end_int_subset:
10398
xmlParseInternalSubset(ctxt);
10399
ctxt->inSubset = 2;
10400
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10401
(ctxt->sax->externalSubset != NULL))
10402
ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10403
ctxt->extSubSystem, ctxt->extSubURI);
10404
ctxt->inSubset = 0;
10405
ctxt->instate = XML_PARSER_PROLOG;
10406
ctxt->checkIndex = 0;
10408
xmlGenericError(xmlGenericErrorContext,
10409
"PP: entering PROLOG\n");
10413
case XML_PARSER_COMMENT:
10414
xmlGenericError(xmlGenericErrorContext,
10415
"PP: internal error, state == COMMENT\n");
10416
ctxt->instate = XML_PARSER_CONTENT;
10418
xmlGenericError(xmlGenericErrorContext,
10419
"PP: entering CONTENT\n");
10422
case XML_PARSER_IGNORE:
10423
xmlGenericError(xmlGenericErrorContext,
10424
"PP: internal error, state == IGNORE");
10425
ctxt->instate = XML_PARSER_DTD;
10427
xmlGenericError(xmlGenericErrorContext,
10428
"PP: entering DTD\n");
10431
case XML_PARSER_PI:
10432
xmlGenericError(xmlGenericErrorContext,
10433
"PP: internal error, state == PI\n");
10434
ctxt->instate = XML_PARSER_CONTENT;
10436
xmlGenericError(xmlGenericErrorContext,
10437
"PP: entering CONTENT\n");
10440
case XML_PARSER_ENTITY_DECL:
10441
xmlGenericError(xmlGenericErrorContext,
10442
"PP: internal error, state == ENTITY_DECL\n");
10443
ctxt->instate = XML_PARSER_DTD;
10445
xmlGenericError(xmlGenericErrorContext,
10446
"PP: entering DTD\n");
10449
case XML_PARSER_ENTITY_VALUE:
10450
xmlGenericError(xmlGenericErrorContext,
10451
"PP: internal error, state == ENTITY_VALUE\n");
10452
ctxt->instate = XML_PARSER_CONTENT;
10454
xmlGenericError(xmlGenericErrorContext,
10455
"PP: entering DTD\n");
10458
case XML_PARSER_ATTRIBUTE_VALUE:
10459
xmlGenericError(xmlGenericErrorContext,
10460
"PP: internal error, state == ATTRIBUTE_VALUE\n");
10461
ctxt->instate = XML_PARSER_START_TAG;
10463
xmlGenericError(xmlGenericErrorContext,
10464
"PP: entering START_TAG\n");
10467
case XML_PARSER_SYSTEM_LITERAL:
10468
xmlGenericError(xmlGenericErrorContext,
10469
"PP: internal error, state == SYSTEM_LITERAL\n");
10470
ctxt->instate = XML_PARSER_START_TAG;
10472
xmlGenericError(xmlGenericErrorContext,
10473
"PP: entering START_TAG\n");
10476
case XML_PARSER_PUBLIC_LITERAL:
10477
xmlGenericError(xmlGenericErrorContext,
10478
"PP: internal error, state == PUBLIC_LITERAL\n");
10479
ctxt->instate = XML_PARSER_START_TAG;
10481
xmlGenericError(xmlGenericErrorContext,
10482
"PP: entering START_TAG\n");
10489
xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10496
snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10497
ctxt->input->cur[0], ctxt->input->cur[1],
10498
ctxt->input->cur[2], ctxt->input->cur[3]);
10499
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10500
"Input is not proper UTF-8, indicate encoding !\n%s",
10501
BAD_CAST buffer, NULL);
10508
* @ctxt: an XML parser context
10509
* @chunk: a char array
10510
* @size: the size in bytes of the chunk
10511
* @terminate: last chunk indicator
10513
* Parse a Chunk of memory
10515
* Returns zero if no error, the xmlParserErrors otherwise.
10518
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10523
return(XML_ERR_INTERNAL_ERROR);
10524
if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10525
return(ctxt->errNo);
10526
if (ctxt->instate == XML_PARSER_START)
10527
xmlDetectSAX2(ctxt);
10528
if ((size > 0) && (chunk != NULL) && (!terminate) &&
10529
(chunk[size - 1] == '\r')) {
10533
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10534
(ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10535
int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10536
int cur = ctxt->input->cur - ctxt->input->base;
10539
res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10541
ctxt->errNo = XML_PARSER_EOF;
10542
ctxt->disableSAX = 1;
10543
return (XML_PARSER_EOF);
10545
ctxt->input->base = ctxt->input->buf->buffer->content + base;
10546
ctxt->input->cur = ctxt->input->base + cur;
10548
&ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
10550
xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10553
} else if (ctxt->instate != XML_PARSER_EOF) {
10554
if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10555
xmlParserInputBufferPtr in = ctxt->input->buf;
10556
if ((in->encoder != NULL) && (in->buffer != NULL) &&
10557
(in->raw != NULL)) {
10560
nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10563
xmlGenericError(xmlGenericErrorContext,
10564
"xmlParseChunk: encoder error\n");
10565
return(XML_ERR_INVALID_ENCODING);
10570
xmlParseTryOrFinish(ctxt, terminate);
10571
if ((end_in_lf == 1) && (ctxt->input != NULL) &&
10572
(ctxt->input->buf != NULL)) {
10573
xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
10575
if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10576
return(ctxt->errNo);
10579
* Check for termination
10583
if (ctxt->input != NULL) {
10584
if (ctxt->input->buf == NULL)
10585
avail = ctxt->input->length -
10586
(ctxt->input->cur - ctxt->input->base);
10588
avail = ctxt->input->buf->buffer->use -
10589
(ctxt->input->cur - ctxt->input->base);
10592
if ((ctxt->instate != XML_PARSER_EOF) &&
10593
(ctxt->instate != XML_PARSER_EPILOG)) {
10594
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10596
if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
10597
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10599
if (ctxt->instate != XML_PARSER_EOF) {
10600
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10601
ctxt->sax->endDocument(ctxt->userData);
10603
ctxt->instate = XML_PARSER_EOF;
10605
return((xmlParserErrors) ctxt->errNo);
10608
/************************************************************************
10610
* I/O front end functions to the parser *
10612
************************************************************************/
10615
* xmlCreatePushParserCtxt:
10616
* @sax: a SAX handler
10617
* @user_data: The user data returned on SAX callbacks
10618
* @chunk: a pointer to an array of chars
10619
* @size: number of chars in the array
10620
* @filename: an optional file name or URI
10622
* Create a parser context for using the XML parser in push mode.
10623
* If @chunk is non-NULL and @size is non-zero, the data is used to detect
10624
* the encoding. The remaining characters will be parsed so they
10625
* don't need to be fed in again through xmlParseChunk.
10626
* To allow content encoding detection, @size should be >= 4
10627
* The value of @filename is used for fetching external entities
10628
* and error/warning reports.
10630
* Returns the new parser context or NULL
10634
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10635
const char *chunk, int size, const char *filename) {
10636
xmlParserCtxtPtr ctxt;
10637
xmlParserInputPtr inputStream;
10638
xmlParserInputBufferPtr buf;
10639
xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10642
* plug some encoding conversion routines
10644
if ((chunk != NULL) && (size >= 4))
10645
enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10647
buf = xmlAllocParserInputBuffer(enc);
10648
if (buf == NULL) return(NULL);
10650
ctxt = xmlNewParserCtxt();
10651
if (ctxt == NULL) {
10652
xmlErrMemory(NULL, "creating parser: out of memory\n");
10653
xmlFreeParserInputBuffer(buf);
10656
ctxt->dictNames = 1;
10657
ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10658
if (ctxt->pushTab == NULL) {
10659
xmlErrMemory(ctxt, NULL);
10660
xmlFreeParserInputBuffer(buf);
10661
xmlFreeParserCtxt(ctxt);
10665
#ifdef LIBXML_SAX1_ENABLED
10666
if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
10667
#endif /* LIBXML_SAX1_ENABLED */
10668
xmlFree(ctxt->sax);
10669
ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10670
if (ctxt->sax == NULL) {
10671
xmlErrMemory(ctxt, NULL);
10672
xmlFreeParserInputBuffer(buf);
10673
xmlFreeParserCtxt(ctxt);
10676
memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10677
if (sax->initialized == XML_SAX2_MAGIC)
10678
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10680
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
10681
if (user_data != NULL)
10682
ctxt->userData = user_data;
10684
if (filename == NULL) {
10685
ctxt->directory = NULL;
10687
ctxt->directory = xmlParserGetDirectory(filename);
10690
inputStream = xmlNewInputStream(ctxt);
10691
if (inputStream == NULL) {
10692
xmlFreeParserCtxt(ctxt);
10693
xmlFreeParserInputBuffer(buf);
10697
if (filename == NULL)
10698
inputStream->filename = NULL;
10700
inputStream->filename = (char *)
10701
xmlCanonicPath((const xmlChar *) filename);
10702
if (inputStream->filename == NULL) {
10703
xmlFreeParserCtxt(ctxt);
10704
xmlFreeParserInputBuffer(buf);
10708
inputStream->buf = buf;
10709
inputStream->base = inputStream->buf->buffer->content;
10710
inputStream->cur = inputStream->buf->buffer->content;
10712
&inputStream->buf->buffer->content[inputStream->buf->buffer->use];
10714
inputPush(ctxt, inputStream);
10717
* If the caller didn't provide an initial 'chunk' for determining
10718
* the encoding, we set the context to XML_CHAR_ENCODING_NONE so
10719
* that it can be automatically determined later
10721
if ((size == 0) || (chunk == NULL)) {
10722
ctxt->charset = XML_CHAR_ENCODING_NONE;
10723
} else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
10724
int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10725
int cur = ctxt->input->cur - ctxt->input->base;
10727
xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10729
ctxt->input->base = ctxt->input->buf->buffer->content + base;
10730
ctxt->input->cur = ctxt->input->base + cur;
10732
&ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
10734
xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10738
if (enc != XML_CHAR_ENCODING_NONE) {
10739
xmlSwitchEncoding(ctxt, enc);
10744
#endif /* LIBXML_PUSH_ENABLED */
10748
* @ctxt: an XML parser context
10750
* Blocks further parser processing
10753
xmlStopParser(xmlParserCtxtPtr ctxt) {
10756
ctxt->instate = XML_PARSER_EOF;
10757
ctxt->disableSAX = 1;
10758
if (ctxt->input != NULL) {
10759
ctxt->input->cur = BAD_CAST"";
10760
ctxt->input->base = ctxt->input->cur;
10765
* xmlCreateIOParserCtxt:
10766
* @sax: a SAX handler
10767
* @user_data: The user data returned on SAX callbacks
10768
* @ioread: an I/O read function
10769
* @ioclose: an I/O close function
10770
* @ioctx: an I/O handler
10771
* @enc: the charset encoding if known
10773
* Create a parser context for using the XML parser with an existing
10776
* Returns the new parser context or NULL
10779
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10780
xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10781
void *ioctx, xmlCharEncoding enc) {
10782
xmlParserCtxtPtr ctxt;
10783
xmlParserInputPtr inputStream;
10784
xmlParserInputBufferPtr buf;
10786
if (ioread == NULL) return(NULL);
10788
buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10789
if (buf == NULL) return(NULL);
10791
ctxt = xmlNewParserCtxt();
10792
if (ctxt == NULL) {
10793
xmlFreeParserInputBuffer(buf);
10797
#ifdef LIBXML_SAX1_ENABLED
10798
if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
10799
#endif /* LIBXML_SAX1_ENABLED */
10800
xmlFree(ctxt->sax);
10801
ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10802
if (ctxt->sax == NULL) {
10803
xmlErrMemory(ctxt, NULL);
10804
xmlFreeParserCtxt(ctxt);
10807
memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10808
if (sax->initialized == XML_SAX2_MAGIC)
10809
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10811
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
10812
if (user_data != NULL)
10813
ctxt->userData = user_data;
10816
inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10817
if (inputStream == NULL) {
10818
xmlFreeParserCtxt(ctxt);
10821
inputPush(ctxt, inputStream);
10826
#ifdef LIBXML_VALID_ENABLED
10827
/************************************************************************
10829
* Front ends when parsing a DTD *
10831
************************************************************************/
10835
* @sax: the SAX handler block or NULL
10836
* @input: an Input Buffer
10837
* @enc: the charset encoding if known
10839
* Load and parse a DTD
10841
* Returns the resulting xmlDtdPtr or NULL in case of error.
10842
* @input will be freed by the function in any case.
10846
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10847
xmlCharEncoding enc) {
10848
xmlDtdPtr ret = NULL;
10849
xmlParserCtxtPtr ctxt;
10850
xmlParserInputPtr pinput = NULL;
10856
ctxt = xmlNewParserCtxt();
10857
if (ctxt == NULL) {
10858
xmlFreeParserInputBuffer(input);
10863
* Set-up the SAX context
10866
if (ctxt->sax != NULL)
10867
xmlFree(ctxt->sax);
10869
ctxt->userData = ctxt;
10871
xmlDetectSAX2(ctxt);
10874
* generate a parser input from the I/O handler
10877
pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
10878
if (pinput == NULL) {
10879
if (sax != NULL) ctxt->sax = NULL;
10880
xmlFreeParserInputBuffer(input);
10881
xmlFreeParserCtxt(ctxt);
10886
* plug some encoding conversion routines here.
10888
xmlPushInput(ctxt, pinput);
10889
if (enc != XML_CHAR_ENCODING_NONE) {
10890
xmlSwitchEncoding(ctxt, enc);
10893
pinput->filename = NULL;
10896
pinput->base = ctxt->input->cur;
10897
pinput->cur = ctxt->input->cur;
10898
pinput->free = NULL;
10901
* let's parse that entity knowing it's an external subset.
10903
ctxt->inSubset = 2;
10904
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10905
ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10906
BAD_CAST "none", BAD_CAST "none");
10908
if ((enc == XML_CHAR_ENCODING_NONE) &&
10909
((ctxt->input->end - ctxt->input->cur) >= 4)) {
10911
* Get the first 4 bytes and decode the charset
10912
* if enc != XML_CHAR_ENCODING_NONE
10913
* plug some encoding conversion routines.
10919
enc = xmlDetectCharEncoding(start, 4);
10920
if (enc != XML_CHAR_ENCODING_NONE) {
10921
xmlSwitchEncoding(ctxt, enc);
10925
xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10927
if (ctxt->myDoc != NULL) {
10928
if (ctxt->wellFormed) {
10929
ret = ctxt->myDoc->extSubset;
10930
ctxt->myDoc->extSubset = NULL;
10935
tmp = ret->children;
10936
while (tmp != NULL) {
10944
xmlFreeDoc(ctxt->myDoc);
10945
ctxt->myDoc = NULL;
10947
if (sax != NULL) ctxt->sax = NULL;
10948
xmlFreeParserCtxt(ctxt);
10955
* @sax: the SAX handler block
10956
* @ExternalID: a NAME* containing the External ID of the DTD
10957
* @SystemID: a NAME* containing the URL to the DTD
10959
* Load and parse an external subset.
10961
* Returns the resulting xmlDtdPtr or NULL in case of error.
10965
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10966
const xmlChar *SystemID) {
10967
xmlDtdPtr ret = NULL;
10968
xmlParserCtxtPtr ctxt;
10969
xmlParserInputPtr input = NULL;
10970
xmlCharEncoding enc;
10971
xmlChar* systemIdCanonic;
10973
if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10975
ctxt = xmlNewParserCtxt();
10976
if (ctxt == NULL) {
10981
* Set-up the SAX context
10984
if (ctxt->sax != NULL)
10985
xmlFree(ctxt->sax);
10987
ctxt->userData = ctxt;
10991
* Canonicalise the system ID
10993
systemIdCanonic = xmlCanonicPath(SystemID);
10994
if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
10995
xmlFreeParserCtxt(ctxt);
11000
* Ask the Entity resolver to load the damn thing
11003
if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11004
input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11006
if (input == NULL) {
11007
if (sax != NULL) ctxt->sax = NULL;
11008
xmlFreeParserCtxt(ctxt);
11009
if (systemIdCanonic != NULL)
11010
xmlFree(systemIdCanonic);
11015
* plug some encoding conversion routines here.
11017
xmlPushInput(ctxt, input);
11018
if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11019
enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11020
xmlSwitchEncoding(ctxt, enc);
11023
if (input->filename == NULL)
11024
input->filename = (char *) systemIdCanonic;
11026
xmlFree(systemIdCanonic);
11029
input->base = ctxt->input->cur;
11030
input->cur = ctxt->input->cur;
11031
input->free = NULL;
11034
* let's parse that entity knowing it's an external subset.
11036
ctxt->inSubset = 2;
11037
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11038
ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11039
ExternalID, SystemID);
11040
xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11042
if (ctxt->myDoc != NULL) {
11043
if (ctxt->wellFormed) {
11044
ret = ctxt->myDoc->extSubset;
11045
ctxt->myDoc->extSubset = NULL;
11050
tmp = ret->children;
11051
while (tmp != NULL) {
11059
xmlFreeDoc(ctxt->myDoc);
11060
ctxt->myDoc = NULL;
11062
if (sax != NULL) ctxt->sax = NULL;
11063
xmlFreeParserCtxt(ctxt);
11071
* @ExternalID: a NAME* containing the External ID of the DTD
11072
* @SystemID: a NAME* containing the URL to the DTD
11074
* Load and parse an external subset.
11076
* Returns the resulting xmlDtdPtr or NULL in case of error.
11080
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11081
return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11083
#endif /* LIBXML_VALID_ENABLED */
11085
/************************************************************************
11087
* Front ends when parsing an Entity *
11089
************************************************************************/
11092
* xmlParseCtxtExternalEntity:
11093
* @ctx: the existing parsing context
11094
* @URL: the URL for the entity to load
11095
* @ID: the System ID for the entity to load
11096
* @lst: the return value for the set of parsed nodes
11098
* Parse an external general entity within an existing parsing context
11099
* An external general parsed entity is well-formed if it matches the
11100
* production labeled extParsedEnt.
11102
* [78] extParsedEnt ::= TextDecl? content
11104
* Returns 0 if the entity is well formed, -1 in case of args problem and
11105
* the parser error code otherwise
11109
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
11110
const xmlChar *ID, xmlNodePtr *lst) {
11111
xmlParserCtxtPtr ctxt;
11113
xmlNodePtr newRoot;
11114
xmlSAXHandlerPtr oldsax = NULL;
11117
xmlCharEncoding enc;
11118
xmlParserInputPtr inputStream;
11119
char *directory = NULL;
11121
if (ctx == NULL) return(-1);
11123
if (ctx->depth > 40) {
11124
return(XML_ERR_ENTITY_LOOP);
11129
if ((URL == NULL) && (ID == NULL))
11131
if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11134
ctxt = xmlNewParserCtxt();
11135
if (ctxt == NULL) {
11139
ctxt->userData = ctxt;
11140
ctxt->_private = ctx->_private;
11142
inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11143
if (inputStream == NULL) {
11144
xmlFreeParserCtxt(ctxt);
11148
inputPush(ctxt, inputStream);
11150
if ((ctxt->directory == NULL) && (directory == NULL))
11151
directory = xmlParserGetDirectory((char *)URL);
11152
if ((ctxt->directory == NULL) && (directory != NULL))
11153
ctxt->directory = directory;
11155
oldsax = ctxt->sax;
11156
ctxt->sax = ctx->sax;
11157
xmlDetectSAX2(ctxt);
11158
newDoc = xmlNewDoc(BAD_CAST "1.0");
11159
if (newDoc == NULL) {
11160
xmlFreeParserCtxt(ctxt);
11163
if (ctx->myDoc->dict) {
11164
newDoc->dict = ctx->myDoc->dict;
11165
xmlDictReference(newDoc->dict);
11167
if (ctx->myDoc != NULL) {
11168
newDoc->intSubset = ctx->myDoc->intSubset;
11169
newDoc->extSubset = ctx->myDoc->extSubset;
11171
if (ctx->myDoc->URL != NULL) {
11172
newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11174
newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11175
if (newRoot == NULL) {
11176
ctxt->sax = oldsax;
11177
xmlFreeParserCtxt(ctxt);
11178
newDoc->intSubset = NULL;
11179
newDoc->extSubset = NULL;
11180
xmlFreeDoc(newDoc);
11183
xmlAddChild((xmlNodePtr) newDoc, newRoot);
11184
nodePush(ctxt, newDoc->children);
11185
if (ctx->myDoc == NULL) {
11186
ctxt->myDoc = newDoc;
11188
ctxt->myDoc = ctx->myDoc;
11189
newDoc->children->doc = ctx->myDoc;
11193
* Get the first 4 bytes and decode the charset
11194
* if enc != XML_CHAR_ENCODING_NONE
11195
* plug some encoding conversion routines.
11198
if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11203
enc = xmlDetectCharEncoding(start, 4);
11204
if (enc != XML_CHAR_ENCODING_NONE) {
11205
xmlSwitchEncoding(ctxt, enc);
11210
* Parse a possible text declaration first
11212
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11213
xmlParseTextDecl(ctxt);
11217
* Doing validity checking on chunk doesn't make sense
11219
ctxt->instate = XML_PARSER_CONTENT;
11220
ctxt->validate = ctx->validate;
11221
ctxt->valid = ctx->valid;
11222
ctxt->loadsubset = ctx->loadsubset;
11223
ctxt->depth = ctx->depth + 1;
11224
ctxt->replaceEntities = ctx->replaceEntities;
11225
if (ctxt->validate) {
11226
ctxt->vctxt.error = ctx->vctxt.error;
11227
ctxt->vctxt.warning = ctx->vctxt.warning;
11229
ctxt->vctxt.error = NULL;
11230
ctxt->vctxt.warning = NULL;
11232
ctxt->vctxt.nodeTab = NULL;
11233
ctxt->vctxt.nodeNr = 0;
11234
ctxt->vctxt.nodeMax = 0;
11235
ctxt->vctxt.node = NULL;
11236
if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11237
ctxt->dict = ctx->dict;
11238
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11239
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11240
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11241
ctxt->dictNames = ctx->dictNames;
11242
ctxt->attsDefault = ctx->attsDefault;
11243
ctxt->attsSpecial = ctx->attsSpecial;
11244
ctxt->linenumbers = ctx->linenumbers;
11246
xmlParseContent(ctxt);
11248
ctx->validate = ctxt->validate;
11249
ctx->valid = ctxt->valid;
11250
if ((RAW == '<') && (NXT(1) == '/')) {
11251
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11252
} else if (RAW != 0) {
11253
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11255
if (ctxt->node != newDoc->children) {
11256
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11259
if (!ctxt->wellFormed) {
11260
if (ctxt->errNo == 0)
11269
* Return the newly created nodeset after unlinking it from
11270
* their pseudo parent.
11272
cur = newDoc->children->children;
11274
while (cur != NULL) {
11275
cur->parent = NULL;
11278
newDoc->children->children = NULL;
11282
ctxt->sax = oldsax;
11284
ctxt->attsDefault = NULL;
11285
ctxt->attsSpecial = NULL;
11286
xmlFreeParserCtxt(ctxt);
11287
newDoc->intSubset = NULL;
11288
newDoc->extSubset = NULL;
11289
xmlFreeDoc(newDoc);
11295
* xmlParseExternalEntityPrivate:
11296
* @doc: the document the chunk pertains to
11297
* @oldctxt: the previous parser context if available
11298
* @sax: the SAX handler block (possibly NULL)
11299
* @user_data: The user data returned on SAX callbacks (possibly NULL)
11300
* @depth: Used for loop detection, use 0
11301
* @URL: the URL for the entity to load
11302
* @ID: the System ID for the entity to load
11303
* @list: the return value for the set of parsed nodes
11305
* Private version of xmlParseExternalEntity()
11307
* Returns 0 if the entity is well formed, -1 in case of args problem and
11308
* the parser error code otherwise
11311
static xmlParserErrors
11312
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
11313
xmlSAXHandlerPtr sax,
11314
void *user_data, int depth, const xmlChar *URL,
11315
const xmlChar *ID, xmlNodePtr *list) {
11316
xmlParserCtxtPtr ctxt;
11318
xmlNodePtr newRoot;
11319
xmlSAXHandlerPtr oldsax = NULL;
11320
xmlParserErrors ret = XML_ERR_OK;
11322
xmlCharEncoding enc;
11325
return(XML_ERR_ENTITY_LOOP);
11332
if ((URL == NULL) && (ID == NULL))
11333
return(XML_ERR_INTERNAL_ERROR);
11335
return(XML_ERR_INTERNAL_ERROR);
11338
ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
11339
if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
11340
ctxt->userData = ctxt;
11341
if (oldctxt != NULL) {
11342
ctxt->_private = oldctxt->_private;
11343
ctxt->loadsubset = oldctxt->loadsubset;
11344
ctxt->validate = oldctxt->validate;
11345
ctxt->external = oldctxt->external;
11346
ctxt->record_info = oldctxt->record_info;
11347
ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
11348
ctxt->node_seq.length = oldctxt->node_seq.length;
11349
ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
11352
* Doing validity checking on chunk without context
11353
* doesn't make sense
11355
ctxt->_private = NULL;
11356
ctxt->validate = 0;
11357
ctxt->external = 2;
11358
ctxt->loadsubset = 0;
11361
oldsax = ctxt->sax;
11363
if (user_data != NULL)
11364
ctxt->userData = user_data;
11366
xmlDetectSAX2(ctxt);
11367
newDoc = xmlNewDoc(BAD_CAST "1.0");
11368
if (newDoc == NULL) {
11369
ctxt->node_seq.maximum = 0;
11370
ctxt->node_seq.length = 0;
11371
ctxt->node_seq.buffer = NULL;
11372
xmlFreeParserCtxt(ctxt);
11373
return(XML_ERR_INTERNAL_ERROR);
11375
newDoc->intSubset = doc->intSubset;
11376
newDoc->extSubset = doc->extSubset;
11377
newDoc->dict = doc->dict;
11378
xmlDictReference(newDoc->dict);
11380
if (doc->URL != NULL) {
11381
newDoc->URL = xmlStrdup(doc->URL);
11383
newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11384
if (newRoot == NULL) {
11386
ctxt->sax = oldsax;
11387
ctxt->node_seq.maximum = 0;
11388
ctxt->node_seq.length = 0;
11389
ctxt->node_seq.buffer = NULL;
11390
xmlFreeParserCtxt(ctxt);
11391
newDoc->intSubset = NULL;
11392
newDoc->extSubset = NULL;
11393
xmlFreeDoc(newDoc);
11394
return(XML_ERR_INTERNAL_ERROR);
11396
xmlAddChild((xmlNodePtr) newDoc, newRoot);
11397
nodePush(ctxt, newDoc->children);
11399
newRoot->doc = doc;
11402
* Get the first 4 bytes and decode the charset
11403
* if enc != XML_CHAR_ENCODING_NONE
11404
* plug some encoding conversion routines.
11407
if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11412
enc = xmlDetectCharEncoding(start, 4);
11413
if (enc != XML_CHAR_ENCODING_NONE) {
11414
xmlSwitchEncoding(ctxt, enc);
11419
* Parse a possible text declaration first
11421
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11422
xmlParseTextDecl(ctxt);
11425
ctxt->instate = XML_PARSER_CONTENT;
11426
ctxt->depth = depth;
11428
xmlParseContent(ctxt);
11430
if ((RAW == '<') && (NXT(1) == '/')) {
11431
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11432
} else if (RAW != 0) {
11433
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11435
if (ctxt->node != newDoc->children) {
11436
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11439
if (!ctxt->wellFormed) {
11440
if (ctxt->errNo == 0)
11441
ret = XML_ERR_INTERNAL_ERROR;
11443
ret = (xmlParserErrors)ctxt->errNo;
11445
if (list != NULL) {
11449
* Return the newly created nodeset after unlinking it from
11450
* the pseudo parent.
11452
cur = newDoc->children->children;
11454
while (cur != NULL) {
11455
cur->parent = NULL;
11458
newDoc->children->children = NULL;
11463
ctxt->sax = oldsax;
11464
oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11465
oldctxt->node_seq.length = ctxt->node_seq.length;
11466
oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
11467
ctxt->node_seq.maximum = 0;
11468
ctxt->node_seq.length = 0;
11469
ctxt->node_seq.buffer = NULL;
11470
xmlFreeParserCtxt(ctxt);
11471
newDoc->intSubset = NULL;
11472
newDoc->extSubset = NULL;
11473
xmlFreeDoc(newDoc);
11478
#ifdef LIBXML_SAX1_ENABLED
11480
* xmlParseExternalEntity:
11481
* @doc: the document the chunk pertains to
11482
* @sax: the SAX handler block (possibly NULL)
11483
* @user_data: The user data returned on SAX callbacks (possibly NULL)
11484
* @depth: Used for loop detection, use 0
11485
* @URL: the URL for the entity to load
11486
* @ID: the System ID for the entity to load
11487
* @lst: the return value for the set of parsed nodes
11489
* Parse an external general entity
11490
* An external general parsed entity is well-formed if it matches the
11491
* production labeled extParsedEnt.
11493
* [78] extParsedEnt ::= TextDecl? content
11495
* Returns 0 if the entity is well formed, -1 in case of args problem and
11496
* the parser error code otherwise
11500
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
11501
int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
11502
return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
11507
* xmlParseBalancedChunkMemory:
11508
* @doc: the document the chunk pertains to
11509
* @sax: the SAX handler block (possibly NULL)
11510
* @user_data: The user data returned on SAX callbacks (possibly NULL)
11511
* @depth: Used for loop detection, use 0
11512
* @string: the input string in UTF8 or ISO-Latin (zero terminated)
11513
* @lst: the return value for the set of parsed nodes
11515
* Parse a well-balanced chunk of an XML document
11516
* called by the parser
11517
* The allowed sequence for the Well Balanced Chunk is the one defined by
11518
* the content production in the XML grammar:
11520
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11522
* Returns 0 if the chunk is well balanced, -1 in case of args problem and
11523
* the parser error code otherwise
11527
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11528
void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
11529
return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11530
depth, string, lst, 0 );
11532
#endif /* LIBXML_SAX1_ENABLED */
11535
* xmlParseBalancedChunkMemoryInternal:
11536
* @oldctxt: the existing parsing context
11537
* @string: the input string in UTF8 or ISO-Latin (zero terminated)
11538
* @user_data: the user data field for the parser context
11539
* @lst: the return value for the set of parsed nodes
11542
* Parse a well-balanced chunk of an XML document
11543
* called by the parser
11544
* The allowed sequence for the Well Balanced Chunk is the one defined by
11545
* the content production in the XML grammar:
11547
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11549
* Returns XML_ERR_OK if the chunk is well balanced, and the parser
11550
* error code otherwise
11552
* In case recover is set to 1, the nodelist will not be empty even if
11553
* the parsed chunk is not well balanced.
11555
static xmlParserErrors
11556
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11557
const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11558
xmlParserCtxtPtr ctxt;
11559
xmlDocPtr newDoc = NULL;
11560
xmlNodePtr newRoot;
11561
xmlSAXHandlerPtr oldsax = NULL;
11562
xmlNodePtr content = NULL;
11563
xmlNodePtr last = NULL;
11565
xmlParserErrors ret = XML_ERR_OK;
11567
if (oldctxt->depth > 40) {
11568
return(XML_ERR_ENTITY_LOOP);
11574
if (string == NULL)
11575
return(XML_ERR_INTERNAL_ERROR);
11577
size = xmlStrlen(string);
11579
ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11580
if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
11581
if (user_data != NULL)
11582
ctxt->userData = user_data;
11584
ctxt->userData = ctxt;
11585
if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11586
ctxt->dict = oldctxt->dict;
11587
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11588
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11589
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11591
oldsax = ctxt->sax;
11592
ctxt->sax = oldctxt->sax;
11593
xmlDetectSAX2(ctxt);
11594
ctxt->replaceEntities = oldctxt->replaceEntities;
11595
ctxt->options = oldctxt->options;
11597
ctxt->_private = oldctxt->_private;
11598
if (oldctxt->myDoc == NULL) {
11599
newDoc = xmlNewDoc(BAD_CAST "1.0");
11600
if (newDoc == NULL) {
11601
ctxt->sax = oldsax;
11603
xmlFreeParserCtxt(ctxt);
11604
return(XML_ERR_INTERNAL_ERROR);
11606
newDoc->dict = ctxt->dict;
11607
xmlDictReference(newDoc->dict);
11608
ctxt->myDoc = newDoc;
11610
ctxt->myDoc = oldctxt->myDoc;
11611
content = ctxt->myDoc->children;
11612
last = ctxt->myDoc->last;
11614
newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11615
if (newRoot == NULL) {
11616
ctxt->sax = oldsax;
11618
xmlFreeParserCtxt(ctxt);
11619
if (newDoc != NULL) {
11620
xmlFreeDoc(newDoc);
11622
return(XML_ERR_INTERNAL_ERROR);
11624
ctxt->myDoc->children = NULL;
11625
ctxt->myDoc->last = NULL;
11626
xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
11627
nodePush(ctxt, ctxt->myDoc->children);
11628
ctxt->instate = XML_PARSER_CONTENT;
11629
ctxt->depth = oldctxt->depth + 1;
11631
ctxt->validate = 0;
11632
ctxt->loadsubset = oldctxt->loadsubset;
11633
if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11635
* ID/IDREF registration will be done in xmlValidateElement below
11637
ctxt->loadsubset |= XML_SKIP_IDS;
11639
ctxt->dictNames = oldctxt->dictNames;
11640
ctxt->attsDefault = oldctxt->attsDefault;
11641
ctxt->attsSpecial = oldctxt->attsSpecial;
11643
xmlParseContent(ctxt);
11644
if ((RAW == '<') && (NXT(1) == '/')) {
11645
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11646
} else if (RAW != 0) {
11647
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11649
if (ctxt->node != ctxt->myDoc->children) {
11650
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11653
if (!ctxt->wellFormed) {
11654
if (ctxt->errNo == 0)
11655
ret = XML_ERR_INTERNAL_ERROR;
11657
ret = (xmlParserErrors)ctxt->errNo;
11662
if ((lst != NULL) && (ret == XML_ERR_OK)) {
11666
* Return the newly created nodeset after unlinking it from
11667
* the pseudo parent.
11669
cur = ctxt->myDoc->children->children;
11671
while (cur != NULL) {
11672
#ifdef LIBXML_VALID_ENABLED
11673
if ((oldctxt->validate) && (oldctxt->wellFormed) &&
11674
(oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
11675
(cur->type == XML_ELEMENT_NODE)) {
11676
oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11677
oldctxt->myDoc, cur);
11679
#endif /* LIBXML_VALID_ENABLED */
11680
cur->parent = NULL;
11683
ctxt->myDoc->children->children = NULL;
11685
if (ctxt->myDoc != NULL) {
11686
xmlFreeNode(ctxt->myDoc->children);
11687
ctxt->myDoc->children = content;
11688
ctxt->myDoc->last = last;
11691
ctxt->sax = oldsax;
11693
ctxt->attsDefault = NULL;
11694
ctxt->attsSpecial = NULL;
11695
xmlFreeParserCtxt(ctxt);
11696
if (newDoc != NULL) {
11697
xmlFreeDoc(newDoc);
11704
* xmlParseInNodeContext:
11705
* @node: the context node
11706
* @data: the input string
11707
* @datalen: the input string length in bytes
11708
* @options: a combination of xmlParserOption
11709
* @lst: the return value for the set of parsed nodes
11711
* Parse a well-balanced chunk of an XML document
11712
* within the context (DTD, namespaces, etc ...) of the given node.
11714
* The allowed sequence for the data is a Well Balanced Chunk defined by
11715
* the content production in the XML grammar:
11717
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11719
* Returns XML_ERR_OK if the chunk is well balanced, and the parser
11720
* error code otherwise
11723
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
11724
int options, xmlNodePtr *lst) {
11726
xmlParserCtxtPtr ctxt;
11727
xmlDocPtr doc = NULL;
11728
xmlNodePtr fake, cur;
11731
xmlParserErrors ret = XML_ERR_OK;
11734
* check all input parameters, grab the document
11736
if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
11737
return(XML_ERR_INTERNAL_ERROR);
11738
switch (node->type) {
11739
case XML_ELEMENT_NODE:
11740
case XML_ATTRIBUTE_NODE:
11741
case XML_TEXT_NODE:
11742
case XML_CDATA_SECTION_NODE:
11743
case XML_ENTITY_REF_NODE:
11745
case XML_COMMENT_NODE:
11746
case XML_DOCUMENT_NODE:
11747
case XML_HTML_DOCUMENT_NODE:
11750
return(XML_ERR_INTERNAL_ERROR);
11753
while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
11754
(node->type != XML_DOCUMENT_NODE) &&
11755
(node->type != XML_HTML_DOCUMENT_NODE))
11756
node = node->parent;
11758
return(XML_ERR_INTERNAL_ERROR);
11759
if (node->type == XML_ELEMENT_NODE)
11762
doc = (xmlDocPtr) node;
11764
return(XML_ERR_INTERNAL_ERROR);
11767
* allocate a context and set-up everything not related to the
11768
* node position in the tree
11770
if (doc->type == XML_DOCUMENT_NODE)
11771
ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
11772
#ifdef LIBXML_HTML_ENABLED
11773
else if (doc->type == XML_HTML_DOCUMENT_NODE)
11774
ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
11777
return(XML_ERR_INTERNAL_ERROR);
11780
return(XML_ERR_NO_MEMORY);
11781
fake = xmlNewComment(NULL);
11782
if (fake == NULL) {
11783
xmlFreeParserCtxt(ctxt);
11784
return(XML_ERR_NO_MEMORY);
11786
xmlAddChild(node, fake);
11789
* Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
11790
* We need a dictionary for xmlDetectSAX2, so if there's no doc dict
11791
* we must wait until the last moment to free the original one.
11793
if (doc->dict != NULL) {
11794
if (ctxt->dict != NULL)
11795
xmlDictFree(ctxt->dict);
11796
ctxt->dict = doc->dict;
11798
options |= XML_PARSE_NODICT;
11800
xmlCtxtUseOptions(ctxt, options);
11801
xmlDetectSAX2(ctxt);
11804
if (node->type == XML_ELEMENT_NODE) {
11805
nodePush(ctxt, node);
11807
* initialize the SAX2 namespaces stack
11810
while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
11811
xmlNsPtr ns = cur->nsDef;
11812
const xmlChar *iprefix, *ihref;
11814
while (ns != NULL) {
11816
iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
11817
ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
11819
iprefix = ns->prefix;
11823
if (xmlGetNamespace(ctxt, iprefix) == NULL) {
11824
nsPush(ctxt, iprefix, ihref);
11831
ctxt->instate = XML_PARSER_CONTENT;
11834
if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
11836
* ID/IDREF registration will be done in xmlValidateElement below
11838
ctxt->loadsubset |= XML_SKIP_IDS;
11841
#ifdef LIBXML_HTML_ENABLED
11842
if (doc->type == XML_HTML_DOCUMENT_NODE)
11843
__htmlParseContent(ctxt);
11846
xmlParseContent(ctxt);
11849
if ((RAW == '<') && (NXT(1) == '/')) {
11850
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11851
} else if (RAW != 0) {
11852
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11854
if ((ctxt->node != NULL) && (ctxt->node != node)) {
11855
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11856
ctxt->wellFormed = 0;
11859
if (!ctxt->wellFormed) {
11860
if (ctxt->errNo == 0)
11861
ret = XML_ERR_INTERNAL_ERROR;
11863
ret = (xmlParserErrors)ctxt->errNo;
11869
* Return the newly created nodeset after unlinking it from
11870
* the pseudo sibling.
11883
while (cur != NULL) {
11884
cur->parent = NULL;
11888
xmlUnlinkNode(fake);
11892
if (ret != XML_ERR_OK) {
11893
xmlFreeNodeList(*lst);
11897
if (doc->dict != NULL)
11899
xmlFreeParserCtxt(ctxt);
11903
return(XML_ERR_INTERNAL_ERROR);
11907
#ifdef LIBXML_SAX1_ENABLED
11909
* xmlParseBalancedChunkMemoryRecover:
11910
* @doc: the document the chunk pertains to
11911
* @sax: the SAX handler block (possibly NULL)
11912
* @user_data: The user data returned on SAX callbacks (possibly NULL)
11913
* @depth: Used for loop detection, use 0
11914
* @string: the input string in UTF8 or ISO-Latin (zero terminated)
11915
* @lst: the return value for the set of parsed nodes
11916
* @recover: return nodes even if the data is broken (use 0)
11919
* Parse a well-balanced chunk of an XML document
11920
* called by the parser
11921
* The allowed sequence for the Well Balanced Chunk is the one defined by
11922
* the content production in the XML grammar:
11924
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11926
* Returns 0 if the chunk is well balanced, -1 in case of args problem and
11927
* the parser error code otherwise
11929
* In case recover is set to 1, the nodelist will not be empty even if
11930
* the parsed chunk is not well balanced.
11933
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11934
void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11936
xmlParserCtxtPtr ctxt;
11938
xmlSAXHandlerPtr oldsax = NULL;
11939
xmlNodePtr content, newRoot;
11944
return(XML_ERR_ENTITY_LOOP);
11950
if (string == NULL)
11953
size = xmlStrlen(string);
11955
ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11956
if (ctxt == NULL) return(-1);
11957
ctxt->userData = ctxt;
11959
oldsax = ctxt->sax;
11961
if (user_data != NULL)
11962
ctxt->userData = user_data;
11964
newDoc = xmlNewDoc(BAD_CAST "1.0");
11965
if (newDoc == NULL) {
11966
xmlFreeParserCtxt(ctxt);
11969
if ((doc != NULL) && (doc->dict != NULL)) {
11970
xmlDictFree(ctxt->dict);
11971
ctxt->dict = doc->dict;
11972
xmlDictReference(ctxt->dict);
11973
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11974
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11975
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11976
ctxt->dictNames = 1;
11978
xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
11981
newDoc->intSubset = doc->intSubset;
11982
newDoc->extSubset = doc->extSubset;
11984
newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11985
if (newRoot == NULL) {
11987
ctxt->sax = oldsax;
11988
xmlFreeParserCtxt(ctxt);
11989
newDoc->intSubset = NULL;
11990
newDoc->extSubset = NULL;
11991
xmlFreeDoc(newDoc);
11994
xmlAddChild((xmlNodePtr) newDoc, newRoot);
11995
nodePush(ctxt, newRoot);
11997
ctxt->myDoc = newDoc;
11999
ctxt->myDoc = newDoc;
12000
newDoc->children->doc = doc;
12001
/* Ensure that doc has XML spec namespace */
12002
xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
12003
newDoc->oldNs = doc->oldNs;
12005
ctxt->instate = XML_PARSER_CONTENT;
12006
ctxt->depth = depth;
12009
* Doing validity checking on chunk doesn't make sense
12011
ctxt->validate = 0;
12012
ctxt->loadsubset = 0;
12013
xmlDetectSAX2(ctxt);
12015
if ( doc != NULL ){
12016
content = doc->children;
12017
doc->children = NULL;
12018
xmlParseContent(ctxt);
12019
doc->children = content;
12022
xmlParseContent(ctxt);
12024
if ((RAW == '<') && (NXT(1) == '/')) {
12025
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12026
} else if (RAW != 0) {
12027
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12029
if (ctxt->node != newDoc->children) {
12030
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12033
if (!ctxt->wellFormed) {
12034
if (ctxt->errNo == 0)
12042
if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
12046
* Return the newly created nodeset after unlinking it from
12047
* the pseudo parent.
12049
cur = newDoc->children->children;
12051
while (cur != NULL) {
12052
xmlSetTreeDoc(cur, doc);
12053
cur->parent = NULL;
12056
newDoc->children->children = NULL;
12060
ctxt->sax = oldsax;
12061
xmlFreeParserCtxt(ctxt);
12062
newDoc->intSubset = NULL;
12063
newDoc->extSubset = NULL;
12064
newDoc->oldNs = NULL;
12065
xmlFreeDoc(newDoc);
12071
* xmlSAXParseEntity:
12072
* @sax: the SAX handler block
12073
* @filename: the filename
12075
* parse an XML external entity out of context and build a tree.
12076
* It uses the given SAX function block to handle the parsing callback.
12077
* If sax is NULL, fallback to the default DOM tree building routines.
12079
* [78] extParsedEnt ::= TextDecl? content
12081
* This corresponds to a "Well Balanced" chunk
12083
* Returns the resulting document tree
12087
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12089
xmlParserCtxtPtr ctxt;
12091
ctxt = xmlCreateFileParserCtxt(filename);
12092
if (ctxt == NULL) {
12096
if (ctxt->sax != NULL)
12097
xmlFree(ctxt->sax);
12099
ctxt->userData = NULL;
12102
xmlParseExtParsedEnt(ctxt);
12104
if (ctxt->wellFormed)
12108
xmlFreeDoc(ctxt->myDoc);
12109
ctxt->myDoc = NULL;
12113
xmlFreeParserCtxt(ctxt);
12120
* @filename: the filename
12122
* parse an XML external entity out of context and build a tree.
12124
* [78] extParsedEnt ::= TextDecl? content
12126
* This corresponds to a "Well Balanced" chunk
12128
* Returns the resulting document tree
12132
xmlParseEntity(const char *filename) {
12133
return(xmlSAXParseEntity(NULL, filename));
12135
#endif /* LIBXML_SAX1_ENABLED */
12138
* xmlCreateEntityParserCtxt:
12139
* @URL: the entity URL
12140
* @ID: the entity PUBLIC ID
12141
* @base: a possible base for the target URI
12143
* Create a parser context for an external entity
12144
* Automatic support for ZLIB/Compress compressed document is provided
12145
* by default if found at compile-time.
12147
* Returns the new parser context or NULL
12150
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12151
const xmlChar *base) {
12152
xmlParserCtxtPtr ctxt;
12153
xmlParserInputPtr inputStream;
12154
char *directory = NULL;
12157
ctxt = xmlNewParserCtxt();
12158
if (ctxt == NULL) {
12162
uri = xmlBuildURI(URL, base);
12165
inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12166
if (inputStream == NULL) {
12167
xmlFreeParserCtxt(ctxt);
12171
inputPush(ctxt, inputStream);
12173
if ((ctxt->directory == NULL) && (directory == NULL))
12174
directory = xmlParserGetDirectory((char *)URL);
12175
if ((ctxt->directory == NULL) && (directory != NULL))
12176
ctxt->directory = directory;
12178
inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12179
if (inputStream == NULL) {
12181
xmlFreeParserCtxt(ctxt);
12185
inputPush(ctxt, inputStream);
12187
if ((ctxt->directory == NULL) && (directory == NULL))
12188
directory = xmlParserGetDirectory((char *)uri);
12189
if ((ctxt->directory == NULL) && (directory != NULL))
12190
ctxt->directory = directory;
12196
/************************************************************************
12198
* Front ends when parsing from a file *
12200
************************************************************************/
12203
* xmlCreateURLParserCtxt:
12204
* @filename: the filename or URL
12205
* @options: a combination of xmlParserOption
12207
* Create a parser context for a file or URL content.
12208
* Automatic support for ZLIB/Compress compressed document is provided
12209
* by default if found at compile-time and for file accesses
12211
* Returns the new parser context or NULL
12214
xmlCreateURLParserCtxt(const char *filename, int options)
12216
xmlParserCtxtPtr ctxt;
12217
xmlParserInputPtr inputStream;
12218
char *directory = NULL;
12220
ctxt = xmlNewParserCtxt();
12221
if (ctxt == NULL) {
12222
xmlErrMemory(NULL, "cannot allocate parser context");
12227
xmlCtxtUseOptions(ctxt, options);
12228
ctxt->linenumbers = 1;
12230
inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
12231
if (inputStream == NULL) {
12232
xmlFreeParserCtxt(ctxt);
12236
inputPush(ctxt, inputStream);
12237
if ((ctxt->directory == NULL) && (directory == NULL))
12238
directory = xmlParserGetDirectory(filename);
12239
if ((ctxt->directory == NULL) && (directory != NULL))
12240
ctxt->directory = directory;
12246
* xmlCreateFileParserCtxt:
12247
* @filename: the filename
12249
* Create a parser context for a file content.
12250
* Automatic support for ZLIB/Compress compressed document is provided
12251
* by default if found at compile-time.
12253
* Returns the new parser context or NULL
12256
xmlCreateFileParserCtxt(const char *filename)
12258
return(xmlCreateURLParserCtxt(filename, 0));
12261
#ifdef LIBXML_SAX1_ENABLED
12263
* xmlSAXParseFileWithData:
12264
* @sax: the SAX handler block
12265
* @filename: the filename
12266
* @recovery: work in recovery mode, i.e. tries to read not Well Formed
12268
* @data: the userdata
12270
* parse an XML file and build a tree. Automatic support for ZLIB/Compress
12271
* compressed document is provided by default if found at compile-time.
12272
* It uses the given SAX function block to handle the parsing callback.
12273
* If sax is NULL, fallback to the default DOM tree building routines.
12275
* User data (void *) is stored within the parser context in the
12276
* context's _private member, so it is available nearly everywhere in libxml
12278
* Returns the resulting document tree
12282
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12283
int recovery, void *data) {
12285
xmlParserCtxtPtr ctxt;
12286
char *directory = NULL;
12290
ctxt = xmlCreateFileParserCtxt(filename);
12291
if (ctxt == NULL) {
12295
if (ctxt->sax != NULL)
12296
xmlFree(ctxt->sax);
12299
xmlDetectSAX2(ctxt);
12301
ctxt->_private = data;
12304
if ((ctxt->directory == NULL) && (directory == NULL))
12305
directory = xmlParserGetDirectory(filename);
12306
if ((ctxt->directory == NULL) && (directory != NULL))
12307
ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
12309
ctxt->recovery = recovery;
12311
xmlParseDocument(ctxt);
12313
if ((ctxt->wellFormed) || recovery) {
12316
if (ctxt->input->buf->compressed > 0)
12317
ret->compression = 9;
12319
ret->compression = ctxt->input->buf->compressed;
12324
xmlFreeDoc(ctxt->myDoc);
12325
ctxt->myDoc = NULL;
12329
xmlFreeParserCtxt(ctxt);
12336
* @sax: the SAX handler block
12337
* @filename: the filename
12338
* @recovery: work in recovery mode, i.e. tries to read not Well Formed
12341
* parse an XML file and build a tree. Automatic support for ZLIB/Compress
12342
* compressed document is provided by default if found at compile-time.
12343
* It uses the given SAX function block to handle the parsing callback.
12344
* If sax is NULL, fallback to the default DOM tree building routines.
12346
* Returns the resulting document tree
12350
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12352
return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12357
* @cur: a pointer to an array of xmlChar
12359
* parse an XML in-memory document and build a tree.
12360
* In the case the document is not Well Formed, a tree is built anyway
12362
* Returns the resulting document tree
12366
xmlRecoverDoc(xmlChar *cur) {
12367
return(xmlSAXParseDoc(NULL, cur, 1));
12372
* @filename: the filename
12374
* parse an XML file and build a tree. Automatic support for ZLIB/Compress
12375
* compressed document is provided by default if found at compile-time.
12377
* Returns the resulting document tree if the file was wellformed,
12382
xmlParseFile(const char *filename) {
12383
return(xmlSAXParseFile(NULL, filename, 0));
12388
* @filename: the filename
12390
* parse an XML file and build a tree. Automatic support for ZLIB/Compress
12391
* compressed document is provided by default if found at compile-time.
12392
* In the case the document is not Well Formed, a tree is built anyway
12394
* Returns the resulting document tree
12398
xmlRecoverFile(const char *filename) {
12399
return(xmlSAXParseFile(NULL, filename, 1));
12404
* xmlSetupParserForBuffer:
12405
* @ctxt: an XML parser context
12406
* @buffer: a xmlChar * buffer
12407
* @filename: a file name
12409
* Setup the parser context to parse a new buffer; Clears any prior
12410
* contents from the parser context. The buffer parameter must not be
12411
* NULL, but the filename parameter can be
12414
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12415
const char* filename)
12417
xmlParserInputPtr input;
12419
if ((ctxt == NULL) || (buffer == NULL))
12422
input = xmlNewInputStream(ctxt);
12423
if (input == NULL) {
12424
xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
12425
xmlClearParserCtxt(ctxt);
12429
xmlClearParserCtxt(ctxt);
12430
if (filename != NULL)
12431
input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
12432
input->base = buffer;
12433
input->cur = buffer;
12434
input->end = &buffer[xmlStrlen(buffer)];
12435
inputPush(ctxt, input);
12439
* xmlSAXUserParseFile:
12440
* @sax: a SAX handler
12441
* @user_data: The user data returned on SAX callbacks
12442
* @filename: a file name
12444
* parse an XML file and call the given SAX handler routines.
12445
* Automatic support for ZLIB/Compress compressed document is provided
12447
* Returns 0 in case of success or an error number otherwise
12450
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12451
const char *filename) {
12453
xmlParserCtxtPtr ctxt;
12455
ctxt = xmlCreateFileParserCtxt(filename);
12456
if (ctxt == NULL) return -1;
12457
#ifdef LIBXML_SAX1_ENABLED
12458
if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12459
#endif /* LIBXML_SAX1_ENABLED */
12460
xmlFree(ctxt->sax);
12462
xmlDetectSAX2(ctxt);
12464
if (user_data != NULL)
12465
ctxt->userData = user_data;
12467
xmlParseDocument(ctxt);
12469
if (ctxt->wellFormed)
12472
if (ctxt->errNo != 0)
12479
if (ctxt->myDoc != NULL) {
12480
xmlFreeDoc(ctxt->myDoc);
12481
ctxt->myDoc = NULL;
12483
xmlFreeParserCtxt(ctxt);
12487
#endif /* LIBXML_SAX1_ENABLED */
12489
/************************************************************************
12491
* Front ends when parsing from memory *
12493
************************************************************************/
12496
* xmlCreateMemoryParserCtxt:
12497
* @buffer: a pointer to a char array
12498
* @size: the size of the array
12500
* Create a parser context for an XML in-memory document.
12502
* Returns the new parser context or NULL
12505
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12506
xmlParserCtxtPtr ctxt;
12507
xmlParserInputPtr input;
12508
xmlParserInputBufferPtr buf;
12510
if (buffer == NULL)
12515
ctxt = xmlNewParserCtxt();
12519
/* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
12520
buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12522
xmlFreeParserCtxt(ctxt);
12526
input = xmlNewInputStream(ctxt);
12527
if (input == NULL) {
12528
xmlFreeParserInputBuffer(buf);
12529
xmlFreeParserCtxt(ctxt);
12533
input->filename = NULL;
12535
input->base = input->buf->buffer->content;
12536
input->cur = input->buf->buffer->content;
12537
input->end = &input->buf->buffer->content[input->buf->buffer->use];
12539
inputPush(ctxt, input);
12543
#ifdef LIBXML_SAX1_ENABLED
12545
* xmlSAXParseMemoryWithData:
12546
* @sax: the SAX handler block
12547
* @buffer: a pointer to a char array
12548
* @size: the size of the array
12549
* @recovery: work in recovery mode, i.e. tries to read not Well Formed
12551
* @data: the userdata
12553
* parse an XML in-memory block and use the given SAX function block
12554
* to handle the parsing callback. If sax is NULL, fallback to the default
12555
* DOM tree building routines.
12557
* User data (void *) is stored within the parser context in the
12558
* context's _private member, so it is available nearly everywhere in libxml
12560
* Returns the resulting document tree
12564
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12565
int size, int recovery, void *data) {
12567
xmlParserCtxtPtr ctxt;
12569
ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12570
if (ctxt == NULL) return(NULL);
12572
if (ctxt->sax != NULL)
12573
xmlFree(ctxt->sax);
12576
xmlDetectSAX2(ctxt);
12578
ctxt->_private=data;
12581
ctxt->recovery = recovery;
12583
xmlParseDocument(ctxt);
12585
if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12588
xmlFreeDoc(ctxt->myDoc);
12589
ctxt->myDoc = NULL;
12593
xmlFreeParserCtxt(ctxt);
12599
* xmlSAXParseMemory:
12600
* @sax: the SAX handler block
12601
* @buffer: a pointer to a char array
12602
* @size: the size of the array
12603
* @recovery: work in recovery mode, i.e. tries to read not Well Formed
12606
* parse an XML in-memory block and use the given SAX function block
12607
* to handle the parsing callback. If sax is NULL, fallback to the default
12608
* DOM tree building routines.
12610
* Returns the resulting document tree
12613
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12614
int size, int recovery) {
12615
return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
12620
* @buffer: a pointer to a char array
12621
* @size: the size of the array
12623
* parse an XML in-memory block and build a tree.
12625
* Returns the resulting document tree
12628
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
12629
return(xmlSAXParseMemory(NULL, buffer, size, 0));
12633
* xmlRecoverMemory:
12634
* @buffer: a pointer to a char array
12635
* @size: the size of the array
12637
* parse an XML in-memory block and build a tree.
12638
* In the case the document is not Well Formed, a tree is built anyway
12640
* Returns the resulting document tree
12643
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
12644
return(xmlSAXParseMemory(NULL, buffer, size, 1));
12648
* xmlSAXUserParseMemory:
12649
* @sax: a SAX handler
12650
* @user_data: The user data returned on SAX callbacks
12651
* @buffer: an in-memory XML document input
12652
* @size: the length of the XML document in bytes
12654
* A better SAX parsing routine.
12655
* parse an XML in-memory buffer and call the given SAX handler routines.
12657
* Returns 0 in case of success or an error number otherwise
12659
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
12660
const char *buffer, int size) {
12662
xmlParserCtxtPtr ctxt;
12663
xmlSAXHandlerPtr oldsax = NULL;
12665
if (sax == NULL) return -1;
12666
ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12667
if (ctxt == NULL) return -1;
12668
oldsax = ctxt->sax;
12670
xmlDetectSAX2(ctxt);
12671
if (user_data != NULL)
12672
ctxt->userData = user_data;
12674
xmlParseDocument(ctxt);
12676
if (ctxt->wellFormed)
12679
if (ctxt->errNo != 0)
12684
ctxt->sax = oldsax;
12685
if (ctxt->myDoc != NULL) {
12686
xmlFreeDoc(ctxt->myDoc);
12687
ctxt->myDoc = NULL;
12689
xmlFreeParserCtxt(ctxt);
12693
#endif /* LIBXML_SAX1_ENABLED */
12696
* xmlCreateDocParserCtxt:
12697
* @cur: a pointer to an array of xmlChar
12699
* Creates a parser context for an XML in-memory document.
12701
* Returns the new parser context or NULL
12704
xmlCreateDocParserCtxt(const xmlChar *cur) {
12709
len = xmlStrlen(cur);
12710
return(xmlCreateMemoryParserCtxt((const char *)cur, len));
12713
#ifdef LIBXML_SAX1_ENABLED
12716
* @sax: the SAX handler block
12717
* @cur: a pointer to an array of xmlChar
12718
* @recovery: work in recovery mode, i.e. tries to read not Well Formed
12721
* parse an XML in-memory document and build a tree.
12722
* It uses the given SAX function block to handle the parsing callback.
12723
* If sax is NULL, fallback to the default DOM tree building routines.
12725
* Returns the resulting document tree
12729
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
12731
xmlParserCtxtPtr ctxt;
12732
xmlSAXHandlerPtr oldsax = NULL;
12734
if (cur == NULL) return(NULL);
12737
ctxt = xmlCreateDocParserCtxt(cur);
12738
if (ctxt == NULL) return(NULL);
12740
oldsax = ctxt->sax;
12742
ctxt->userData = NULL;
12744
xmlDetectSAX2(ctxt);
12746
xmlParseDocument(ctxt);
12747
if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12750
xmlFreeDoc(ctxt->myDoc);
12751
ctxt->myDoc = NULL;
12754
ctxt->sax = oldsax;
12755
xmlFreeParserCtxt(ctxt);
12762
* @cur: a pointer to an array of xmlChar
12764
* parse an XML in-memory document and build a tree.
12766
* Returns the resulting document tree
12770
xmlParseDoc(const xmlChar *cur) {
12771
return(xmlSAXParseDoc(NULL, cur, 0));
12773
#endif /* LIBXML_SAX1_ENABLED */
12775
#ifdef LIBXML_LEGACY_ENABLED
12776
/************************************************************************
12778
* Specific function to keep track of entities references *
12779
* and used by the XSLT debugger *
12781
************************************************************************/
12783
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
12786
* xmlAddEntityReference:
12787
* @ent : A valid entity
12788
* @firstNode : A valid first node for children of entity
12789
* @lastNode : A valid last node of children entity
12791
* Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
12794
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
12795
xmlNodePtr lastNode)
12797
if (xmlEntityRefFunc != NULL) {
12798
(*xmlEntityRefFunc) (ent, firstNode, lastNode);
12804
* xmlSetEntityReferenceFunc:
12805
* @func: A valid function
12807
* Set the function to call back when an XML reference has been made
12810
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12812
xmlEntityRefFunc = func;
12814
#endif /* LIBXML_LEGACY_ENABLED */
12816
/************************************************************************
12820
************************************************************************/
12822
#ifdef LIBXML_XPATH_ENABLED
12823
#include <libxml/xpath.h>
12826
extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
12827
static int xmlParserInitialized = 0;
12832
* Initialization function for the XML parser.
12833
* This is not reentrant. Call once before processing in case of
12834
* use in multithreaded programs.
12838
xmlInitParser(void) {
12839
if (xmlParserInitialized != 0)
12842
if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12843
(xmlGenericError == NULL))
12844
initGenericErrorDefaultFunc(NULL);
12848
xmlInitCharEncodingHandlers();
12849
xmlDefaultSAXHandlerInit();
12850
xmlRegisterDefaultInputCallbacks();
12851
#ifdef LIBXML_OUTPUT_ENABLED
12852
xmlRegisterDefaultOutputCallbacks();
12853
#endif /* LIBXML_OUTPUT_ENABLED */
12854
#ifdef LIBXML_HTML_ENABLED
12855
htmlInitAutoClose();
12856
htmlDefaultSAXHandlerInit();
12858
#ifdef LIBXML_XPATH_ENABLED
12861
xmlParserInitialized = 1;
12865
* xmlCleanupParser:
12867
* Cleanup function for the XML library. It tries to reclaim all
12868
* parsing related global memory allocated for the library processing.
12869
* It doesn't deallocate any document related memory. Calling this
12870
* function should not prevent reusing the library but one should
12871
* call xmlCleanupParser() only when the process has
12872
* finished using the library or XML document built with it.
12876
xmlCleanupParser(void) {
12877
if (!xmlParserInitialized)
12880
xmlCleanupCharEncodingHandlers();
12881
#ifdef LIBXML_CATALOG_ENABLED
12882
xmlCatalogCleanup();
12885
xmlCleanupInputCallbacks();
12886
#ifdef LIBXML_OUTPUT_ENABLED
12887
xmlCleanupOutputCallbacks();
12889
#ifdef LIBXML_SCHEMAS_ENABLED
12890
xmlSchemaCleanupTypes();
12891
xmlRelaxNGCleanupTypes();
12893
xmlCleanupGlobals();
12894
xmlResetLastError();
12895
xmlCleanupThreads(); /* must be last if called not from the main thread */
12896
xmlCleanupMemory();
12897
xmlParserInitialized = 0;
12900
/************************************************************************
12902
* New set (2.6.0) of simpler and more flexible APIs *
12904
************************************************************************/
12910
* Free a string if it is not owned by the "dict" dictionary in the
12913
#define DICT_FREE(str) \
12914
if ((str) && ((!dict) || \
12915
(xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \
12916
xmlFree((char *)(str));
12920
* @ctxt: an XML parser context
12922
* Reset a parser context
12925
xmlCtxtReset(xmlParserCtxtPtr ctxt)
12927
xmlParserInputPtr input;
12935
while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12936
xmlFreeInputStream(input);
12939
ctxt->input = NULL;
12942
ctxt->spaceTab[0] = -1;
12943
ctxt->space = &ctxt->spaceTab[0];
12952
DICT_FREE(ctxt->version);
12953
ctxt->version = NULL;
12954
DICT_FREE(ctxt->encoding);
12955
ctxt->encoding = NULL;
12956
DICT_FREE(ctxt->directory);
12957
ctxt->directory = NULL;
12958
DICT_FREE(ctxt->extSubURI);
12959
ctxt->extSubURI = NULL;
12960
DICT_FREE(ctxt->extSubSystem);
12961
ctxt->extSubSystem = NULL;
12962
if (ctxt->myDoc != NULL)
12963
xmlFreeDoc(ctxt->myDoc);
12964
ctxt->myDoc = NULL;
12966
ctxt->standalone = -1;
12967
ctxt->hasExternalSubset = 0;
12968
ctxt->hasPErefs = 0;
12970
ctxt->external = 0;
12971
ctxt->instate = XML_PARSER_START;
12974
ctxt->wellFormed = 1;
12975
ctxt->nsWellFormed = 1;
12976
ctxt->disableSAX = 0;
12979
ctxt->vctxt.userData = ctxt;
12980
ctxt->vctxt.error = xmlParserValidityError;
12981
ctxt->vctxt.warning = xmlParserValidityWarning;
12983
ctxt->record_info = 0;
12985
ctxt->checkIndex = 0;
12986
ctxt->inSubset = 0;
12987
ctxt->errNo = XML_ERR_OK;
12989
ctxt->charset = XML_CHAR_ENCODING_UTF8;
12990
ctxt->catalogs = NULL;
12991
xmlInitNodeInfoSeq(&ctxt->node_seq);
12993
if (ctxt->attsDefault != NULL) {
12994
xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12995
ctxt->attsDefault = NULL;
12997
if (ctxt->attsSpecial != NULL) {
12998
xmlHashFree(ctxt->attsSpecial, NULL);
12999
ctxt->attsSpecial = NULL;
13002
#ifdef LIBXML_CATALOG_ENABLED
13003
if (ctxt->catalogs != NULL)
13004
xmlCatalogFreeLocal(ctxt->catalogs);
13006
if (ctxt->lastError.code != XML_ERR_OK)
13007
xmlResetError(&ctxt->lastError);
13011
* xmlCtxtResetPush:
13012
* @ctxt: an XML parser context
13013
* @chunk: a pointer to an array of chars
13014
* @size: number of chars in the array
13015
* @filename: an optional file name or URI
13016
* @encoding: the document encoding, or NULL
13018
* Reset a push parser context
13020
* Returns 0 in case of success and 1 in case of error
13023
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13024
int size, const char *filename, const char *encoding)
13026
xmlParserInputPtr inputStream;
13027
xmlParserInputBufferPtr buf;
13028
xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
13033
if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
13034
enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
13036
buf = xmlAllocParserInputBuffer(enc);
13040
if (ctxt == NULL) {
13041
xmlFreeParserInputBuffer(buf);
13045
xmlCtxtReset(ctxt);
13047
if (ctxt->pushTab == NULL) {
13048
ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
13049
sizeof(xmlChar *));
13050
if (ctxt->pushTab == NULL) {
13051
xmlErrMemory(ctxt, NULL);
13052
xmlFreeParserInputBuffer(buf);
13057
if (filename == NULL) {
13058
ctxt->directory = NULL;
13060
ctxt->directory = xmlParserGetDirectory(filename);
13063
inputStream = xmlNewInputStream(ctxt);
13064
if (inputStream == NULL) {
13065
xmlFreeParserInputBuffer(buf);
13069
if (filename == NULL)
13070
inputStream->filename = NULL;
13072
inputStream->filename = (char *)
13073
xmlCanonicPath((const xmlChar *) filename);
13074
inputStream->buf = buf;
13075
inputStream->base = inputStream->buf->buffer->content;
13076
inputStream->cur = inputStream->buf->buffer->content;
13078
&inputStream->buf->buffer->content[inputStream->buf->buffer->use];
13080
inputPush(ctxt, inputStream);
13082
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
13083
(ctxt->input->buf != NULL)) {
13084
int base = ctxt->input->base - ctxt->input->buf->buffer->content;
13085
int cur = ctxt->input->cur - ctxt->input->base;
13087
xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
13089
ctxt->input->base = ctxt->input->buf->buffer->content + base;
13090
ctxt->input->cur = ctxt->input->base + cur;
13092
&ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
13095
xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
13099
if (encoding != NULL) {
13100
xmlCharEncodingHandlerPtr hdlr;
13102
hdlr = xmlFindCharEncodingHandler(encoding);
13103
if (hdlr != NULL) {
13104
xmlSwitchToEncoding(ctxt, hdlr);
13106
xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
13107
"Unsupported encoding %s\n", BAD_CAST encoding);
13109
} else if (enc != XML_CHAR_ENCODING_NONE) {
13110
xmlSwitchEncoding(ctxt, enc);
13117
* xmlCtxtUseOptions:
13118
* @ctxt: an XML parser context
13119
* @options: a combination of xmlParserOption
13121
* Applies the options to the parser context
13123
* Returns 0 in case of success, the set of unknown or unimplemented options
13124
* in case of error.
13127
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13131
if (options & XML_PARSE_RECOVER) {
13132
ctxt->recovery = 1;
13133
options -= XML_PARSE_RECOVER;
13135
ctxt->recovery = 0;
13136
if (options & XML_PARSE_DTDLOAD) {
13137
ctxt->loadsubset = XML_DETECT_IDS;
13138
options -= XML_PARSE_DTDLOAD;
13140
ctxt->loadsubset = 0;
13141
if (options & XML_PARSE_DTDATTR) {
13142
ctxt->loadsubset |= XML_COMPLETE_ATTRS;
13143
options -= XML_PARSE_DTDATTR;
13145
if (options & XML_PARSE_NOENT) {
13146
ctxt->replaceEntities = 1;
13147
/* ctxt->loadsubset |= XML_DETECT_IDS; */
13148
options -= XML_PARSE_NOENT;
13150
ctxt->replaceEntities = 0;
13151
if (options & XML_PARSE_PEDANTIC) {
13152
ctxt->pedantic = 1;
13153
options -= XML_PARSE_PEDANTIC;
13155
ctxt->pedantic = 0;
13156
if (options & XML_PARSE_NOBLANKS) {
13157
ctxt->keepBlanks = 0;
13158
ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13159
options -= XML_PARSE_NOBLANKS;
13161
ctxt->keepBlanks = 1;
13162
if (options & XML_PARSE_DTDVALID) {
13163
ctxt->validate = 1;
13164
if (options & XML_PARSE_NOWARNING)
13165
ctxt->vctxt.warning = NULL;
13166
if (options & XML_PARSE_NOERROR)
13167
ctxt->vctxt.error = NULL;
13168
options -= XML_PARSE_DTDVALID;
13170
ctxt->validate = 0;
13171
if (options & XML_PARSE_NOWARNING) {
13172
ctxt->sax->warning = NULL;
13173
options -= XML_PARSE_NOWARNING;
13175
if (options & XML_PARSE_NOERROR) {
13176
ctxt->sax->error = NULL;
13177
ctxt->sax->fatalError = NULL;
13178
options -= XML_PARSE_NOERROR;
13180
#ifdef LIBXML_SAX1_ENABLED
13181
if (options & XML_PARSE_SAX1) {
13182
ctxt->sax->startElement = xmlSAX2StartElement;
13183
ctxt->sax->endElement = xmlSAX2EndElement;
13184
ctxt->sax->startElementNs = NULL;
13185
ctxt->sax->endElementNs = NULL;
13186
ctxt->sax->initialized = 1;
13187
options -= XML_PARSE_SAX1;
13189
#endif /* LIBXML_SAX1_ENABLED */
13190
if (options & XML_PARSE_NODICT) {
13191
ctxt->dictNames = 0;
13192
options -= XML_PARSE_NODICT;
13194
ctxt->dictNames = 1;
13196
if (options & XML_PARSE_NOCDATA) {
13197
ctxt->sax->cdataBlock = NULL;
13198
options -= XML_PARSE_NOCDATA;
13200
if (options & XML_PARSE_NSCLEAN) {
13201
ctxt->options |= XML_PARSE_NSCLEAN;
13202
options -= XML_PARSE_NSCLEAN;
13204
if (options & XML_PARSE_NONET) {
13205
ctxt->options |= XML_PARSE_NONET;
13206
options -= XML_PARSE_NONET;
13208
if (options & XML_PARSE_COMPACT) {
13209
ctxt->options |= XML_PARSE_COMPACT;
13210
options -= XML_PARSE_COMPACT;
13212
ctxt->linenumbers = 1;
13218
* @ctxt: an XML parser context
13219
* @URL: the base URL to use for the document
13220
* @encoding: the document encoding, or NULL
13221
* @options: a combination of xmlParserOption
13222
* @reuse: keep the context for reuse
13224
* Common front-end for the xmlRead functions
13226
* Returns the resulting document tree or NULL
13229
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
13230
int options, int reuse)
13234
xmlCtxtUseOptions(ctxt, options);
13235
if (encoding != NULL) {
13236
xmlCharEncodingHandlerPtr hdlr;
13238
hdlr = xmlFindCharEncodingHandler(encoding);
13240
xmlSwitchToEncoding(ctxt, hdlr);
13242
if ((URL != NULL) && (ctxt->input != NULL) &&
13243
(ctxt->input->filename == NULL))
13244
ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
13245
xmlParseDocument(ctxt);
13246
if ((ctxt->wellFormed) || ctxt->recovery)
13250
if (ctxt->myDoc != NULL) {
13251
xmlFreeDoc(ctxt->myDoc);
13254
ctxt->myDoc = NULL;
13256
xmlFreeParserCtxt(ctxt);
13264
* @cur: a pointer to a zero terminated string
13265
* @URL: the base URL to use for the document
13266
* @encoding: the document encoding, or NULL
13267
* @options: a combination of xmlParserOption
13269
* parse an XML in-memory document and build a tree.
13271
* Returns the resulting document tree
13274
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
13276
xmlParserCtxtPtr ctxt;
13281
ctxt = xmlCreateDocParserCtxt(cur);
13284
return (xmlDoRead(ctxt, URL, encoding, options, 0));
13289
* @filename: a file or URL
13290
* @encoding: the document encoding, or NULL
13291
* @options: a combination of xmlParserOption
13293
* parse an XML file from the filesystem or the network.
13295
* Returns the resulting document tree
13298
xmlReadFile(const char *filename, const char *encoding, int options)
13300
xmlParserCtxtPtr ctxt;
13302
ctxt = xmlCreateURLParserCtxt(filename, options);
13305
return (xmlDoRead(ctxt, NULL, encoding, options, 0));
13310
* @buffer: a pointer to a char array
13311
* @size: the size of the array
13312
* @URL: the base URL to use for the document
13313
* @encoding: the document encoding, or NULL
13314
* @options: a combination of xmlParserOption
13316
* parse an XML in-memory document and build a tree.
13318
* Returns the resulting document tree
13321
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
13323
xmlParserCtxtPtr ctxt;
13325
ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13328
return (xmlDoRead(ctxt, URL, encoding, options, 0));
13333
* @fd: an open file descriptor
13334
* @URL: the base URL to use for the document
13335
* @encoding: the document encoding, or NULL
13336
* @options: a combination of xmlParserOption
13338
* parse an XML from a file descriptor and build a tree.
13339
* NOTE that the file descriptor will not be closed when the
13340
* reader is closed or reset.
13342
* Returns the resulting document tree
13345
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13347
xmlParserCtxtPtr ctxt;
13348
xmlParserInputBufferPtr input;
13349
xmlParserInputPtr stream;
13354
input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13357
input->closecallback = NULL;
13358
ctxt = xmlNewParserCtxt();
13359
if (ctxt == NULL) {
13360
xmlFreeParserInputBuffer(input);
13363
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13364
if (stream == NULL) {
13365
xmlFreeParserInputBuffer(input);
13366
xmlFreeParserCtxt(ctxt);
13369
inputPush(ctxt, stream);
13370
return (xmlDoRead(ctxt, URL, encoding, options, 0));
13375
* @ioread: an I/O read function
13376
* @ioclose: an I/O close function
13377
* @ioctx: an I/O handler
13378
* @URL: the base URL to use for the document
13379
* @encoding: the document encoding, or NULL
13380
* @options: a combination of xmlParserOption
13382
* parse an XML document from I/O functions and source and build a tree.
13384
* Returns the resulting document tree
13387
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
13388
void *ioctx, const char *URL, const char *encoding, int options)
13390
xmlParserCtxtPtr ctxt;
13391
xmlParserInputBufferPtr input;
13392
xmlParserInputPtr stream;
13394
if (ioread == NULL)
13397
input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13398
XML_CHAR_ENCODING_NONE);
13401
ctxt = xmlNewParserCtxt();
13402
if (ctxt == NULL) {
13403
xmlFreeParserInputBuffer(input);
13406
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13407
if (stream == NULL) {
13408
xmlFreeParserInputBuffer(input);
13409
xmlFreeParserCtxt(ctxt);
13412
inputPush(ctxt, stream);
13413
return (xmlDoRead(ctxt, URL, encoding, options, 0));
13418
* @ctxt: an XML parser context
13419
* @cur: a pointer to a zero terminated string
13420
* @URL: the base URL to use for the document
13421
* @encoding: the document encoding, or NULL
13422
* @options: a combination of xmlParserOption
13424
* parse an XML in-memory document and build a tree.
13425
* This reuses the existing @ctxt parser context
13427
* Returns the resulting document tree
13430
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
13431
const char *URL, const char *encoding, int options)
13433
xmlParserInputPtr stream;
13440
xmlCtxtReset(ctxt);
13442
stream = xmlNewStringInputStream(ctxt, cur);
13443
if (stream == NULL) {
13446
inputPush(ctxt, stream);
13447
return (xmlDoRead(ctxt, URL, encoding, options, 1));
13452
* @ctxt: an XML parser context
13453
* @filename: a file or URL
13454
* @encoding: the document encoding, or NULL
13455
* @options: a combination of xmlParserOption
13457
* parse an XML file from the filesystem or the network.
13458
* This reuses the existing @ctxt parser context
13460
* Returns the resulting document tree
13463
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13464
const char *encoding, int options)
13466
xmlParserInputPtr stream;
13468
if (filename == NULL)
13473
xmlCtxtReset(ctxt);
13475
stream = xmlLoadExternalEntity(filename, NULL, ctxt);
13476
if (stream == NULL) {
13479
inputPush(ctxt, stream);
13480
return (xmlDoRead(ctxt, NULL, encoding, options, 1));
13484
* xmlCtxtReadMemory:
13485
* @ctxt: an XML parser context
13486
* @buffer: a pointer to a char array
13487
* @size: the size of the array
13488
* @URL: the base URL to use for the document
13489
* @encoding: the document encoding, or NULL
13490
* @options: a combination of xmlParserOption
13492
* parse an XML in-memory document and build a tree.
13493
* This reuses the existing @ctxt parser context
13495
* Returns the resulting document tree
13498
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
13499
const char *URL, const char *encoding, int options)
13501
xmlParserInputBufferPtr input;
13502
xmlParserInputPtr stream;
13506
if (buffer == NULL)
13509
xmlCtxtReset(ctxt);
13511
input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13512
if (input == NULL) {
13516
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13517
if (stream == NULL) {
13518
xmlFreeParserInputBuffer(input);
13522
inputPush(ctxt, stream);
13523
return (xmlDoRead(ctxt, URL, encoding, options, 1));
13528
* @ctxt: an XML parser context
13529
* @fd: an open file descriptor
13530
* @URL: the base URL to use for the document
13531
* @encoding: the document encoding, or NULL
13532
* @options: a combination of xmlParserOption
13534
* parse an XML from a file descriptor and build a tree.
13535
* This reuses the existing @ctxt parser context
13536
* NOTE that the file descriptor will not be closed when the
13537
* reader is closed or reset.
13539
* Returns the resulting document tree
13542
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13543
const char *URL, const char *encoding, int options)
13545
xmlParserInputBufferPtr input;
13546
xmlParserInputPtr stream;
13553
xmlCtxtReset(ctxt);
13556
input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13559
input->closecallback = NULL;
13560
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13561
if (stream == NULL) {
13562
xmlFreeParserInputBuffer(input);
13565
inputPush(ctxt, stream);
13566
return (xmlDoRead(ctxt, URL, encoding, options, 1));
13571
* @ctxt: an XML parser context
13572
* @ioread: an I/O read function
13573
* @ioclose: an I/O close function
13574
* @ioctx: an I/O handler
13575
* @URL: the base URL to use for the document
13576
* @encoding: the document encoding, or NULL
13577
* @options: a combination of xmlParserOption
13579
* parse an XML document from I/O functions and source and build a tree.
13580
* This reuses the existing @ctxt parser context
13582
* Returns the resulting document tree
13585
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13586
xmlInputCloseCallback ioclose, void *ioctx,
13588
const char *encoding, int options)
13590
xmlParserInputBufferPtr input;
13591
xmlParserInputPtr stream;
13593
if (ioread == NULL)
13598
xmlCtxtReset(ctxt);
13600
input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13601
XML_CHAR_ENCODING_NONE);
13604
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13605
if (stream == NULL) {
13606
xmlFreeParserInputBuffer(input);
13609
inputPush(ctxt, stream);
13610
return (xmlDoRead(ctxt, URL, encoding, options, 1));
13613
#define bottom_parser
13614
#include "elfgcchack.h"