2
* parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
* implemented on top of the SAX interfaces
6
* The XML specification:
7
* http://www.w3.org/TR/REC-xml
8
* Original 1.0 version:
9
* http://www.w3.org/TR/1998/REC-xml-19980210
10
* XML second edition working draft
11
* http://www.w3.org/TR/2000/WD-xml-2e-20000814
13
* Okay this is a big file, the parser core is around 7000 lines, then it
14
* is followed by the progressive parser top routines, then the various
15
* high level APIs to call the parser and a few miscellaneous functions.
16
* A number of helper functions and deprecated ones have been moved to
17
* parserInternals.c to reduce this file size.
18
* As much as possible the functions are associated with their relative
19
* production in the XML specification. A few productions defining the
20
* different ranges of characters are actually implemented either in
21
* parserInternals.h or parserInternals.c
22
* The DOM tree build is realized from the default SAX callbacks in
24
* The routines doing the validation checks are in valid.c and called either
25
* from the SAX callbacks or as standalone functions using a preparsed
28
* See Copyright for the status of this software.
36
#if defined(WIN32) && !defined (__CYGWIN__)
37
#define XML_DIR_SEP '\\'
39
#define XML_DIR_SEP '/'
46
#include <libxml/xmlmemory.h>
47
#include <libxml/threads.h>
48
#include <libxml/globals.h>
49
#include <libxml/tree.h>
50
#include <libxml/parser.h>
51
#include <libxml/parserInternals.h>
52
#include <libxml/valid.h>
53
#include <libxml/entities.h>
54
#include <libxml/xmlerror.h>
55
#include <libxml/encoding.h>
56
#include <libxml/xmlIO.h>
57
#include <libxml/uri.h>
58
#ifdef LIBXML_CATALOG_ENABLED
59
#include <libxml/catalog.h>
61
#ifdef LIBXML_SCHEMAS_ENABLED
62
#include <libxml/xmlschemastypes.h>
63
#include <libxml/relaxng.h>
71
#ifdef HAVE_SYS_STAT_H
91
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
93
static xmlParserCtxtPtr
94
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95
const xmlChar *base, xmlParserCtxtPtr pctx);
97
/************************************************************************
99
* Arbitrary limits set in the parser. See XML_PARSE_HUGE *
101
************************************************************************/
103
#define XML_PARSER_BIG_ENTITY 1000
104
#define XML_PARSER_LOT_ENTITY 5000
107
* XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
108
* replacement over the size in byte of the input indicates that you have
109
* an exponential behaviour. A value of 10 corresponds to at least 3 entity
110
* replacement per byte of input.
112
#define XML_PARSER_NON_LINEAR 10
115
* xmlParserEntityCheck
117
* Function to check non-linear entity expansion behaviour
118
* This is here to detect and stop exponential (non-linear) entity expansion
119
* This is not a limitation of the parser but a safety
120
* boundary feature. It can be disabled with the XML_PARSE_HUGE
124
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
125
xmlEntityPtr ent, size_t replacement)
129
if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
131
if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
133
if (replacement != 0) {
134
if (replacement < XML_MAX_TEXT_LENGTH)
138
* If the volume of entity copy reaches 10 times the
139
* amount of parsed data and over the large text threshold
140
* then that's very likely to be an abuse.
142
if (ctxt->input != NULL) {
143
consumed = ctxt->input->consumed +
144
(ctxt->input->cur - ctxt->input->base);
146
consumed += ctxt->sizeentities;
148
if (replacement < XML_PARSER_NON_LINEAR * consumed)
150
} else if (size != 0) {
152
* Do the check based on the replacement size of the entity
154
if (size < XML_PARSER_BIG_ENTITY)
158
* A limit on the amount of text data reasonably used
160
if (ctxt->input != NULL) {
161
consumed = ctxt->input->consumed +
162
(ctxt->input->cur - ctxt->input->base);
164
consumed += ctxt->sizeentities;
166
if ((size < XML_PARSER_NON_LINEAR * consumed) &&
167
(ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
169
} else if (ent != NULL) {
171
* use the number of parsed entities in the replacement
173
size = ent->checked / 2;
176
* The amount of data parsed counting entities size only once
178
if (ctxt->input != NULL) {
179
consumed = ctxt->input->consumed +
180
(ctxt->input->cur - ctxt->input->base);
182
consumed += ctxt->sizeentities;
185
* Check the density of entities for the amount of data
186
* knowing an entity reference will take at least 3 bytes
188
if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
192
* strange we got no data for checking just return
196
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
203
* arbitrary depth limit for the XML documents that we allow to
204
* process. This is not a limitation of the parser but a safety
205
* boundary feature. It can be disabled with the XML_PARSE_HUGE
208
unsigned int xmlParserMaxDepth = 256;
213
#define XML_PARSER_BIG_BUFFER_SIZE 300
214
#define XML_PARSER_BUFFER_SIZE 100
215
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
218
* XML_PARSER_CHUNK_SIZE
220
* When calling GROW that's the minimal amount of data
221
* the parser expected to have received. It is not a hard
222
* limit but an optimization when reading strings like Names
223
* It is not strictly needed as long as inputs available characters
224
* are followed by 0, which should be provided by the I/O level
226
#define XML_PARSER_CHUNK_SIZE 100
229
* List of XML prefixed PI allowed by W3C specs
232
static const char *xmlW3CPIs[] = {
239
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
240
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
241
const xmlChar **str);
243
static xmlParserErrors
244
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
245
xmlSAXHandlerPtr sax,
246
void *user_data, int depth, const xmlChar *URL,
247
const xmlChar *ID, xmlNodePtr *list);
250
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
251
const char *encoding);
252
#ifdef LIBXML_LEGACY_ENABLED
254
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
255
xmlNodePtr lastNode);
256
#endif /* LIBXML_LEGACY_ENABLED */
258
static xmlParserErrors
259
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
260
const xmlChar *string, void *user_data, xmlNodePtr *lst);
263
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
265
/************************************************************************
267
* Some factorized error routines *
269
************************************************************************/
272
* xmlErrAttributeDup:
273
* @ctxt: an XML parser context
274
* @prefix: the attribute prefix
275
* @localname: the attribute localname
277
* Handle a redefinition of attribute error
280
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
281
const xmlChar * localname)
283
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
284
(ctxt->instate == XML_PARSER_EOF))
287
ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
290
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
291
XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
292
(const char *) localname, NULL, NULL, 0, 0,
293
"Attribute %s redefined\n", localname);
295
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
296
XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
297
(const char *) prefix, (const char *) localname,
298
NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
301
ctxt->wellFormed = 0;
302
if (ctxt->recovery == 0)
303
ctxt->disableSAX = 1;
309
* @ctxt: an XML parser context
310
* @error: the error number
311
* @info: extra information string
313
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
316
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
319
char errstr[129] = "";
321
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
322
(ctxt->instate == XML_PARSER_EOF))
325
case XML_ERR_INVALID_HEX_CHARREF:
326
errmsg = "CharRef: invalid hexadecimal value";
328
case XML_ERR_INVALID_DEC_CHARREF:
329
errmsg = "CharRef: invalid decimal value";
331
case XML_ERR_INVALID_CHARREF:
332
errmsg = "CharRef: invalid value";
334
case XML_ERR_INTERNAL_ERROR:
335
errmsg = "internal error";
337
case XML_ERR_PEREF_AT_EOF:
338
errmsg = "PEReference at end of document";
340
case XML_ERR_PEREF_IN_PROLOG:
341
errmsg = "PEReference in prolog";
343
case XML_ERR_PEREF_IN_EPILOG:
344
errmsg = "PEReference in epilog";
346
case XML_ERR_PEREF_NO_NAME:
347
errmsg = "PEReference: no name";
349
case XML_ERR_PEREF_SEMICOL_MISSING:
350
errmsg = "PEReference: expecting ';'";
352
case XML_ERR_ENTITY_LOOP:
353
errmsg = "Detected an entity reference loop";
355
case XML_ERR_ENTITY_NOT_STARTED:
356
errmsg = "EntityValue: \" or ' expected";
358
case XML_ERR_ENTITY_PE_INTERNAL:
359
errmsg = "PEReferences forbidden in internal subset";
361
case XML_ERR_ENTITY_NOT_FINISHED:
362
errmsg = "EntityValue: \" or ' expected";
364
case XML_ERR_ATTRIBUTE_NOT_STARTED:
365
errmsg = "AttValue: \" or ' expected";
367
case XML_ERR_LT_IN_ATTRIBUTE:
368
errmsg = "Unescaped '<' not allowed in attributes values";
370
case XML_ERR_LITERAL_NOT_STARTED:
371
errmsg = "SystemLiteral \" or ' expected";
373
case XML_ERR_LITERAL_NOT_FINISHED:
374
errmsg = "Unfinished System or Public ID \" or ' expected";
376
case XML_ERR_MISPLACED_CDATA_END:
377
errmsg = "Sequence ']]>' not allowed in content";
379
case XML_ERR_URI_REQUIRED:
380
errmsg = "SYSTEM or PUBLIC, the URI is missing";
382
case XML_ERR_PUBID_REQUIRED:
383
errmsg = "PUBLIC, the Public Identifier is missing";
385
case XML_ERR_HYPHEN_IN_COMMENT:
386
errmsg = "Comment must not contain '--' (double-hyphen)";
388
case XML_ERR_PI_NOT_STARTED:
389
errmsg = "xmlParsePI : no target name";
391
case XML_ERR_RESERVED_XML_NAME:
392
errmsg = "Invalid PI name";
394
case XML_ERR_NOTATION_NOT_STARTED:
395
errmsg = "NOTATION: Name expected here";
397
case XML_ERR_NOTATION_NOT_FINISHED:
398
errmsg = "'>' required to close NOTATION declaration";
400
case XML_ERR_VALUE_REQUIRED:
401
errmsg = "Entity value required";
403
case XML_ERR_URI_FRAGMENT:
404
errmsg = "Fragment not allowed";
406
case XML_ERR_ATTLIST_NOT_STARTED:
407
errmsg = "'(' required to start ATTLIST enumeration";
409
case XML_ERR_NMTOKEN_REQUIRED:
410
errmsg = "NmToken expected in ATTLIST enumeration";
412
case XML_ERR_ATTLIST_NOT_FINISHED:
413
errmsg = "')' required to finish ATTLIST enumeration";
415
case XML_ERR_MIXED_NOT_STARTED:
416
errmsg = "MixedContentDecl : '|' or ')*' expected";
418
case XML_ERR_PCDATA_REQUIRED:
419
errmsg = "MixedContentDecl : '#PCDATA' expected";
421
case XML_ERR_ELEMCONTENT_NOT_STARTED:
422
errmsg = "ContentDecl : Name or '(' expected";
424
case XML_ERR_ELEMCONTENT_NOT_FINISHED:
425
errmsg = "ContentDecl : ',' '|' or ')' expected";
427
case XML_ERR_PEREF_IN_INT_SUBSET:
429
"PEReference: forbidden within markup decl in internal subset";
431
case XML_ERR_GT_REQUIRED:
432
errmsg = "expected '>'";
434
case XML_ERR_CONDSEC_INVALID:
435
errmsg = "XML conditional section '[' expected";
437
case XML_ERR_EXT_SUBSET_NOT_FINISHED:
438
errmsg = "Content error in the external subset";
440
case XML_ERR_CONDSEC_INVALID_KEYWORD:
442
"conditional section INCLUDE or IGNORE keyword expected";
444
case XML_ERR_CONDSEC_NOT_FINISHED:
445
errmsg = "XML conditional section not closed";
447
case XML_ERR_XMLDECL_NOT_STARTED:
448
errmsg = "Text declaration '<?xml' required";
450
case XML_ERR_XMLDECL_NOT_FINISHED:
451
errmsg = "parsing XML declaration: '?>' expected";
453
case XML_ERR_EXT_ENTITY_STANDALONE:
454
errmsg = "external parsed entities cannot be standalone";
456
case XML_ERR_ENTITYREF_SEMICOL_MISSING:
457
errmsg = "EntityRef: expecting ';'";
459
case XML_ERR_DOCTYPE_NOT_FINISHED:
460
errmsg = "DOCTYPE improperly terminated";
462
case XML_ERR_LTSLASH_REQUIRED:
463
errmsg = "EndTag: '</' not found";
465
case XML_ERR_EQUAL_REQUIRED:
466
errmsg = "expected '='";
468
case XML_ERR_STRING_NOT_CLOSED:
469
errmsg = "String not closed expecting \" or '";
471
case XML_ERR_STRING_NOT_STARTED:
472
errmsg = "String not started expecting ' or \"";
474
case XML_ERR_ENCODING_NAME:
475
errmsg = "Invalid XML encoding name";
477
case XML_ERR_STANDALONE_VALUE:
478
errmsg = "standalone accepts only 'yes' or 'no'";
480
case XML_ERR_DOCUMENT_EMPTY:
481
errmsg = "Document is empty";
483
case XML_ERR_DOCUMENT_END:
484
errmsg = "Extra content at the end of the document";
486
case XML_ERR_NOT_WELL_BALANCED:
487
errmsg = "chunk is not well balanced";
489
case XML_ERR_EXTRA_CONTENT:
490
errmsg = "extra content at the end of well balanced chunk";
492
case XML_ERR_VERSION_MISSING:
493
errmsg = "Malformed declaration expecting version";
495
case XML_ERR_NAME_TOO_LONG:
496
errmsg = "Name too long use XML_PARSE_HUGE option";
504
errmsg = "Unregistered error message";
507
snprintf(errstr, 128, "%s\n", errmsg);
509
snprintf(errstr, 128, "%s: %%s\n", errmsg);
512
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
513
XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0],
516
ctxt->wellFormed = 0;
517
if (ctxt->recovery == 0)
518
ctxt->disableSAX = 1;
524
* @ctxt: an XML parser context
525
* @error: the error number
526
* @msg: the error message
528
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
531
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
534
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
535
(ctxt->instate == XML_PARSER_EOF))
539
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
540
XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
542
ctxt->wellFormed = 0;
543
if (ctxt->recovery == 0)
544
ctxt->disableSAX = 1;
550
* @ctxt: an XML parser context
551
* @error: the error number
552
* @msg: the error message
559
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
560
const char *msg, const xmlChar *str1, const xmlChar *str2)
562
xmlStructuredErrorFunc schannel = NULL;
564
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
565
(ctxt->instate == XML_PARSER_EOF))
567
if ((ctxt != NULL) && (ctxt->sax != NULL) &&
568
(ctxt->sax->initialized == XML_SAX2_MAGIC))
569
schannel = ctxt->sax->serror;
571
__xmlRaiseError(schannel,
572
(ctxt->sax) ? ctxt->sax->warning : NULL,
574
ctxt, NULL, XML_FROM_PARSER, error,
575
XML_ERR_WARNING, NULL, 0,
576
(const char *) str1, (const char *) str2, NULL, 0, 0,
577
msg, (const char *) str1, (const char *) str2);
579
__xmlRaiseError(schannel, NULL, NULL,
580
ctxt, NULL, XML_FROM_PARSER, error,
581
XML_ERR_WARNING, NULL, 0,
582
(const char *) str1, (const char *) str2, NULL, 0, 0,
583
msg, (const char *) str1, (const char *) str2);
589
* @ctxt: an XML parser context
590
* @error: the error number
591
* @msg: the error message
594
* Handle a validity error.
597
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
598
const char *msg, const xmlChar *str1, const xmlChar *str2)
600
xmlStructuredErrorFunc schannel = NULL;
602
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
603
(ctxt->instate == XML_PARSER_EOF))
607
if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
608
schannel = ctxt->sax->serror;
611
__xmlRaiseError(schannel,
612
ctxt->vctxt.error, ctxt->vctxt.userData,
613
ctxt, NULL, XML_FROM_DTD, error,
614
XML_ERR_ERROR, NULL, 0, (const char *) str1,
615
(const char *) str2, NULL, 0, 0,
616
msg, (const char *) str1, (const char *) str2);
619
__xmlRaiseError(schannel, NULL, NULL,
620
ctxt, NULL, XML_FROM_DTD, error,
621
XML_ERR_ERROR, NULL, 0, (const char *) str1,
622
(const char *) str2, NULL, 0, 0,
623
msg, (const char *) str1, (const char *) str2);
629
* @ctxt: an XML parser context
630
* @error: the error number
631
* @msg: the error message
632
* @val: an integer value
634
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
637
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
638
const char *msg, int val)
640
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
641
(ctxt->instate == XML_PARSER_EOF))
645
__xmlRaiseError(NULL, NULL, NULL,
646
ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
647
NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
649
ctxt->wellFormed = 0;
650
if (ctxt->recovery == 0)
651
ctxt->disableSAX = 1;
656
* xmlFatalErrMsgStrIntStr:
657
* @ctxt: an XML parser context
658
* @error: the error number
659
* @msg: the error message
660
* @str1: a string info
661
* @val: an integer value
662
* @str2: a string info
664
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
667
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
668
const char *msg, const xmlChar *str1, int val,
671
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
672
(ctxt->instate == XML_PARSER_EOF))
676
__xmlRaiseError(NULL, NULL, NULL,
677
ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
678
NULL, 0, (const char *) str1, (const char *) str2,
679
NULL, val, 0, msg, str1, val, str2);
681
ctxt->wellFormed = 0;
682
if (ctxt->recovery == 0)
683
ctxt->disableSAX = 1;
689
* @ctxt: an XML parser context
690
* @error: the error number
691
* @msg: the error message
692
* @val: a string value
694
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
697
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
698
const char *msg, const xmlChar * val)
700
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
701
(ctxt->instate == XML_PARSER_EOF))
705
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
706
XML_FROM_PARSER, error, XML_ERR_FATAL,
707
NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
710
ctxt->wellFormed = 0;
711
if (ctxt->recovery == 0)
712
ctxt->disableSAX = 1;
718
* @ctxt: an XML parser context
719
* @error: the error number
720
* @msg: the error message
721
* @val: a string value
723
* Handle a non fatal parser error
726
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
727
const char *msg, const xmlChar * val)
729
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
730
(ctxt->instate == XML_PARSER_EOF))
734
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
735
XML_FROM_PARSER, error, XML_ERR_ERROR,
736
NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
742
* @ctxt: an XML parser context
743
* @error: the error number
745
* @info1: extra information string
746
* @info2: extra information string
748
* Handle a namespace error (raised at XML_ERR_ERROR level, not fatal)
751
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
753
const xmlChar * info1, const xmlChar * info2,
754
const xmlChar * info3)
756
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
757
(ctxt->instate == XML_PARSER_EOF))
761
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
762
XML_ERR_ERROR, NULL, 0, (const char *) info1,
763
(const char *) info2, (const char *) info3, 0, 0, msg,
764
info1, info2, info3);
766
ctxt->nsWellFormed = 0;
771
* @ctxt: an XML parser context
772
* @error: the error number
774
* @info1: extra information string
775
* @info2: extra information string
777
* Handle a namespace warning error
780
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
782
const xmlChar * info1, const xmlChar * info2,
783
const xmlChar * info3)
785
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
786
(ctxt->instate == XML_PARSER_EOF))
788
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
789
XML_ERR_WARNING, NULL, 0, (const char *) info1,
790
(const char *) info2, (const char *) info3, 0, 0, msg,
791
info1, info2, info3);
794
/************************************************************************
796
* Library wide options *
798
************************************************************************/
802
* @feature: the feature to be examined
804
* Examines if the library has been compiled with a given feature.
806
* Returns a non-zero value if the feature exists, otherwise zero.
807
* Returns zero (0) if the feature does not exist or an unknown
808
* feature is requested, non-zero otherwise.
811
xmlHasFeature(xmlFeature feature)
814
case XML_WITH_THREAD:
815
#ifdef LIBXML_THREAD_ENABLED
821
#ifdef LIBXML_TREE_ENABLED
826
case XML_WITH_OUTPUT:
827
#ifdef LIBXML_OUTPUT_ENABLED
833
#ifdef LIBXML_PUSH_ENABLED
838
case XML_WITH_READER:
839
#ifdef LIBXML_READER_ENABLED
844
case XML_WITH_PATTERN:
845
#ifdef LIBXML_PATTERN_ENABLED
850
case XML_WITH_WRITER:
851
#ifdef LIBXML_WRITER_ENABLED
857
#ifdef LIBXML_SAX1_ENABLED
863
#ifdef LIBXML_FTP_ENABLED
869
#ifdef LIBXML_HTTP_ENABLED
875
#ifdef LIBXML_VALID_ENABLED
881
#ifdef LIBXML_HTML_ENABLED
886
case XML_WITH_LEGACY:
887
#ifdef LIBXML_LEGACY_ENABLED
893
#ifdef LIBXML_C14N_ENABLED
898
case XML_WITH_CATALOG:
899
#ifdef LIBXML_CATALOG_ENABLED
905
#ifdef LIBXML_XPATH_ENABLED
911
#ifdef LIBXML_XPTR_ENABLED
916
case XML_WITH_XINCLUDE:
917
#ifdef LIBXML_XINCLUDE_ENABLED
923
#ifdef LIBXML_ICONV_ENABLED
928
case XML_WITH_ISO8859X:
929
#ifdef LIBXML_ISO8859X_ENABLED
934
case XML_WITH_UNICODE:
935
#ifdef LIBXML_UNICODE_ENABLED
940
case XML_WITH_REGEXP:
941
#ifdef LIBXML_REGEXP_ENABLED
946
case XML_WITH_AUTOMATA:
947
#ifdef LIBXML_AUTOMATA_ENABLED
953
#ifdef LIBXML_EXPR_ENABLED
958
case XML_WITH_SCHEMAS:
959
#ifdef LIBXML_SCHEMAS_ENABLED
964
case XML_WITH_SCHEMATRON:
965
#ifdef LIBXML_SCHEMATRON_ENABLED
970
case XML_WITH_MODULES:
971
#ifdef LIBXML_MODULES_ENABLED
977
#ifdef LIBXML_DEBUG_ENABLED
982
case XML_WITH_DEBUG_MEM:
983
#ifdef DEBUG_MEMORY_LOCATION
988
case XML_WITH_DEBUG_RUN:
989
#ifdef LIBXML_DEBUG_RUNTIME
995
#ifdef LIBXML_ZLIB_ENABLED
1001
#ifdef LIBXML_LZMA_ENABLED
1007
#ifdef LIBXML_ICU_ENABLED
1018
/************************************************************************
1020
* SAX2 defaulted attributes handling *
1022
************************************************************************/
1026
* @ctxt: an XML parser context
1028
* Do the SAX2 detection and specific initialization
1031
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1032
if (ctxt == NULL) return;
1033
#ifdef LIBXML_SAX1_ENABLED
1034
if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1035
((ctxt->sax->startElementNs != NULL) ||
1036
(ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1039
#endif /* LIBXML_SAX1_ENABLED */
1041
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1042
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1043
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1044
if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1045
(ctxt->str_xml_ns == NULL)) {
1046
xmlErrMemory(ctxt, NULL);
1050
typedef struct _xmlDefAttrs xmlDefAttrs;
1051
typedef xmlDefAttrs *xmlDefAttrsPtr;
1052
struct _xmlDefAttrs {
1053
int nbAttrs; /* number of defaulted attributes on that element */
1054
int maxAttrs; /* the size of the array */
1055
const xmlChar *values[5]; /* array of localname/prefix/values/external */
1059
* xmlAttrNormalizeSpace:
1060
* @src: the source string
1061
* @dst: the target string
1063
* Normalize the space in non CDATA attribute values:
1064
* If the attribute type is not CDATA, then the XML processor MUST further
1065
* process the normalized attribute value by discarding any leading and
1066
* trailing space (#x20) characters, and by replacing sequences of space
1067
* (#x20) characters by a single space (#x20) character.
1068
* Note that the size of dst needs to be at least that of src, and if one doesn't need
1069
* to preserve dst (and it doesn't come from a dictionary or read-only) then
1070
* passing src as dst is just fine.
1072
* Returns a pointer to the normalized value (dst) or NULL if no conversion
1076
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1078
if ((src == NULL) || (dst == NULL))
1081
while (*src == 0x20) src++;
1084
while (*src == 0x20) src++;
1098
* xmlAttrNormalizeSpace2:
1099
* @src: the source string
1101
* Normalize the space in non CDATA attribute values, a slightly more complex
1102
* front end to avoid allocation problems when running on attribute values
1103
* coming from the input.
1105
* Returns a pointer to the normalized value (dst) or NULL if no conversion
1108
static const xmlChar *
1109
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1112
int remove_head = 0;
1113
int need_realloc = 0;
1116
if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1123
while (*cur == 0x20) {
1130
if ((*cur == 0x20) || (*cur == 0)) {
1140
ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1142
xmlErrMemory(ctxt, NULL);
1145
xmlAttrNormalizeSpace(ret, ret);
1146
*len = (int) strlen((const char *)ret);
1148
} else if (remove_head) {
1149
*len -= remove_head;
1150
memmove(src, src + remove_head, 1 + *len);
1158
* @ctxt: an XML parser context
1159
* @fullname: the element fullname
1160
* @fullattr: the attribute fullname
1161
* @value: the attribute value
1163
* Add a defaulted attribute for an element
1166
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1167
const xmlChar *fullname,
1168
const xmlChar *fullattr,
1169
const xmlChar *value) {
1170
xmlDefAttrsPtr defaults;
1172
const xmlChar *name;
1173
const xmlChar *prefix;
1176
* Allows to detect attribute redefinitions
1178
if (ctxt->attsSpecial != NULL) {
1179
if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1183
if (ctxt->attsDefault == NULL) {
1184
ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1185
if (ctxt->attsDefault == NULL)
1190
* split the element name into prefix:localname , the string found
1191
* are within the DTD and then not associated to namespace names.
1193
name = xmlSplitQName3(fullname, &len);
1195
name = xmlDictLookup(ctxt->dict, fullname, -1);
1198
name = xmlDictLookup(ctxt->dict, name, -1);
1199
prefix = xmlDictLookup(ctxt->dict, fullname, len);
1203
* make sure there is some storage
1205
defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1206
if (defaults == NULL) {
1207
defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1208
(4 * 5) * sizeof(const xmlChar *));
1209
if (defaults == NULL)
1211
defaults->nbAttrs = 0;
1212
defaults->maxAttrs = 4;
1213
if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1214
defaults, NULL) < 0) {
1218
} else if (defaults->nbAttrs >= defaults->maxAttrs) {
1219
xmlDefAttrsPtr temp;
1221
temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1222
(2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1226
defaults->maxAttrs *= 2;
1227
if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1228
defaults, NULL) < 0) {
1235
* Split the attribute name into prefix:localname , the strings found
1236
* are within the DTD and then not associated to namespace names.
1238
name = xmlSplitQName3(fullattr, &len);
1240
name = xmlDictLookup(ctxt->dict, fullattr, -1);
1243
name = xmlDictLookup(ctxt->dict, name, -1);
1244
prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1247
defaults->values[5 * defaults->nbAttrs] = name;
1248
defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1249
/* intern the string and precompute the end */
1250
len = xmlStrlen(value);
1251
value = xmlDictLookup(ctxt->dict, value, len);
1252
defaults->values[5 * defaults->nbAttrs + 2] = value;
1253
defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1255
defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1257
defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1258
defaults->nbAttrs++;
1263
xmlErrMemory(ctxt, NULL);
1268
* xmlAddSpecialAttr:
1269
* @ctxt: an XML parser context
1270
* @fullname: the element fullname
1271
* @fullattr: the attribute fullname
1272
* @type: the attribute type
1274
* Register this attribute type
1277
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1278
const xmlChar *fullname,
1279
const xmlChar *fullattr,
1282
if (ctxt->attsSpecial == NULL) {
1283
ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1284
if (ctxt->attsSpecial == NULL)
1288
if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1291
xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1292
(void *) (long) type);
1296
xmlErrMemory(ctxt, NULL);
1301
* xmlCleanSpecialAttrCallback:
1303
* Removes CDATA attributes from the special attribute table
1306
xmlCleanSpecialAttrCallback(void *payload, void *data,
1307
const xmlChar *fullname, const xmlChar *fullattr,
1308
const xmlChar *unused ATTRIBUTE_UNUSED) {
1309
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1311
if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1312
xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1317
* xmlCleanSpecialAttr:
1318
* @ctxt: an XML parser context
1320
* Trim the list of attributes defined to remove all those of type
1321
* CDATA as they are not special. This call should be done when finishing
1322
* to parse the DTD and before starting to parse the document root.
1325
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1327
if (ctxt->attsSpecial == NULL)
1330
xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1332
if (xmlHashSize(ctxt->attsSpecial) == 0) {
1333
xmlHashFree(ctxt->attsSpecial, NULL);
1334
ctxt->attsSpecial = NULL;
1340
* xmlCheckLanguageID:
1341
* @lang: pointer to the string value
1343
* Checks that the value conforms to the LanguageID production:
1345
* NOTE: this is somewhat deprecated, those productions were removed from
1346
* the XML Second edition.
1348
* [33] LanguageID ::= Langcode ('-' Subcode)*
1349
* [34] Langcode ::= ISO639Code | IanaCode | UserCode
1350
* [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1351
* [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1352
* [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1353
* [38] Subcode ::= ([a-z] | [A-Z])+
1355
* The current REC references the successors of RFC 1766, currently RFC 5646
1357
* http://www.rfc-editor.org/rfc/rfc5646.txt
1358
* langtag = language
1364
* language = 2*3ALPHA ; shortest ISO 639 code
1365
* ["-" extlang] ; sometimes followed by
1366
* ; extended language subtags
1367
* / 4ALPHA ; or reserved for future use
1368
* / 5*8ALPHA ; or registered language subtag
1370
* extlang = 3ALPHA ; selected ISO 639 codes
1371
* *2("-" 3ALPHA) ; permanently reserved
1373
* script = 4ALPHA ; ISO 15924 code
1375
* region = 2ALPHA ; ISO 3166-1 code
1376
* / 3DIGIT ; UN M.49 code
1378
* variant = 5*8alphanum ; registered variants
1379
* / (DIGIT 3alphanum)
1381
* extension = singleton 1*("-" (2*8alphanum))
1383
* ; Single alphanumerics
1384
* ; "x" reserved for private use
1385
* singleton = DIGIT ; 0 - 9
1391
* it sounds right to still allow Irregular i-xxx IANA and user codes too
1392
* The parser below doesn't try to cope with extension or privateuse
1393
* that could be added but that's not interoperable anyway
1395
* Returns 1 if correct 0 otherwise
1398
xmlCheckLanguageID(const xmlChar * lang)
1400
const xmlChar *cur = lang, *nxt;
1404
if (((cur[0] == 'i') && (cur[1] == '-')) ||
1405
((cur[0] == 'I') && (cur[1] == '-')) ||
1406
((cur[0] == 'x') && (cur[1] == '-')) ||
1407
((cur[0] == 'X') && (cur[1] == '-'))) {
1409
* Still allow IANA code and user code which were coming
1410
* from the previous version of the XML-1.0 specification
1411
* it's deprecated but we should not fail
1414
while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1415
((cur[0] >= 'a') && (cur[0] <= 'z')))
1417
return(cur[0] == 0);
1420
while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1421
((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1423
if (nxt - cur >= 4) {
1427
if ((nxt - cur > 8) || (nxt[0] != 0))
1433
/* we got an ISO 639 code */
1441
/* now we can have extlang or script or region or variant */
1442
if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1445
while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1446
((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1452
if ((nxt - cur >= 5) && (nxt - cur <= 8))
1456
/* we parsed an extlang */
1464
/* now we can have script or region or variant */
1465
if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1468
while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1469
((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1473
if ((nxt - cur >= 5) && (nxt - cur <= 8))
1477
/* we parsed a script */
1486
/* now we can have region or variant */
1487
if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1490
while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1491
((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1494
if ((nxt - cur >= 5) && (nxt - cur <= 8))
1498
/* we parsed a region */
1507
/* now we can just have a variant */
1508
while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1509
((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1512
if ((nxt - cur < 5) || (nxt - cur > 8))
1515
/* we parsed a variant */
1521
/* extensions and private use subtags not checked */
1525
if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1526
((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1533
/************************************************************************
1535
* Parser stacks related functions and macros *
1537
************************************************************************/
1539
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1540
const xmlChar ** str);
1545
* @ctxt: an XML parser context
1546
* @prefix: the namespace prefix or NULL
1547
* @URL: the namespace name
1549
* Pushes a new parser namespace on top of the ns stack
1551
* Returns -1 in case of error, -2 if the namespace should be discarded
1552
* and the index in the stack otherwise.
1555
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1557
if (ctxt->options & XML_PARSE_NSCLEAN) {
1559
for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1560
if (ctxt->nsTab[i] == prefix) {
1562
if (ctxt->nsTab[i + 1] == URL)
1564
/* out of scope keep it */
1569
if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1572
ctxt->nsTab = (const xmlChar **)
1573
xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1574
if (ctxt->nsTab == NULL) {
1575
xmlErrMemory(ctxt, NULL);
1579
} else if (ctxt->nsNr >= ctxt->nsMax) {
1580
const xmlChar ** tmp;
1582
tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1583
ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1585
xmlErrMemory(ctxt, NULL);
1591
ctxt->nsTab[ctxt->nsNr++] = prefix;
1592
ctxt->nsTab[ctxt->nsNr++] = URL;
1593
return (ctxt->nsNr);
1597
* @ctxt: an XML parser context
1598
* @nr: the number to pop
1600
* Pops the top @nr parser prefix/namespace from the ns stack
1602
* Returns the number of namespaces removed
1605
nsPop(xmlParserCtxtPtr ctxt, int nr)
1609
if (ctxt->nsTab == NULL) return(0);
1610
if (ctxt->nsNr < nr) {
1611
xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1614
if (ctxt->nsNr <= 0)
1617
for (i = 0;i < nr;i++) {
1619
ctxt->nsTab[ctxt->nsNr] = NULL;
1626
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1627
const xmlChar **atts;
1631
if (ctxt->atts == NULL) {
1632
maxatts = 55; /* allow for 10 attrs by default */
1633
atts = (const xmlChar **)
1634
xmlMalloc(maxatts * sizeof(xmlChar *));
1635
if (atts == NULL) goto mem_error;
1637
attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1638
if (attallocs == NULL) goto mem_error;
1639
ctxt->attallocs = attallocs;
1640
ctxt->maxatts = maxatts;
1641
} else if (nr + 5 > ctxt->maxatts) {
1642
maxatts = (nr + 5) * 2;
1643
atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1644
maxatts * sizeof(const xmlChar *));
1645
if (atts == NULL) goto mem_error;
1647
attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1648
(maxatts / 5) * sizeof(int));
1649
if (attallocs == NULL) goto mem_error;
1650
ctxt->attallocs = attallocs;
1651
ctxt->maxatts = maxatts;
1653
return(ctxt->maxatts);
1655
xmlErrMemory(ctxt, NULL);
1661
* @ctxt: an XML parser context
1662
* @value: the parser input
1664
* Pushes a new parser input on top of the input stack
1666
* Returns -1 in case of error, the index in the stack otherwise
1669
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1671
if ((ctxt == NULL) || (value == NULL))
1673
if (ctxt->inputNr >= ctxt->inputMax) {
1674
ctxt->inputMax *= 2;
1676
(xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1678
sizeof(ctxt->inputTab[0]));
1679
if (ctxt->inputTab == NULL) {
1680
xmlErrMemory(ctxt, NULL);
1681
xmlFreeInputStream(value);
1682
ctxt->inputMax /= 2;
1687
ctxt->inputTab[ctxt->inputNr] = value;
1688
ctxt->input = value;
1689
return (ctxt->inputNr++);
1693
* @ctxt: an XML parser context
1695
* Pops the top parser input from the input stack
1697
* Returns the input just removed
1700
inputPop(xmlParserCtxtPtr ctxt)
1702
xmlParserInputPtr ret;
1706
if (ctxt->inputNr <= 0)
1709
if (ctxt->inputNr > 0)
1710
ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1713
ret = ctxt->inputTab[ctxt->inputNr];
1714
ctxt->inputTab[ctxt->inputNr] = NULL;
1719
* @ctxt: an XML parser context
1720
* @value: the element node
1722
* Pushes a new element node on top of the node stack
1724
* Returns -1 in case of error, the index in the stack otherwise
1727
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1729
if (ctxt == NULL) return(0);
1730
if (ctxt->nodeNr >= ctxt->nodeMax) {
1733
tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1735
sizeof(ctxt->nodeTab[0]));
1737
xmlErrMemory(ctxt, NULL);
1740
ctxt->nodeTab = tmp;
1743
if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1744
((ctxt->options & XML_PARSE_HUGE) == 0)) {
1745
xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1746
"Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1748
ctxt->instate = XML_PARSER_EOF;
1751
ctxt->nodeTab[ctxt->nodeNr] = value;
1753
return (ctxt->nodeNr++);
1758
* @ctxt: an XML parser context
1760
* Pops the top element node from the node stack
1762
* Returns the node just removed
1765
nodePop(xmlParserCtxtPtr ctxt)
1769
if (ctxt == NULL) return(NULL);
1770
if (ctxt->nodeNr <= 0)
1773
if (ctxt->nodeNr > 0)
1774
ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1777
ret = ctxt->nodeTab[ctxt->nodeNr];
1778
ctxt->nodeTab[ctxt->nodeNr] = NULL;
1782
#ifdef LIBXML_PUSH_ENABLED
1785
* @ctxt: an XML parser context
1786
* @value: the element name
1787
* @prefix: the element prefix
1788
* @URI: the element namespace name
1790
* Pushes a new element name/prefix/URL on top of the name stack
1792
* Returns -1 in case of error, the index in the stack otherwise
1795
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1796
const xmlChar *prefix, const xmlChar *URI, int nsNr)
1798
if (ctxt->nameNr >= ctxt->nameMax) {
1799
const xmlChar * *tmp;
1802
tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1804
sizeof(ctxt->nameTab[0]));
1809
ctxt->nameTab = tmp;
1810
tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1812
sizeof(ctxt->pushTab[0]));
1817
ctxt->pushTab = tmp2;
1819
ctxt->nameTab[ctxt->nameNr] = value;
1821
ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1822
ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1823
ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1824
return (ctxt->nameNr++);
1826
xmlErrMemory(ctxt, NULL);
1831
* @ctxt: an XML parser context
1833
* Pops the top element/prefix/URI name from the name stack
1835
* Returns the name just removed
1837
static const xmlChar *
1838
nameNsPop(xmlParserCtxtPtr ctxt)
1842
if (ctxt->nameNr <= 0)
1845
if (ctxt->nameNr > 0)
1846
ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1849
ret = ctxt->nameTab[ctxt->nameNr];
1850
ctxt->nameTab[ctxt->nameNr] = NULL;
1853
#endif /* LIBXML_PUSH_ENABLED */
1857
* @ctxt: an XML parser context
1858
* @value: the element name
1860
* Pushes a new element name on top of the name stack
1862
* Returns -1 in case of error, the index in the stack otherwise
1865
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1867
if (ctxt == NULL) return (-1);
1869
if (ctxt->nameNr >= ctxt->nameMax) {
1870
const xmlChar * *tmp;
1871
tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1873
sizeof(ctxt->nameTab[0]));
1877
ctxt->nameTab = tmp;
1880
ctxt->nameTab[ctxt->nameNr] = value;
1882
return (ctxt->nameNr++);
1884
xmlErrMemory(ctxt, NULL);
1889
* @ctxt: an XML parser context
1891
* Pops the top element name from the name stack
1893
* Returns the name just removed
1896
namePop(xmlParserCtxtPtr ctxt)
1900
if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1903
if (ctxt->nameNr > 0)
1904
ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1907
ret = ctxt->nameTab[ctxt->nameNr];
1908
ctxt->nameTab[ctxt->nameNr] = NULL;
1912
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1913
if (ctxt->spaceNr >= ctxt->spaceMax) {
1916
ctxt->spaceMax *= 2;
1917
tmp = (int *) xmlRealloc(ctxt->spaceTab,
1918
ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1920
xmlErrMemory(ctxt, NULL);
1924
ctxt->spaceTab = tmp;
1926
ctxt->spaceTab[ctxt->spaceNr] = val;
1927
ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1928
return(ctxt->spaceNr++);
1931
static int spacePop(xmlParserCtxtPtr ctxt) {
1933
if (ctxt->spaceNr <= 0) return(0);
1935
if (ctxt->spaceNr > 0)
1936
ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1938
ctxt->space = &ctxt->spaceTab[0];
1939
ret = ctxt->spaceTab[ctxt->spaceNr];
1940
ctxt->spaceTab[ctxt->spaceNr] = -1;
1945
* Macros for accessing the content. Those should be used only by the parser,
1948
* Dirty macros, i.e. one often needs to make assumptions about the context to
1951
* CUR_PTR return the current pointer to the xmlChar to be parsed.
1952
* To be used with extreme caution since operations consuming
1953
* characters may move the input buffer to a different location !
1954
* CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1955
* This should be used internally by the parser
1956
* only to compare to ASCII values otherwise it would break when
1957
* running with UTF-8 encoding.
1958
* RAW same as CUR but in the input buffer, bypass any token
1959
* extraction that may have been done
1960
* NXT(n) returns the n'th next xmlChar. Same as CUR, it should be used only
1961
* to compare on ASCII based substring.
1962
* SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1963
* strings without newlines within the parser.
1964
* NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1965
* defined char within the parser.
1966
* Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
1968
* NEXT Skip to the next character, this does the proper decoding
1969
* in UTF-8 mode. It also pop-up unfinished entities on the fly.
1970
* NEXTL(l) Skip the current unicode character of l xmlChars long.
1971
* CUR_CHAR(l) returns the current unicode character (int), set l
1972
* to the number of xmlChars used for the encoding [0-5].
1973
* CUR_SCHAR same but operate on a string instead of the context
1974
* COPY_BUF copy the current unicode char to the target buffer, increment
1976
* GROW, SHRINK handling of input buffers
1979
/*
 * Byte-level accessors for the current parser input. Each one expands to an
 * expression on the `ctxt` variable that is in scope where the macro is used.
 * Every replacement list is parenthesized so the macros compose safely inside
 * larger expressions; NXT and CUR_PTR still expand to lvalues.
 */
#define RAW (*ctxt->input->cur)			/* byte at the read position */
#define CUR (*ctxt->input->cur)			/* same expansion as RAW */
#define NXT(val) (ctxt->input->cur[(val)])	/* byte `val` positions ahead */
#define CUR_PTR (ctxt->input->cur)		/* the read pointer itself */
1984
/*
 * CMPn(s, c1, ..., cn): non-zero when the first n bytes found at `s` are
 * exactly c1..cn.
 *
 * The bytes are compared as unsigned char, so values above 0x7F compare
 * correctly whatever the signedness of plain char. The comparisons are
 * chained with && from the first byte onwards, so evaluation stops at the
 * first mismatch and later bytes are not read.
 *
 * `s` is evaluated once per compared byte: do not pass an expression with
 * side effects. Every argument is parenthesized so that expression
 * arguments (pointer arithmetic, conditionals, ...) are cast and compared
 * as a whole.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
2002
#define SKIP(val) do { \
2003
ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
2004
if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
2005
if ((*ctxt->input->cur == 0) && \
2006
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
2007
xmlPopInput(ctxt); \
2010
#define SKIPL(val) do { \
2012
for(skipl=0; skipl<val; skipl++) { \
2013
if (*(ctxt->input->cur) == '\n') { \
2014
ctxt->input->line++; ctxt->input->col = 1; \
2015
} else ctxt->input->col++; \
2017
ctxt->input->cur++; \
2019
if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
2020
if ((*ctxt->input->cur == 0) && \
2021
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
2022
xmlPopInput(ctxt); \
2025
#define SHRINK if ((ctxt->progressive == 0) && \
2026
(ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2027
(ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2030
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2031
xmlParserInputShrink(ctxt->input);
2032
if ((*ctxt->input->cur == 0) &&
2033
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2037
#define GROW if ((ctxt->progressive == 0) && \
2038
(ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2041
static void xmlGROW (xmlParserCtxtPtr ctxt) {
2042
if ((((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
2043
((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
2044
((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
2045
((ctxt->options & XML_PARSE_HUGE) == 0)) {
2046
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2047
ctxt->instate = XML_PARSER_EOF;
2049
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2050
if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
2051
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2055
/* Shorthand for xmlSkipBlankChars() applied to the `ctxt` in scope at the point of use. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
2057
/* Shorthand for xmlNextChar() applied to the `ctxt` in scope at the point of use. */
#define NEXT xmlNextChar(ctxt)
2060
ctxt->input->col++; \
2061
ctxt->input->cur++; \
2063
if (*ctxt->input->cur == 0) \
2064
xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
2067
#define NEXTL(l) do { \
2068
if (*(ctxt->input->cur) == '\n') { \
2069
ctxt->input->line++; ctxt->input->col = 1; \
2070
} else ctxt->input->col++; \
2071
ctxt->input->cur += l; \
2072
if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
2075
/*
 * CUR_CHAR(l): calls xmlCurrentChar() on the `ctxt` in scope, passing the
 * address of `l` so the callee can store a length into it.
 * CUR_SCHAR(s, l): same, but calls xmlStringCurrentChar() on the string `s`.
 *
 * `l` must be an lvalue. Arguments are parenthesized so that any lvalue or
 * pointer expression can be passed safely.
 */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))
2078
/*
 * COPY_BUF(l,b,i,v): append the character `v` to buffer `b` at index `i`
 * and advance `i`.
 *
 * When `l` equals 1 the value is stored as a single xmlChar and `i` is
 * incremented by one; otherwise the value is handed to
 * xmlCopyCharMultiByte() and `i` is advanced by what that call returns.
 *
 * The macro expands to a single void expression, so it behaves as one
 * statement in any context, and every argument is parenthesized so that a
 * compound `l` (e.g. `flags & 3`) is compared to 1 as a whole. `b` and `i`
 * are evaluated more than once: pass plain lvalues without side effects.
 */
#define COPY_BUF(l,b,i,v)						\
    (((l) == 1) ? (void) ((b)[(i)++] = (xmlChar) (v))			\
                : (void) ((i) += xmlCopyCharMultiByte(&(b)[(i)], (v))))
2083
* xmlSkipBlankChars:
2084
* @ctxt: the XML parser context
2086
* Skip all blank characters found at that point in the input stream.
2087
* It pops up finished entities in the process if allowable at that point.
2089
* Returns the number of space chars skipped
2093
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2097
* It's Okay to use CUR/NEXT here since all the blanks are on
2100
if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2103
* if we are in the document content, go really fast
2105
cur = ctxt->input->cur;
2106
while (IS_BLANK_CH(*cur)) {
2108
ctxt->input->line++; ctxt->input->col = 1;
2113
ctxt->input->cur = cur;
2114
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2115
cur = ctxt->input->cur;
2118
ctxt->input->cur = cur;
2123
while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
2128
while ((cur == 0) && (ctxt->inputNr > 1) &&
2129
(ctxt->instate != XML_PARSER_COMMENT)) {
2134
* Need to handle support of entities branching here
2136
if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2137
} while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2142
/************************************************************************
2144
* Commodity functions to handle entities *
2146
************************************************************************/
2150
* @ctxt: an XML parser context
2152
* xmlPopInput: the current input pointed by ctxt->input came to an end
2153
* pop it and return the next char.
2155
* Returns the current xmlChar in the parser context
2158
xmlPopInput(xmlParserCtxtPtr ctxt) {
2159
if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2160
if (xmlParserDebugEntities)
2161
xmlGenericError(xmlGenericErrorContext,
2162
"Popping input %d\n", ctxt->inputNr);
2163
xmlFreeInputStream(inputPop(ctxt));
2164
if ((*ctxt->input->cur == 0) &&
2165
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2166
return(xmlPopInput(ctxt));
2172
* @ctxt: an XML parser context
2173
* @input: an XML parser input fragment (entity, XML fragment ...).
2175
* xmlPushInput: switch to a new input stream which is stacked on top
2176
* of the previous one(s).
2177
* Returns -1 in case of error or the index in the input stack
2180
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2182
if (input == NULL) return(-1);
2184
if (xmlParserDebugEntities) {
2185
if ((ctxt->input != NULL) && (ctxt->input->filename))
2186
xmlGenericError(xmlGenericErrorContext,
2187
"%s(%d): ", ctxt->input->filename,
2189
xmlGenericError(xmlGenericErrorContext,
2190
"Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2192
ret = inputPush(ctxt, input);
2193
if (ctxt->instate == XML_PARSER_EOF)
2201
* @ctxt: an XML parser context
2203
* parse Reference declarations
2205
* [66] CharRef ::= '&#' [0-9]+ ';' |
2206
* '&#x' [0-9a-fA-F]+ ';'
2208
* [ WFC: Legal Character ]
2209
* Characters referred to using character references must match the
2210
* production for Char.
2212
* Returns the value parsed (as an int), 0 in case of error
2215
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2216
unsigned int val = 0;
2218
unsigned int outofrange = 0;
2221
* Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2223
if ((RAW == '&') && (NXT(1) == '#') &&
2227
while (RAW != ';') { /* loop blocked by count */
2231
if (ctxt->instate == XML_PARSER_EOF)
2234
if ((RAW >= '0') && (RAW <= '9'))
2235
val = val * 16 + (CUR - '0');
2236
else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2237
val = val * 16 + (CUR - 'a') + 10;
2238
else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2239
val = val * 16 + (CUR - 'A') + 10;
2241
xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2252
/* on purpose to avoid reentrancy problems with NEXT and SKIP */
2257
} else if ((RAW == '&') && (NXT(1) == '#')) {
2260
while (RAW != ';') { /* loop blocked by count */
2264
if (ctxt->instate == XML_PARSER_EOF)
2267
if ((RAW >= '0') && (RAW <= '9'))
2268
val = val * 10 + (CUR - '0');
2270
xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2281
/* on purpose to avoid reentrancy problems with NEXT and SKIP */
2287
xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2291
* [ WFC: Legal Character ]
2292
* Characters referred to using character references must match the
2293
* production for Char.
2295
if ((IS_CHAR(val) && (outofrange == 0))) {
2298
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2299
"xmlParseCharRef: invalid xmlChar value %d\n",
2306
* xmlParseStringCharRef:
2307
* @ctxt: an XML parser context
2308
* @str: a pointer to an index in the string
2310
* parse Reference declarations, variant parsing from a string rather
2311
* than an input flow.
2313
* [66] CharRef ::= '&#' [0-9]+ ';' |
2314
* '&#x' [0-9a-fA-F]+ ';'
2316
* [ WFC: Legal Character ]
2317
* Characters referred to using character references must match the
2318
* production for Char.
2320
* Returns the value parsed (as an int), 0 in case of error, str will be
2321
* updated to the current value of the index
2324
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2327
unsigned int val = 0;
2328
unsigned int outofrange = 0;
2330
if ((str == NULL) || (*str == NULL)) return(0);
2333
if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2336
while (cur != ';') { /* Non input consuming loop */
2337
if ((cur >= '0') && (cur <= '9'))
2338
val = val * 16 + (cur - '0');
2339
else if ((cur >= 'a') && (cur <= 'f'))
2340
val = val * 16 + (cur - 'a') + 10;
2341
else if ((cur >= 'A') && (cur <= 'F'))
2342
val = val * 16 + (cur - 'A') + 10;
2344
xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2356
} else if ((cur == '&') && (ptr[1] == '#')){
2359
while (cur != ';') { /* Non input consuming loops */
2360
if ((cur >= '0') && (cur <= '9'))
2361
val = val * 10 + (cur - '0');
2363
xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2376
xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2382
* [ WFC: Legal Character ]
2383
* Characters referred to using character references must match the
2384
* production for Char.
2386
if ((IS_CHAR(val) && (outofrange == 0))) {
2389
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2390
"xmlParseStringCharRef: invalid xmlChar value %d\n",
2397
* xmlNewBlanksWrapperInputStream:
2398
* @ctxt: an XML parser context
2399
* @entity: an Entity pointer
2401
* Create a new input stream for wrapping
2402
* blanks around a PEReference
2404
* Returns the new input stream or NULL
2407
/*
 * Release callback for blanks-wrapper input buffers: hands the string back
 * to xmlFree().
 */
static void
deallocblankswrapper(xmlChar *str)
{
    xmlFree(str);
}
2409
static xmlParserInputPtr
2410
xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2411
xmlParserInputPtr input;
2414
if (entity == NULL) {
2415
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2416
"xmlNewBlanksWrapperInputStream entity\n");
2419
if (xmlParserDebugEntities)
2420
xmlGenericError(xmlGenericErrorContext,
2421
"new blanks wrapper for entity: %s\n", entity->name);
2422
input = xmlNewInputStream(ctxt);
2423
if (input == NULL) {
2426
length = xmlStrlen(entity->name) + 5;
2427
buffer = xmlMallocAtomic(length);
2428
if (buffer == NULL) {
2429
xmlErrMemory(ctxt, NULL);
2435
buffer [length-3] = ';';
2436
buffer [length-2] = ' ';
2437
buffer [length-1] = 0;
2438
memcpy(buffer + 2, entity->name, length - 5);
2439
input->free = deallocblankswrapper;
2440
input->base = buffer;
2441
input->cur = buffer;
2442
input->length = length;
2443
input->end = &buffer[length];
2448
* xmlParserHandlePEReference:
2449
* @ctxt: the parser context
2451
* [69] PEReference ::= '%' Name ';'
2453
* [ WFC: No Recursion ]
2454
* A parsed entity must not contain a recursive
2455
* reference to itself, either directly or indirectly.
2457
* [ WFC: Entity Declared ]
2458
* In a document without any DTD, a document with only an internal DTD
2459
* subset which contains no parameter entity references, or a document
2460
* with "standalone='yes'", ... ... The declaration of a parameter
2461
* entity must precede any reference to it...
2463
* [ VC: Entity Declared ]
2464
* In a document with an external subset or external parameter entities
2465
* with "standalone='no'", ... ... The declaration of a parameter entity
2466
* must precede any reference to it...
2469
* Parameter-entity references may only appear in the DTD.
2470
* NOTE: misleading but this is handled.
2472
* A PEReference may have been detected in the current input stream
2473
* the handling is done according to
2474
* http://www.w3.org/TR/REC-xml#entproc
2476
* - Included in literal in entity values
2477
* - Included as Parameter Entity reference within DTDs
2480
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2481
const xmlChar *name;
2482
xmlEntityPtr entity = NULL;
2483
xmlParserInputPtr input;
2485
if (RAW != '%') return;
2486
switch(ctxt->instate) {
2487
case XML_PARSER_CDATA_SECTION:
2489
case XML_PARSER_COMMENT:
2491
case XML_PARSER_START_TAG:
2493
case XML_PARSER_END_TAG:
2495
case XML_PARSER_EOF:
2496
xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2498
case XML_PARSER_PROLOG:
2499
case XML_PARSER_START:
2500
case XML_PARSER_MISC:
2501
xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2503
case XML_PARSER_ENTITY_DECL:
2504
case XML_PARSER_CONTENT:
2505
case XML_PARSER_ATTRIBUTE_VALUE:
2507
case XML_PARSER_SYSTEM_LITERAL:
2508
case XML_PARSER_PUBLIC_LITERAL:
2509
/* we just ignore it there */
2511
case XML_PARSER_EPILOG:
2512
xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2514
case XML_PARSER_ENTITY_VALUE:
2516
* NOTE: in the case of entity values, we don't do the
2517
* substitution here since we need the literal
2518
* entity value to be able to save the internal
2519
* subset of the document.
2520
* This will be handled by xmlStringDecodeEntities
2523
case XML_PARSER_DTD:
2525
* [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2526
* In the internal DTD subset, parameter-entity references
2527
* can occur only where markup declarations can occur, not
2528
* within markup declarations.
2529
* In that case this is handled in xmlParseMarkupDecl
2531
if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2533
if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2536
case XML_PARSER_IGNORE:
2541
name = xmlParseName(ctxt);
2542
if (xmlParserDebugEntities)
2543
xmlGenericError(xmlGenericErrorContext,
2544
"PEReference: %s\n", name);
2546
xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2550
if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2551
entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2552
if (ctxt->instate == XML_PARSER_EOF)
2554
if (entity == NULL) {
2557
* [ WFC: Entity Declared ]
2558
* In a document without any DTD, a document with only an
2559
* internal DTD subset which contains no parameter entity
2560
* references, or a document with "standalone='yes'", ...
2561
* ... The declaration of a parameter entity must precede
2562
* any reference to it...
2564
if ((ctxt->standalone == 1) ||
2565
((ctxt->hasExternalSubset == 0) &&
2566
(ctxt->hasPErefs == 0))) {
2567
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2568
"PEReference: %%%s; not found\n", name);
2571
* [ VC: Entity Declared ]
2572
* In a document with an external subset or external
2573
* parameter entities with "standalone='no'", ...
2574
* ... The declaration of a parameter entity must precede
2575
* any reference to it...
2577
if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2578
xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2579
"PEReference: %%%s; not found\n",
2582
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2583
"PEReference: %%%s; not found\n",
2587
} else if (ctxt->input->free != deallocblankswrapper) {
2588
input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2589
if (xmlPushInput(ctxt, input) < 0)
2592
if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2593
(entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2595
xmlCharEncoding enc;
2598
* handle the extra spaces added before and after
2599
* c.f. http://www.w3.org/TR/REC-xml#as-PE
2600
* this is done independently.
2602
input = xmlNewEntityInputStream(ctxt, entity);
2603
if (xmlPushInput(ctxt, input) < 0)
2607
* Get the 4 first bytes and decode the charset
2608
* if enc != XML_CHAR_ENCODING_NONE
2609
* plug some encoding conversion routines.
2610
* Note that, since we may have some non-UTF8
2611
* encoding (like UTF16, bug 135229), the 'length'
2612
* is not known, but we can calculate based upon
2613
* the amount of data in the buffer.
2616
if (ctxt->instate == XML_PARSER_EOF)
2618
if ((ctxt->input->end - ctxt->input->cur)>=4) {
2623
enc = xmlDetectCharEncoding(start, 4);
2624
if (enc != XML_CHAR_ENCODING_NONE) {
2625
xmlSwitchEncoding(ctxt, enc);
2629
if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2630
(CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2631
(IS_BLANK_CH(NXT(5)))) {
2632
xmlParseTextDecl(ctxt);
2635
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2636
"PEReference: %s is not a parameter entity\n",
2641
xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2647
* Macro used to grow the current buffer.
2648
* buffer##_size is expected to be a size_t
2649
* mem_error: is expected to handle memory allocation failures
2651
#define growBuffer(buffer, n) { \
2653
size_t new_size = buffer##_size * 2 + n; \
2654
if (new_size < buffer##_size) goto mem_error; \
2655
tmp = (xmlChar *) xmlRealloc(buffer, new_size); \
2656
if (tmp == NULL) goto mem_error; \
2658
buffer##_size = new_size; \
2662
* xmlStringLenDecodeEntities:
2663
* @ctxt: the parser context
2664
* @str: the input string
2665
* @len: the string length
2666
* @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2667
* @end: an end marker xmlChar, 0 if none
2668
* @end2: an end marker xmlChar, 0 if none
2669
* @end3: an end marker xmlChar, 0 if none
2671
* Takes an entity string's content and performs the adequate substitutions.
2673
* [67] Reference ::= EntityRef | CharRef
2675
* [69] PEReference ::= '%' Name ';'
2677
* Returns A newly allocated string with the substitution done. The caller
2678
* must deallocate it !
2681
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2682
int what, xmlChar end, xmlChar end2, xmlChar end3) {
2683
xmlChar *buffer = NULL;
2684
size_t buffer_size = 0;
2687
xmlChar *current = NULL;
2688
xmlChar *rep = NULL;
2689
const xmlChar *last;
2693
if ((ctxt == NULL) || (str == NULL) || (len < 0))
2697
if (((ctxt->depth > 40) &&
2698
((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2699
(ctxt->depth > 1024)) {
2700
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2705
* allocate a translation buffer.
2707
buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2708
buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2709
if (buffer == NULL) goto mem_error;
2712
* OK loop until we reach one of the ending char or a size limit.
2713
* we are operating on already parsed values.
2716
c = CUR_SCHAR(str, l);
2719
while ((c != 0) && (c != end) && /* non input consuming loop */
2720
(c != end2) && (c != end3)) {
2723
if ((c == '&') && (str[1] == '#')) {
2724
int val = xmlParseStringCharRef(ctxt, &str);
2726
COPY_BUF(0,buffer,nbchars,val);
2728
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2729
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2731
} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2732
if (xmlParserDebugEntities)
2733
xmlGenericError(xmlGenericErrorContext,
2734
"String decoding Entity Reference: %.30s\n",
2736
ent = xmlParseStringEntityRef(ctxt, &str);
2737
if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2738
(ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2741
ctxt->nbentities += ent->checked / 2;
2742
if ((ent != NULL) &&
2743
(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2744
if (ent->content != NULL) {
2745
COPY_BUF(0,buffer,nbchars,ent->content[0]);
2746
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2747
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2750
xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2751
"predefined entity has no content\n");
2753
} else if ((ent != NULL) && (ent->content != NULL)) {
2755
rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2761
while (*current != 0) { /* non input consuming loop */
2762
buffer[nbchars++] = *current++;
2763
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2764
if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2766
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2772
} else if (ent != NULL) {
2773
int i = xmlStrlen(ent->name);
2774
const xmlChar *cur = ent->name;
2776
buffer[nbchars++] = '&';
2777
if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2778
growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2781
buffer[nbchars++] = *cur++;
2782
buffer[nbchars++] = ';';
2784
} else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2785
if (xmlParserDebugEntities)
2786
xmlGenericError(xmlGenericErrorContext,
2787
"String decoding PE Reference: %.30s\n", str);
2788
ent = xmlParseStringPEReference(ctxt, &str);
2789
if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2792
ctxt->nbentities += ent->checked / 2;
2794
if (ent->content == NULL) {
2795
xmlLoadEntityContent(ctxt, ent);
2798
rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2803
while (*current != 0) { /* non input consuming loop */
2804
buffer[nbchars++] = *current++;
2805
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2806
if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2808
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2816
COPY_BUF(l,buffer,nbchars,c);
2818
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2819
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2823
c = CUR_SCHAR(str, l);
2827
buffer[nbchars] = 0;
2831
xmlErrMemory(ctxt, NULL);
2841
* xmlStringDecodeEntities:
2842
* @ctxt: the parser context
2843
* @str: the input string
2844
* @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2845
* @end: an end marker xmlChar, 0 if none
2846
* @end2: an end marker xmlChar, 0 if none
2847
* @end3: an end marker xmlChar, 0 if none
2849
* Takes an entity string's content and performs the adequate substitutions.
2851
* [67] Reference ::= EntityRef | CharRef
2853
* [69] PEReference ::= '%' Name ';'
2855
* Returns A newly allocated string with the substitution done. The caller
2856
* must deallocate it !
2859
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2860
xmlChar end, xmlChar end2, xmlChar end3) {
2861
if ((ctxt == NULL) || (str == NULL)) return(NULL);
2862
return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2866
/************************************************************************
2868
* Commodity functions, cleanup needed ? *
2870
************************************************************************/
2874
* @ctxt: an XML parser context
2876
* @len: the size of @str
2877
* @blank_chars: we know the chars are blanks
2879
* Is this a sequence of blank chars that one can ignore ?
2881
* Returns 1 if ignorable 0 otherwise.
2884
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2887
xmlNodePtr lastChild;
2890
* Don't spend time trying to differentiate them, the same callback is
2893
if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2897
* Check for xml:space value.
2899
if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2900
(*(ctxt->space) == -2))
2904
* Check that the string is made of blanks
2906
if (blank_chars == 0) {
2907
for (i = 0;i < len;i++)
2908
if (!(IS_BLANK_CH(str[i]))) return(0);
2912
* Look if the element is mixed content in the DTD if available
2914
if (ctxt->node == NULL) return(0);
2915
if (ctxt->myDoc != NULL) {
2916
ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2917
if (ret == 0) return(1);
2918
if (ret == 1) return(0);
2922
* Otherwise, heuristic :-\
2924
if ((RAW != '<') && (RAW != 0xD)) return(0);
2925
if ((ctxt->node->children == NULL) &&
2926
(RAW == '<') && (NXT(1) == '/')) return(0);
2928
lastChild = xmlGetLastChild(ctxt->node);
2929
if (lastChild == NULL) {
2930
if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2931
(ctxt->node->content != NULL)) return(0);
2932
} else if (xmlNodeIsText(lastChild))
2934
else if ((ctxt->node->children != NULL) &&
2935
(xmlNodeIsText(ctxt->node->children)))
2940
/************************************************************************
2942
* Extra stuff for namespace support *
2943
* Relates to http://www.w3.org/TR/WD-xml-names *
2945
************************************************************************/
2949
* @ctxt: an XML parser context
2950
* @name:  the UTF8 encoded XML qualified name string to split
2951
* @prefix: a xmlChar **
2953
* parse an UTF8 encoded XML qualified name string
2955
* [NS 5] QName ::= (Prefix ':')? LocalPart
2957
* [NS 6] Prefix ::= NCName
2959
* [NS 7] LocalPart ::= NCName
2961
* Returns the local part, and prefix is updated
2962
* to get the Prefix if any.
2966
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2967
xmlChar buf[XML_MAX_NAMELEN + 5];
2968
xmlChar *buffer = NULL;
2970
int max = XML_MAX_NAMELEN;
2971
xmlChar *ret = NULL;
2972
const xmlChar *cur = name;
2975
if (prefix == NULL) return(NULL);
2978
if (cur == NULL) return(NULL);
2980
#ifndef XML_XML_NAMESPACE
2981
/* xml: prefix is not really a namespace */
2982
if ((cur[0] == 'x') && (cur[1] == 'm') &&
2983
(cur[2] == 'l') && (cur[3] == ':'))
2984
return(xmlStrdup(name));
2987
/* nasty but well-formed */
2989
return(xmlStrdup(name));
2992
while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2998
* Okay someone managed to make a huge name, so he's ready to pay
2999
* for the processing speed.
3003
buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3004
if (buffer == NULL) {
3005
xmlErrMemory(ctxt, NULL);
3008
memcpy(buffer, buf, len);
3009
while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3010
if (len + 10 > max) {
3014
tmp = (xmlChar *) xmlRealloc(buffer,
3015
max * sizeof(xmlChar));
3018
xmlErrMemory(ctxt, NULL);
3029
if ((c == ':') && (*cur == 0)) {
3033
return(xmlStrdup(name));
3037
ret = xmlStrndup(buf, len);
3041
max = XML_MAX_NAMELEN;
3049
return(xmlStrndup(BAD_CAST "", 0));
3054
* Check that the first character is proper to start
3057
if (!(((c >= 0x61) && (c <= 0x7A)) ||
3058
((c >= 0x41) && (c <= 0x5A)) ||
3059
(c == '_') || (c == ':'))) {
3061
int first = CUR_SCHAR(cur, l);
3063
if (!IS_LETTER(first) && (first != '_')) {
3064
xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3065
"Name %s is not XML Namespace compliant\n",
3071
while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3077
* Okay someone managed to make a huge name, so he's ready to pay
3078
* for the processing speed.
3082
buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3083
if (buffer == NULL) {
3084
xmlErrMemory(ctxt, NULL);
3087
memcpy(buffer, buf, len);
3088
while (c != 0) { /* tested bigname2.xml */
3089
if (len + 10 > max) {
3093
tmp = (xmlChar *) xmlRealloc(buffer,
3094
max * sizeof(xmlChar));
3096
xmlErrMemory(ctxt, NULL);
3109
ret = xmlStrndup(buf, len);
3118
/************************************************************************
3120
* The parser itself *
3121
* Relates to http://www.w3.org/TR/REC-xml *
3123
************************************************************************/
3125
/************************************************************************
3127
* Routines to parse Name, NCName and NmToken *
3129
************************************************************************/
3131
static unsigned long nbParseName = 0;
3132
static unsigned long nbParseNmToken = 0;
3133
static unsigned long nbParseNCName = 0;
3134
static unsigned long nbParseNCNameComplex = 0;
3135
static unsigned long nbParseNameComplex = 0;
3136
static unsigned long nbParseStringName = 0;
3140
* The two following functions are related to the change of accepted
3141
* characters for Name and NmToken in the Revision 5 of XML-1.0
3142
* They correspond to the modified production [4] and the new production [4a]
3143
* changes in that revision. Also note that the macros used for the
3144
* productions Letter, Digit, CombiningChar and Extender are not needed
3146
* We still keep compatibility to pre-revision5 parsing semantic if the
3147
* new XML_PARSE_OLD10 option is given to the parser.
3150
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3151
if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3153
* Use the new checks of production [4] [4a] and [5] of the
3154
* Update 5 of XML-1.0
3156
if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3157
(((c >= 'a') && (c <= 'z')) ||
3158
((c >= 'A') && (c <= 'Z')) ||
3159
(c == '_') || (c == ':') ||
3160
((c >= 0xC0) && (c <= 0xD6)) ||
3161
((c >= 0xD8) && (c <= 0xF6)) ||
3162
((c >= 0xF8) && (c <= 0x2FF)) ||
3163
((c >= 0x370) && (c <= 0x37D)) ||
3164
((c >= 0x37F) && (c <= 0x1FFF)) ||
3165
((c >= 0x200C) && (c <= 0x200D)) ||
3166
((c >= 0x2070) && (c <= 0x218F)) ||
3167
((c >= 0x2C00) && (c <= 0x2FEF)) ||
3168
((c >= 0x3001) && (c <= 0xD7FF)) ||
3169
((c >= 0xF900) && (c <= 0xFDCF)) ||
3170
((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3171
((c >= 0x10000) && (c <= 0xEFFFF))))
3174
if (IS_LETTER(c) || (c == '_') || (c == ':'))
3181
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3182
if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3184
* Use the new checks of production [4] [4a] and [5] of the
3185
* Update 5 of XML-1.0
3187
if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3188
(((c >= 'a') && (c <= 'z')) ||
3189
((c >= 'A') && (c <= 'Z')) ||
3190
((c >= '0') && (c <= '9')) || /* !start */
3191
(c == '_') || (c == ':') ||
3192
(c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3193
((c >= 0xC0) && (c <= 0xD6)) ||
3194
((c >= 0xD8) && (c <= 0xF6)) ||
3195
((c >= 0xF8) && (c <= 0x2FF)) ||
3196
((c >= 0x300) && (c <= 0x36F)) || /* !start */
3197
((c >= 0x370) && (c <= 0x37D)) ||
3198
((c >= 0x37F) && (c <= 0x1FFF)) ||
3199
((c >= 0x200C) && (c <= 0x200D)) ||
3200
((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3201
((c >= 0x2070) && (c <= 0x218F)) ||
3202
((c >= 0x2C00) && (c <= 0x2FEF)) ||
3203
((c >= 0x3001) && (c <= 0xD7FF)) ||
3204
((c >= 0xF900) && (c <= 0xFDCF)) ||
3205
((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3206
((c >= 0x10000) && (c <= 0xEFFFF))))
3209
if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3210
(c == '.') || (c == '-') ||
3211
(c == '_') || (c == ':') ||
3212
(IS_COMBINING(c)) ||
3219
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3220
int *len, int *alloc, int normalize);
3222
static const xmlChar *
3223
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3229
nbParseNameComplex++;
3233
* Handler for more complex cases
3236
if (ctxt->instate == XML_PARSER_EOF)
3239
if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3241
* Use the new checks of production [4] [4a] and [5] of the
3242
* Update 5 of XML-1.0
3244
if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3245
(!(((c >= 'a') && (c <= 'z')) ||
3246
((c >= 'A') && (c <= 'Z')) ||
3247
(c == '_') || (c == ':') ||
3248
((c >= 0xC0) && (c <= 0xD6)) ||
3249
((c >= 0xD8) && (c <= 0xF6)) ||
3250
((c >= 0xF8) && (c <= 0x2FF)) ||
3251
((c >= 0x370) && (c <= 0x37D)) ||
3252
((c >= 0x37F) && (c <= 0x1FFF)) ||
3253
((c >= 0x200C) && (c <= 0x200D)) ||
3254
((c >= 0x2070) && (c <= 0x218F)) ||
3255
((c >= 0x2C00) && (c <= 0x2FEF)) ||
3256
((c >= 0x3001) && (c <= 0xD7FF)) ||
3257
((c >= 0xF900) && (c <= 0xFDCF)) ||
3258
((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3259
((c >= 0x10000) && (c <= 0xEFFFF))))) {
3265
while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3266
(((c >= 'a') && (c <= 'z')) ||
3267
((c >= 'A') && (c <= 'Z')) ||
3268
((c >= '0') && (c <= '9')) || /* !start */
3269
(c == '_') || (c == ':') ||
3270
(c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3271
((c >= 0xC0) && (c <= 0xD6)) ||
3272
((c >= 0xD8) && (c <= 0xF6)) ||
3273
((c >= 0xF8) && (c <= 0x2FF)) ||
3274
((c >= 0x300) && (c <= 0x36F)) || /* !start */
3275
((c >= 0x370) && (c <= 0x37D)) ||
3276
((c >= 0x37F) && (c <= 0x1FFF)) ||
3277
((c >= 0x200C) && (c <= 0x200D)) ||
3278
((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3279
((c >= 0x2070) && (c <= 0x218F)) ||
3280
((c >= 0x2C00) && (c <= 0x2FEF)) ||
3281
((c >= 0x3001) && (c <= 0xD7FF)) ||
3282
((c >= 0xF900) && (c <= 0xFDCF)) ||
3283
((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3284
((c >= 0x10000) && (c <= 0xEFFFF))
3286
if (count++ > XML_PARSER_CHUNK_SIZE) {
3289
if (ctxt->instate == XML_PARSER_EOF)
3297
if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3298
(!IS_LETTER(c) && (c != '_') &&
3306
while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3307
((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3308
(c == '.') || (c == '-') ||
3309
(c == '_') || (c == ':') ||
3310
(IS_COMBINING(c)) ||
3311
(IS_EXTENDER(c)))) {
3312
if (count++ > XML_PARSER_CHUNK_SIZE) {
3315
if (ctxt->instate == XML_PARSER_EOF)
3324
if (ctxt->instate == XML_PARSER_EOF)
3330
if ((len > XML_MAX_NAME_LENGTH) &&
3331
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3332
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3335
if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3336
return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3337
return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3342
* @ctxt: an XML parser context
3344
* parse an XML name.
3346
* [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3347
* CombiningChar | Extender
3349
* [5] Name ::= (Letter | '_' | ':') (NameChar)*
3351
* [6] Names ::= Name (#x20 Name)*
3353
* Returns the Name parsed or NULL
3357
xmlParseName(xmlParserCtxtPtr ctxt) {
3369
* Accelerator for simple ASCII names
3371
in = ctxt->input->cur;
3372
if (((*in >= 0x61) && (*in <= 0x7A)) ||
3373
((*in >= 0x41) && (*in <= 0x5A)) ||
3374
(*in == '_') || (*in == ':')) {
3376
while (((*in >= 0x61) && (*in <= 0x7A)) ||
3377
((*in >= 0x41) && (*in <= 0x5A)) ||
3378
((*in >= 0x30) && (*in <= 0x39)) ||
3379
(*in == '_') || (*in == '-') ||
3380
(*in == ':') || (*in == '.'))
3382
if ((*in > 0) && (*in < 0x80)) {
3383
count = in - ctxt->input->cur;
3384
if ((count > XML_MAX_NAME_LENGTH) &&
3385
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3386
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3389
ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3390
ctxt->input->cur = in;
3391
ctxt->nbChars += count;
3392
ctxt->input->col += count;
3394
xmlErrMemory(ctxt, NULL);
3398
/* accelerator for special cases */
3399
return(xmlParseNameComplex(ctxt));
3402
static const xmlChar *
3403
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3409
nbParseNCNameComplex++;
3413
* Handler for more complex cases
3417
if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3418
(!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3422
while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3423
(xmlIsNameChar(ctxt, c) && (c != ':'))) {
3424
if (count++ > XML_PARSER_CHUNK_SIZE) {
3425
if ((len > XML_MAX_NAME_LENGTH) &&
3426
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3427
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3432
if (ctxt->instate == XML_PARSER_EOF)
3441
if (ctxt->instate == XML_PARSER_EOF)
3446
if ((len > XML_MAX_NAME_LENGTH) &&
3447
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3448
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3451
return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3456
* @ctxt: an XML parser context
3457
* @len: length of the string parsed
3459
* parse an XML name.
3461
* [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3462
* CombiningChar | Extender
3464
* [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3466
* Returns the Name parsed or NULL
3469
static const xmlChar *
3470
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3480
* Accelerator for simple ASCII names
3482
in = ctxt->input->cur;
3483
if (((*in >= 0x61) && (*in <= 0x7A)) ||
3484
((*in >= 0x41) && (*in <= 0x5A)) ||
3487
while (((*in >= 0x61) && (*in <= 0x7A)) ||
3488
((*in >= 0x41) && (*in <= 0x5A)) ||
3489
((*in >= 0x30) && (*in <= 0x39)) ||
3490
(*in == '_') || (*in == '-') ||
3493
if ((*in > 0) && (*in < 0x80)) {
3494
count = in - ctxt->input->cur;
3495
if ((count > XML_MAX_NAME_LENGTH) &&
3496
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3497
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3500
ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3501
ctxt->input->cur = in;
3502
ctxt->nbChars += count;
3503
ctxt->input->col += count;
3505
xmlErrMemory(ctxt, NULL);
3510
return(xmlParseNCNameComplex(ctxt));
3514
* xmlParseNameAndCompare:
3515
* @ctxt: an XML parser context
3517
* parse an XML name and compares for match
3518
* (specialized for endtag parsing)
3520
* Returns NULL for an illegal name, (xmlChar*) 1 for success
3521
* and the name for mismatch
3524
static const xmlChar *
3525
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3526
register const xmlChar *cmp = other;
3527
register const xmlChar *in;
3531
if (ctxt->instate == XML_PARSER_EOF)
3534
in = ctxt->input->cur;
3535
while (*in != 0 && *in == *cmp) {
3540
if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3542
ctxt->input->cur = in;
3543
return (const xmlChar*) 1;
3545
/* failure (or end of input buffer), check with full function */
3546
ret = xmlParseName (ctxt);
3547
/* strings coming from the dictionary, so a direct pointer comparison is possible */
3549
return (const xmlChar*) 1;
3555
* xmlParseStringName:
3556
* @ctxt: an XML parser context
3557
* @str: a pointer to the string pointer (IN/OUT)
3559
* parse an XML name.
3561
* [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3562
* CombiningChar | Extender
3564
* [5] Name ::= (Letter | '_' | ':') (NameChar)*
3566
* [6] Names ::= Name (#x20 Name)*
3568
* Returns the Name parsed or NULL. The @str pointer
3569
* is updated to the current location in the string.
3573
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3574
xmlChar buf[XML_MAX_NAMELEN + 5];
3575
const xmlChar *cur = *str;
3580
nbParseStringName++;
3583
c = CUR_SCHAR(cur, l);
3584
if (!xmlIsNameStartChar(ctxt, c)) {
3588
COPY_BUF(l,buf,len,c);
3590
c = CUR_SCHAR(cur, l);
3591
while (xmlIsNameChar(ctxt, c)) {
3592
COPY_BUF(l,buf,len,c);
3594
c = CUR_SCHAR(cur, l);
3595
if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3597
* Okay someone managed to make a huge name, so he's ready to pay
3598
* for the processing speed.
3603
buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3604
if (buffer == NULL) {
3605
xmlErrMemory(ctxt, NULL);
3608
memcpy(buffer, buf, len);
3609
while (xmlIsNameChar(ctxt, c)) {
3610
if (len + 10 > max) {
3613
if ((len > XML_MAX_NAME_LENGTH) &&
3614
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3615
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3620
tmp = (xmlChar *) xmlRealloc(buffer,
3621
max * sizeof(xmlChar));
3623
xmlErrMemory(ctxt, NULL);
3629
COPY_BUF(l,buffer,len,c);
3631
c = CUR_SCHAR(cur, l);
3638
if ((len > XML_MAX_NAME_LENGTH) &&
3639
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3640
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3644
return(xmlStrndup(buf, len));
3649
* @ctxt: an XML parser context
3651
* parse an XML Nmtoken.
3653
* [7] Nmtoken ::= (NameChar)+
3655
* [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3657
* Returns the Nmtoken parsed or NULL
3661
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3662
xmlChar buf[XML_MAX_NAMELEN + 5];
3672
if (ctxt->instate == XML_PARSER_EOF)
3676
while (xmlIsNameChar(ctxt, c)) {
3677
if (count++ > XML_PARSER_CHUNK_SIZE) {
3681
COPY_BUF(l,buf,len,c);
3687
if (ctxt->instate == XML_PARSER_EOF)
3691
if (len >= XML_MAX_NAMELEN) {
3693
* Okay someone managed to make a huge token, so he's ready to pay
3694
* for the processing speed.
3699
buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3700
if (buffer == NULL) {
3701
xmlErrMemory(ctxt, NULL);
3704
memcpy(buffer, buf, len);
3705
while (xmlIsNameChar(ctxt, c)) {
3706
if (count++ > XML_PARSER_CHUNK_SIZE) {
3709
if (ctxt->instate == XML_PARSER_EOF) {
3714
if (len + 10 > max) {
3717
if ((max > XML_MAX_NAME_LENGTH) &&
3718
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3719
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3724
tmp = (xmlChar *) xmlRealloc(buffer,
3725
max * sizeof(xmlChar));
3727
xmlErrMemory(ctxt, NULL);
3733
COPY_BUF(l,buffer,len,c);
3743
if ((len > XML_MAX_NAME_LENGTH) &&
3744
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3745
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3748
return(xmlStrndup(buf, len));
3752
* xmlParseEntityValue:
3753
* @ctxt: an XML parser context
3754
* @orig: if non-NULL store a copy of the original entity value
3756
* parse a value for ENTITY declarations
3758
* [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3759
* "'" ([^%&'] | PEReference | Reference)* "'"
3761
* Returns the EntityValue parsed with reference substituted or NULL
3765
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3766
xmlChar *buf = NULL;
3768
int size = XML_PARSER_BUFFER_SIZE;
3771
xmlChar *ret = NULL;
3772
const xmlChar *cur = NULL;
3773
xmlParserInputPtr input;
3775
if (RAW == '"') stop = '"';
3776
else if (RAW == '\'') stop = '\'';
3778
xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3781
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3783
xmlErrMemory(ctxt, NULL);
3788
* The content of the entity definition is copied in a buffer.
3791
ctxt->instate = XML_PARSER_ENTITY_VALUE;
3792
input = ctxt->input;
3794
if (ctxt->instate == XML_PARSER_EOF) {
3801
* NOTE: 4.4.5 Included in Literal
3802
* When a parameter entity reference appears in a literal entity
3803
* value, ... a single or double quote character in the replacement
3804
* text is always treated as a normal data character and will not
3805
* terminate the literal.
3806
* In practice it means we stop the loop only when back at parsing
3807
* the initial entity and the quote is found
3809
while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3810
(ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3811
if (len + 5 >= size) {
3815
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3817
xmlErrMemory(ctxt, NULL);
3823
COPY_BUF(l,buf,len,c);
3826
* Pop-up of finished entities.
3828
while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3839
if (ctxt->instate == XML_PARSER_EOF) {
3845
* Raise problem w.r.t. '&' and '%' being used in non-entities
3846
* reference constructs. Note Charref will be handled in
3847
* xmlStringDecodeEntities()
3850
while (*cur != 0) { /* non input consuming */
3851
if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3856
name = xmlParseStringName(ctxt, &cur);
3857
if ((name == NULL) || (*cur != ';')) {
3858
xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3859
"EntityValue: '%c' forbidden except for entities references\n",
3862
if ((tmp == '%') && (ctxt->inSubset == 1) &&
3863
(ctxt->inputNr == 1)) {
3864
xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3875
* Then PEReference entities are substituted.
3878
xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3883
* NOTE: 4.4.7 Bypassed
3884
* When a general entity reference appears in the EntityValue in
3885
* an entity declaration, it is bypassed and left as is.
3886
* so XML_SUBSTITUTE_REF is not set here.
3888
ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3900
* xmlParseAttValueComplex:
3901
* @ctxt: an XML parser context
3902
* @attlen:  the resulting attribute length
3903
* @normalize:  whether to apply the inner normalization
3905
* parse a value for an attribute, this is the fallback function
3906
* of xmlParseAttValue() when the attribute parsing requires handling
3907
* of non-ASCII characters, or normalization compaction.
3909
* Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3912
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3914
xmlChar *buf = NULL;
3915
xmlChar *rep = NULL;
3917
size_t buf_size = 0;
3918
int c, l, in_space = 0;
3919
xmlChar *current = NULL;
3922
if (NXT(0) == '"') {
3923
ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3926
} else if (NXT(0) == '\'') {
3928
ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3931
xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3936
* allocate a translation buffer.
3938
buf_size = XML_PARSER_BUFFER_SIZE;
3939
buf = (xmlChar *) xmlMallocAtomic(buf_size);
3940
if (buf == NULL) goto mem_error;
3943
* OK loop until we reach one of the ending char or a size limit.
3946
while (((NXT(0) != limit) && /* checked */
3947
(IS_CHAR(c)) && (c != '<')) &&
3948
(ctxt->instate != XML_PARSER_EOF)) {
3950
* Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3951
* special option is given
3953
if ((len > XML_MAX_TEXT_LENGTH) &&
3954
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3955
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3956
"AttValue length too long\n");
3962
if (NXT(1) == '#') {
3963
int val = xmlParseCharRef(ctxt);
3966
if (ctxt->replaceEntities) {
3967
if (len + 10 > buf_size) {
3968
growBuffer(buf, 10);
3973
* The reparsing will be done in xmlStringGetNodeList()
3974
* called by the attribute() function in SAX.c
3976
if (len + 10 > buf_size) {
3977
growBuffer(buf, 10);
3985
} else if (val != 0) {
3986
if (len + 10 > buf_size) {
3987
growBuffer(buf, 10);
3989
len += xmlCopyChar(0, &buf[len], val);
3992
ent = xmlParseEntityRef(ctxt);
3995
ctxt->nbentities += ent->owner;
3996
if ((ent != NULL) &&
3997
(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3998
if (len + 10 > buf_size) {
3999
growBuffer(buf, 10);
4001
if ((ctxt->replaceEntities == 0) &&
4002
(ent->content[0] == '&')) {
4009
buf[len++] = ent->content[0];
4011
} else if ((ent != NULL) &&
4012
(ctxt->replaceEntities != 0)) {
4013
if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4014
rep = xmlStringDecodeEntities(ctxt, ent->content,
4019
while (*current != 0) { /* non input consuming */
4020
if ((*current == 0xD) || (*current == 0xA) ||
4021
(*current == 0x9)) {
4025
buf[len++] = *current++;
4026
if (len + 10 > buf_size) {
4027
growBuffer(buf, 10);
4034
if (len + 10 > buf_size) {
4035
growBuffer(buf, 10);
4037
if (ent->content != NULL)
4038
buf[len++] = ent->content[0];
4040
} else if (ent != NULL) {
4041
int i = xmlStrlen(ent->name);
4042
const xmlChar *cur = ent->name;
4045
* This may look absurd but is needed to detect
4048
if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4049
(ent->content != NULL) && (ent->checked == 0)) {
4050
unsigned long oldnbent = ctxt->nbentities;
4052
rep = xmlStringDecodeEntities(ctxt, ent->content,
4053
XML_SUBSTITUTE_REF, 0, 0, 0);
4055
ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
4057
if (xmlStrchr(rep, '<'))
4065
* Just output the reference
4068
while (len + i + 10 > buf_size) {
4069
growBuffer(buf, i + 10);
4072
buf[len++] = *cur++;
4077
if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4078
if ((len != 0) || (!normalize)) {
4079
if ((!normalize) || (!in_space)) {
4080
COPY_BUF(l,buf,len,0x20);
4081
while (len + 10 > buf_size) {
4082
growBuffer(buf, 10);
4089
COPY_BUF(l,buf,len,c);
4090
if (len + 10 > buf_size) {
4091
growBuffer(buf, 10);
4099
if (ctxt->instate == XML_PARSER_EOF)
4102
if ((in_space) && (normalize)) {
4103
while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4107
xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4108
} else if (RAW != limit) {
4109
if ((c != 0) && (!IS_CHAR(c))) {
4110
xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4111
"invalid character in attribute value\n");
4113
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4114
"AttValue: ' expected\n");
4120
* There we potentially risk an overflow, don't allow attribute value of
4121
* length more than INT_MAX, which is a very reasonable assumption!
4123
if (len >= INT_MAX) {
4124
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4125
"AttValue length too long\n");
4129
if (attlen != NULL) *attlen = (int) len;
4133
xmlErrMemory(ctxt, NULL);
4144
* @ctxt: an XML parser context
4146
* parse a value for an attribute
4147
* Note: the parser won't do substitution of entities here, this
4148
* will be handled later in xmlStringGetNodeList
4150
* [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4151
* "'" ([^<&'] | Reference)* "'"
4153
* 3.3.3 Attribute-Value Normalization:
4154
* Before the value of an attribute is passed to the application or
4155
* checked for validity, the XML processor must normalize it as follows:
4156
* - a character reference is processed by appending the referenced
4157
* character to the attribute value
4158
* - an entity reference is processed by recursively processing the
4159
* replacement text of the entity
4160
* - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4161
* appending #x20 to the normalized value, except that only a single
4162
* #x20 is appended for a "#xD#xA" sequence that is part of an external
4163
* parsed entity or the literal entity value of an internal parsed entity
4164
* - other characters are processed by appending them to the normalized value
4165
* If the declared value is not CDATA, then the XML processor must further
4166
* process the normalized attribute value by discarding any leading and
4167
* trailing space (#x20) characters, and by replacing sequences of space
4168
* (#x20) characters by a single space (#x20) character.
4169
* All attributes for which no declaration has been read should be treated
4170
* by a non-validating parser as if declared CDATA.
4172
* Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4177
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4178
if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4179
return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4183
* xmlParseSystemLiteral:
4184
* @ctxt: an XML parser context
4186
* parse an XML Literal
4188
* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4190
* Returns the SystemLiteral parsed or NULL
4194
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4195
xmlChar *buf = NULL;
4197
int size = XML_PARSER_BUFFER_SIZE;
4200
int state = ctxt->instate;
4207
} else if (RAW == '\'') {
4211
xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4215
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4217
xmlErrMemory(ctxt, NULL);
4220
ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4222
while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4223
if (len + 5 >= size) {
4226
if ((size > XML_MAX_NAME_LENGTH) &&
4227
((ctxt->options & XML_PARSE_HUGE) == 0)) {
4228
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4230
ctxt->instate = (xmlParserInputState) state;
4234
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4237
xmlErrMemory(ctxt, NULL);
4238
ctxt->instate = (xmlParserInputState) state;
4247
if (ctxt->instate == XML_PARSER_EOF) {
4252
COPY_BUF(l,buf,len,cur);
4262
ctxt->instate = (xmlParserInputState) state;
4263
if (!IS_CHAR(cur)) {
4264
xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4272
* xmlParsePubidLiteral:
4273
* @ctxt: an XML parser context
4275
* parse an XML public literal
4277
* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4279
* Returns the PubidLiteral parsed or NULL.
4283
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4284
xmlChar *buf = NULL;
4286
int size = XML_PARSER_BUFFER_SIZE;
4290
xmlParserInputState oldstate = ctxt->instate;
4296
} else if (RAW == '\'') {
4300
xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4303
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4305
xmlErrMemory(ctxt, NULL);
4308
ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4310
while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4311
if (len + 1 >= size) {
4314
if ((size > XML_MAX_NAME_LENGTH) &&
4315
((ctxt->options & XML_PARSE_HUGE) == 0)) {
4316
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4321
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4323
xmlErrMemory(ctxt, NULL);
4334
if (ctxt->instate == XML_PARSER_EOF) {
4349
xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4353
ctxt->instate = oldstate;
4357
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4360
* used for the test in the inner loop of the char data testing
4362
static const unsigned char test_char_data[256] = {
4363
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4364
0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4365
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4366
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4367
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4368
0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4369
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4370
0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4371
0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4372
0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4373
0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4374
0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4375
0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4376
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4377
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4378
0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4379
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4380
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4381
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4382
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4383
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4384
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4385
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4386
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4387
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4388
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4389
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4390
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4391
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4392
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4393
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4394
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4399
* @ctxt: an XML parser context
4400
* @cdata: int indicating whether we are within a CDATA section
4402
* parse a CharData section.
4403
* if we are within a CDATA section ']]>' marks an end of section.
4405
* The right angle bracket (>) may be represented using the string "&gt;",
4406
* and must, for compatibility, be escaped using "&gt;" or a character
4407
* reference when it appears in the string "]]>" in content, when that
4408
* string is not marking the end of a CDATA section.
4410
* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4414
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4417
int line = ctxt->input->line;
4418
int col = ctxt->input->col;
4424
* Accelerated common case where the input doesn't need to be
4425
* modified before passing it to the handler.
4428
in = ctxt->input->cur;
4431
while (*in == 0x20) { in++; ctxt->input->col++; }
4434
ctxt->input->line++; ctxt->input->col = 1;
4436
} while (*in == 0xA);
4437
goto get_more_space;
4440
nbchar = in - ctxt->input->cur;
4442
const xmlChar *tmp = ctxt->input->cur;
4443
ctxt->input->cur = in;
4445
if ((ctxt->sax != NULL) &&
4446
(ctxt->sax->ignorableWhitespace !=
4447
ctxt->sax->characters)) {
4448
if (areBlanks(ctxt, tmp, nbchar, 1)) {
4449
if (ctxt->sax->ignorableWhitespace != NULL)
4450
ctxt->sax->ignorableWhitespace(ctxt->userData,
4453
if (ctxt->sax->characters != NULL)
4454
ctxt->sax->characters(ctxt->userData,
4456
if (*ctxt->space == -1)
4459
} else if ((ctxt->sax != NULL) &&
4460
(ctxt->sax->characters != NULL)) {
4461
ctxt->sax->characters(ctxt->userData,
4469
ccol = ctxt->input->col;
4470
while (test_char_data[*in]) {
4474
ctxt->input->col = ccol;
4477
ctxt->input->line++; ctxt->input->col = 1;
4479
} while (*in == 0xA);
4483
if ((in[1] == ']') && (in[2] == '>')) {
4484
xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4485
ctxt->input->cur = in;
4492
nbchar = in - ctxt->input->cur;
4494
if ((ctxt->sax != NULL) &&
4495
(ctxt->sax->ignorableWhitespace !=
4496
ctxt->sax->characters) &&
4497
(IS_BLANK_CH(*ctxt->input->cur))) {
4498
const xmlChar *tmp = ctxt->input->cur;
4499
ctxt->input->cur = in;
4501
if (areBlanks(ctxt, tmp, nbchar, 0)) {
4502
if (ctxt->sax->ignorableWhitespace != NULL)
4503
ctxt->sax->ignorableWhitespace(ctxt->userData,
4506
if (ctxt->sax->characters != NULL)
4507
ctxt->sax->characters(ctxt->userData,
4509
if (*ctxt->space == -1)
4512
line = ctxt->input->line;
4513
col = ctxt->input->col;
4514
} else if (ctxt->sax != NULL) {
4515
if (ctxt->sax->characters != NULL)
4516
ctxt->sax->characters(ctxt->userData,
4517
ctxt->input->cur, nbchar);
4518
line = ctxt->input->line;
4519
col = ctxt->input->col;
4521
/* something really bad happened in the SAX callback */
4522
if (ctxt->instate != XML_PARSER_CONTENT)
4525
ctxt->input->cur = in;
4529
ctxt->input->cur = in;
4531
ctxt->input->line++; ctxt->input->col = 1;
4532
continue; /* while */
4544
if (ctxt->instate == XML_PARSER_EOF)
4546
in = ctxt->input->cur;
4547
} while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4550
ctxt->input->line = line;
4551
ctxt->input->col = col;
4552
xmlParseCharDataComplex(ctxt, cdata);
4556
* xmlParseCharDataComplex:
4557
* @ctxt: an XML parser context
4558
* @cdata: int indicating whether we are within a CDATA section
4560
* Parse a CharData section. This is the fallback function
4561
* of xmlParseCharData() when the parsing requires handling
4562
* of non-ASCII characters.
4565
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4566
xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4574
while ((cur != '<') && /* checked */
4576
(IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4577
if ((cur == ']') && (NXT(1) == ']') &&
4581
xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4584
COPY_BUF(l,buf,nbchar,cur);
4585
if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4589
* OK the segment is to be consumed as chars.
4591
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4592
if (areBlanks(ctxt, buf, nbchar, 0)) {
4593
if (ctxt->sax->ignorableWhitespace != NULL)
4594
ctxt->sax->ignorableWhitespace(ctxt->userData,
4597
if (ctxt->sax->characters != NULL)
4598
ctxt->sax->characters(ctxt->userData, buf, nbchar);
4599
if ((ctxt->sax->characters !=
4600
ctxt->sax->ignorableWhitespace) &&
4601
(*ctxt->space == -1))
4606
/* something really bad happened in the SAX callback */
4607
if (ctxt->instate != XML_PARSER_CONTENT)
4614
if (ctxt->instate == XML_PARSER_EOF)
4623
* OK the segment is to be consumed as chars.
4625
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4626
if (areBlanks(ctxt, buf, nbchar, 0)) {
4627
if (ctxt->sax->ignorableWhitespace != NULL)
4628
ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4630
if (ctxt->sax->characters != NULL)
4631
ctxt->sax->characters(ctxt->userData, buf, nbchar);
4632
if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4633
(*ctxt->space == -1))
4638
if ((cur != 0) && (!IS_CHAR(cur))) {
4639
/* Generate the error and skip the offending character */
4640
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4641
"PCDATA invalid Char value %d\n",
4648
* xmlParseExternalID:
4649
* @ctxt: an XML parser context
4650
* @publicID: a xmlChar** receiving PubidLiteral
4651
* @strict: indicate whether we should restrict parsing to only
4652
* production [75], see NOTE below
4654
* Parse an External ID or a Public ID
4656
* NOTE: Productions [75] and [83] interact badly since [75] can generate
4657
* 'PUBLIC' S PubidLiteral S SystemLiteral
4659
* [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4660
* | 'PUBLIC' S PubidLiteral S SystemLiteral
4662
* [83] PublicID ::= 'PUBLIC' S PubidLiteral
4664
* Returns the SystemLiteral; in the second
4665
* case publicID receives PubidLiteral; if strict is off
4666
* it is possible to return NULL and have publicID set.
4670
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4671
xmlChar *URI = NULL;
4676
if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4678
if (!IS_BLANK_CH(CUR)) {
4679
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4680
"Space required after 'SYSTEM'\n");
4683
URI = xmlParseSystemLiteral(ctxt);
4685
xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4687
} else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4689
if (!IS_BLANK_CH(CUR)) {
4690
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4691
"Space required after 'PUBLIC'\n");
4694
*publicID = xmlParsePubidLiteral(ctxt);
4695
if (*publicID == NULL) {
4696
xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4700
* We don't handle [83] so "S SystemLiteral" is required.
4702
if (!IS_BLANK_CH(CUR)) {
4703
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4704
"Space required after the Public Identifier\n");
4708
* We handle [83] so we return immediately, if
4709
* "S SystemLiteral" is not detected. From a purely parsing
4710
* point of view that's a nice mess.
4716
if (!IS_BLANK_CH(*ptr)) return(NULL);
4718
while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
4719
if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4722
URI = xmlParseSystemLiteral(ctxt);
4724
xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4731
* xmlParseCommentComplex:
4732
* @ctxt: an XML parser context
4733
* @buf: the already parsed part of the buffer
4734
* @len: number of bytes filled in the buffer
4735
* @size: allocated size of the buffer
4737
* Parse an XML (SGML) comment <!-- .... --> and report it to the SAX comment handler
4738
* The spec says that "For compatibility, the string "--" (double-hyphen)
4739
* must not occur within comments. "
4740
* This is the slow routine in case the accelerator for ascii didn't work
4742
* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4745
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4746
size_t len, size_t size) {
4753
inputid = ctxt->input->id;
4757
size = XML_PARSER_BUFFER_SIZE;
4758
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4760
xmlErrMemory(ctxt, NULL);
4764
GROW; /* Assure there's enough input data */
4767
goto not_terminated;
4769
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4770
"xmlParseComment: invalid xmlChar value %d\n",
4778
goto not_terminated;
4780
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4781
"xmlParseComment: invalid xmlChar value %d\n",
4789
goto not_terminated;
4790
while (IS_CHAR(cur) && /* checked */
4792
(r != '-') || (q != '-'))) {
4793
if ((r == '-') && (q == '-')) {
4794
xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4796
if ((len > XML_MAX_TEXT_LENGTH) &&
4797
((ctxt->options & XML_PARSE_HUGE) == 0)) {
4798
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4799
"Comment too big found", NULL);
4803
if (len + 5 >= size) {
4807
new_size = size * 2;
4808
new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4809
if (new_buf == NULL) {
4811
xmlErrMemory(ctxt, NULL);
4817
COPY_BUF(ql,buf,len,q);
4827
if (ctxt->instate == XML_PARSER_EOF) {
4842
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4843
"Comment not terminated \n<!--%.50s\n", buf);
4844
} else if (!IS_CHAR(cur)) {
4845
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4846
"xmlParseComment: invalid xmlChar value %d\n",
4849
if (inputid != ctxt->input->id) {
4850
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4851
"Comment doesn't start and stop in the same entity\n");
4854
if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4855
(!ctxt->disableSAX))
4856
ctxt->sax->comment(ctxt->userData, buf);
4861
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4862
"Comment not terminated\n", NULL);
4869
* @ctxt: an XML parser context
4871
* Parse an XML (SGML) comment <!-- .... --> and report it to the SAX comment handler
4872
* The spec says that "For compatibility, the string "--" (double-hyphen)
4873
* must not occur within comments. "
4875
* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4878
xmlParseComment(xmlParserCtxtPtr ctxt) {
4879
xmlChar *buf = NULL;
4880
size_t size = XML_PARSER_BUFFER_SIZE;
4882
xmlParserInputState state;
4889
* Check that there is a comment right here.
4891
if ((RAW != '<') || (NXT(1) != '!') ||
4892
(NXT(2) != '-') || (NXT(3) != '-')) return;
4893
state = ctxt->instate;
4894
ctxt->instate = XML_PARSER_COMMENT;
4895
inputid = ctxt->input->id;
4901
* Accelerated common case where the input doesn't need to be
4902
* modified before passing it to the handler.
4904
in = ctxt->input->cur;
4908
ctxt->input->line++; ctxt->input->col = 1;
4910
} while (*in == 0xA);
4913
ccol = ctxt->input->col;
4914
while (((*in > '-') && (*in <= 0x7F)) ||
4915
((*in >= 0x20) && (*in < '-')) ||
4920
ctxt->input->col = ccol;
4923
ctxt->input->line++; ctxt->input->col = 1;
4925
} while (*in == 0xA);
4928
nbchar = in - ctxt->input->cur;
4930
* save current set of data
4933
if ((ctxt->sax != NULL) &&
4934
(ctxt->sax->comment != NULL)) {
4936
if ((*in == '-') && (in[1] == '-'))
4939
size = XML_PARSER_BUFFER_SIZE + nbchar;
4940
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4942
xmlErrMemory(ctxt, NULL);
4943
ctxt->instate = state;
4947
} else if (len + nbchar + 1 >= size) {
4949
size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4950
new_buf = (xmlChar *) xmlRealloc(buf,
4951
size * sizeof(xmlChar));
4952
if (new_buf == NULL) {
4954
xmlErrMemory(ctxt, NULL);
4955
ctxt->instate = state;
4960
memcpy(&buf[len], ctxt->input->cur, nbchar);
4965
if ((len > XML_MAX_TEXT_LENGTH) &&
4966
((ctxt->options & XML_PARSE_HUGE) == 0)) {
4967
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4968
"Comment too big found", NULL);
4972
ctxt->input->cur = in;
4975
ctxt->input->line++; ctxt->input->col = 1;
4980
ctxt->input->cur = in;
4982
ctxt->input->line++; ctxt->input->col = 1;
4983
continue; /* while */
4989
if (ctxt->instate == XML_PARSER_EOF) {
4993
in = ctxt->input->cur;
4997
if (ctxt->input->id != inputid) {
4998
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4999
"comment doesn't start and stop in the same entity\n");
5002
if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5003
(!ctxt->disableSAX)) {
5005
ctxt->sax->comment(ctxt->userData, buf);
5007
ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5011
if (ctxt->instate != XML_PARSER_EOF)
5012
ctxt->instate = state;
5016
xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5017
"Double hyphen within comment: "
5021
xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5022
"Double hyphen within comment\n", NULL);
5030
} while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
5031
xmlParseCommentComplex(ctxt, buf, len, size);
5032
ctxt->instate = state;
5039
* @ctxt: an XML parser context
5041
* parse the name of a PI
5043
* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5045
* Returns the PITarget name or NULL
5049
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5050
const xmlChar *name;
5052
name = xmlParseName(ctxt);
5053
if ((name != NULL) &&
5054
((name[0] == 'x') || (name[0] == 'X')) &&
5055
((name[1] == 'm') || (name[1] == 'M')) &&
5056
((name[2] == 'l') || (name[2] == 'L'))) {
5058
if ((name[0] == 'x') && (name[1] == 'm') &&
5059
(name[2] == 'l') && (name[3] == 0)) {
5060
xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5061
"XML declaration allowed only at the start of the document\n");
5063
} else if (name[3] == 0) {
5064
xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5068
if (xmlW3CPIs[i] == NULL) break;
5069
if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5072
xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5073
"xmlParsePITarget: invalid name prefix 'xml'\n",
5076
if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5077
xmlNsErr(ctxt, XML_NS_ERR_COLON,
5078
"colon are forbidden from PI names '%s'\n", name, NULL, NULL);
5083
#ifdef LIBXML_CATALOG_ENABLED
5085
* xmlParseCatalogPI:
5086
* @ctxt: an XML parser context
5087
* @catalog: the PI value string
5089
* parse an XML Catalog Processing Instruction.
5091
* <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5093
* Occurs only if allowed by the user and if happening in the Misc
5094
* part of the document before any doctype information.
5095
* This will add the given catalog to the parsing context in order
5096
* to be used if there is a resolution need further down in the document
5100
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5101
xmlChar *URL = NULL;
5102
const xmlChar *tmp, *base;
5106
while (IS_BLANK_CH(*tmp)) tmp++;
5107
if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5110
while (IS_BLANK_CH(*tmp)) tmp++;
5115
while (IS_BLANK_CH(*tmp)) tmp++;
5117
if ((marker != '\'') && (marker != '"'))
5121
while ((*tmp != 0) && (*tmp != marker)) tmp++;
5124
URL = xmlStrndup(base, tmp - base);
5126
while (IS_BLANK_CH(*tmp)) tmp++;
5131
ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5137
xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5138
"Catalog PI syntax error: %s\n",
5147
* @ctxt: an XML parser context
5149
* parse an XML Processing Instruction.
5151
* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5153
* The processing is transferred to SAX once parsed.
5157
xmlParsePI(xmlParserCtxtPtr ctxt) {
5158
xmlChar *buf = NULL;
5160
size_t size = XML_PARSER_BUFFER_SIZE;
5162
const xmlChar *target;
5163
xmlParserInputState state;
5166
if ((RAW == '<') && (NXT(1) == '?')) {
5167
xmlParserInputPtr input = ctxt->input;
5168
state = ctxt->instate;
5169
ctxt->instate = XML_PARSER_PI;
5171
* this is a Processing Instruction.
5177
* Parse the target name and check for special support like
5180
target = xmlParsePITarget(ctxt);
5181
if (target != NULL) {
5182
if ((RAW == '?') && (NXT(1) == '>')) {
5183
if (input != ctxt->input) {
5184
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5185
"PI declaration doesn't start and stop in the same entity\n");
5192
if ((ctxt->sax) && (!ctxt->disableSAX) &&
5193
(ctxt->sax->processingInstruction != NULL))
5194
ctxt->sax->processingInstruction(ctxt->userData,
5196
if (ctxt->instate != XML_PARSER_EOF)
5197
ctxt->instate = state;
5200
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5202
xmlErrMemory(ctxt, NULL);
5203
ctxt->instate = state;
5207
if (!IS_BLANK(cur)) {
5208
xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5209
"ParsePI: PI %s space expected\n", target);
5213
while (IS_CHAR(cur) && /* checked */
5214
((cur != '?') || (NXT(1) != '>'))) {
5215
if (len + 5 >= size) {
5217
size_t new_size = size * 2;
5218
tmp = (xmlChar *) xmlRealloc(buf, new_size);
5220
xmlErrMemory(ctxt, NULL);
5222
ctxt->instate = state;
5231
if (ctxt->instate == XML_PARSER_EOF) {
5236
if ((len > XML_MAX_TEXT_LENGTH) &&
5237
((ctxt->options & XML_PARSE_HUGE) == 0)) {
5238
xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5239
"PI %s too big found", target);
5241
ctxt->instate = state;
5245
COPY_BUF(l,buf,len,cur);
5254
if ((len > XML_MAX_TEXT_LENGTH) &&
5255
((ctxt->options & XML_PARSE_HUGE) == 0)) {
5256
xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5257
"PI %s too big found", target);
5259
ctxt->instate = state;
5264
xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5265
"ParsePI: PI %s never end ...\n", target);
5267
if (input != ctxt->input) {
5268
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5269
"PI declaration doesn't start and stop in the same entity\n");
5273
#ifdef LIBXML_CATALOG_ENABLED
5274
if (((state == XML_PARSER_MISC) ||
5275
(state == XML_PARSER_START)) &&
5276
(xmlStrEqual(target, XML_CATALOG_PI))) {
5277
xmlCatalogAllow allow = xmlCatalogGetDefaults();
5278
if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5279
(allow == XML_CATA_ALLOW_ALL))
5280
xmlParseCatalogPI(ctxt, buf);
5288
if ((ctxt->sax) && (!ctxt->disableSAX) &&
5289
(ctxt->sax->processingInstruction != NULL))
5290
ctxt->sax->processingInstruction(ctxt->userData,
5295
xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5297
if (ctxt->instate != XML_PARSER_EOF)
5298
ctxt->instate = state;
5303
* xmlParseNotationDecl:
5304
* @ctxt: an XML parser context
5306
* parse a notation declaration
5308
* [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5310
* Hence there are actually 3 choices:
5311
* 'PUBLIC' S PubidLiteral
5312
* 'PUBLIC' S PubidLiteral S SystemLiteral
5313
* and 'SYSTEM' S SystemLiteral
5315
* See the NOTE on xmlParseExternalID().
5319
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5320
const xmlChar *name;
5324
if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5325
xmlParserInputPtr input = ctxt->input;
5328
if (!IS_BLANK_CH(CUR)) {
5329
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5330
"Space required after '<!NOTATION'\n");
5335
name = xmlParseName(ctxt);
5337
xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5340
if (!IS_BLANK_CH(CUR)) {
5341
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5342
"Space required after the NOTATION name'\n");
5345
if (xmlStrchr(name, ':') != NULL) {
5346
xmlNsErr(ctxt, XML_NS_ERR_COLON,
5347
"colon are forbidden from notation names '%s'\n",
5355
Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5359
if (input != ctxt->input) {
5360
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5361
"Notation declaration doesn't start and stop in the same entity\n");
5364
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5365
(ctxt->sax->notationDecl != NULL))
5366
ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5368
xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5370
if (Systemid != NULL) xmlFree(Systemid);
5371
if (Pubid != NULL) xmlFree(Pubid);
5376
* xmlParseEntityDecl:
5377
* @ctxt: an XML parser context
5379
* parse <!ENTITY declarations
5381
* [70] EntityDecl ::= GEDecl | PEDecl
5383
* [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5385
* [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5387
* [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5389
* [74] PEDef ::= EntityValue | ExternalID
5391
* [76] NDataDecl ::= S 'NDATA' S Name
5393
* [ VC: Notation Declared ]
5394
* The Name must match the declared name of a notation.
5398
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5399
const xmlChar *name = NULL;
5400
xmlChar *value = NULL;
5401
xmlChar *URI = NULL, *literal = NULL;
5402
const xmlChar *ndata = NULL;
5403
int isParameter = 0;
5404
xmlChar *orig = NULL;
5407
/* GROW; done in the caller */
5408
if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5409
xmlParserInputPtr input = ctxt->input;
5412
skipped = SKIP_BLANKS;
5414
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5415
"Space required after '<!ENTITY'\n");
5420
skipped = SKIP_BLANKS;
5422
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5423
"Space required after '%'\n");
5428
name = xmlParseName(ctxt);
5430
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5431
"xmlParseEntityDecl: no name\n");
5434
if (xmlStrchr(name, ':') != NULL) {
5435
xmlNsErr(ctxt, XML_NS_ERR_COLON,
5436
"colon are forbidden from entities names '%s'\n",
5439
skipped = SKIP_BLANKS;
5441
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5442
"Space required after the entity name\n");
5445
ctxt->instate = XML_PARSER_ENTITY_DECL;
5447
* handle the various case of definitions...
5450
if ((RAW == '"') || (RAW == '\'')) {
5451
value = xmlParseEntityValue(ctxt, &orig);
5453
if ((ctxt->sax != NULL) &&
5454
(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5455
ctxt->sax->entityDecl(ctxt->userData, name,
5456
XML_INTERNAL_PARAMETER_ENTITY,
5460
URI = xmlParseExternalID(ctxt, &literal, 1);
5461
if ((URI == NULL) && (literal == NULL)) {
5462
xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5467
uri = xmlParseURI((const char *) URI);
5469
xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5470
"Invalid URI: %s\n", URI);
5472
* This really ought to be a well formedness error
5473
* but the XML Core WG decided otherwise c.f. issue
5474
* E26 of the XML erratas.
5477
if (uri->fragment != NULL) {
5479
* Okay this is foolish to block those but not
5482
xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5484
if ((ctxt->sax != NULL) &&
5485
(!ctxt->disableSAX) &&
5486
(ctxt->sax->entityDecl != NULL))
5487
ctxt->sax->entityDecl(ctxt->userData, name,
5488
XML_EXTERNAL_PARAMETER_ENTITY,
5489
literal, URI, NULL);
5496
if ((RAW == '"') || (RAW == '\'')) {
5497
value = xmlParseEntityValue(ctxt, &orig);
5498
if ((ctxt->sax != NULL) &&
5499
(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5500
ctxt->sax->entityDecl(ctxt->userData, name,
5501
XML_INTERNAL_GENERAL_ENTITY,
5504
* For expat compatibility in SAX mode.
5506
if ((ctxt->myDoc == NULL) ||
5507
(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5508
if (ctxt->myDoc == NULL) {
5509
ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5510
if (ctxt->myDoc == NULL) {
5511
xmlErrMemory(ctxt, "New Doc failed");
5514
ctxt->myDoc->properties = XML_DOC_INTERNAL;
5516
if (ctxt->myDoc->intSubset == NULL)
5517
ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5518
BAD_CAST "fake", NULL, NULL);
5520
xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5524
URI = xmlParseExternalID(ctxt, &literal, 1);
5525
if ((URI == NULL) && (literal == NULL)) {
5526
xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5531
uri = xmlParseURI((const char *)URI);
5533
xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5534
"Invalid URI: %s\n", URI);
5536
* This really ought to be a well formedness error
5537
* but the XML Core WG decided otherwise c.f. issue
5538
* E26 of the XML erratas.
5541
if (uri->fragment != NULL) {
5543
* Okay this is foolish to block those but not
5546
xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5551
if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
5552
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5553
"Space required before 'NDATA'\n");
5556
if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5558
if (!IS_BLANK_CH(CUR)) {
5559
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5560
"Space required after 'NDATA'\n");
5563
ndata = xmlParseName(ctxt);
5564
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5565
(ctxt->sax->unparsedEntityDecl != NULL))
5566
ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5567
literal, URI, ndata);
5569
if ((ctxt->sax != NULL) &&
5570
(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5571
ctxt->sax->entityDecl(ctxt->userData, name,
5572
XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5573
literal, URI, NULL);
5575
* For expat compatibility in SAX mode.
5576
* assuming the entity replacement was asked for
5578
if ((ctxt->replaceEntities != 0) &&
5579
((ctxt->myDoc == NULL) ||
5580
(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5581
if (ctxt->myDoc == NULL) {
5582
ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5583
if (ctxt->myDoc == NULL) {
5584
xmlErrMemory(ctxt, "New Doc failed");
5587
ctxt->myDoc->properties = XML_DOC_INTERNAL;
5590
if (ctxt->myDoc->intSubset == NULL)
5591
ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5592
BAD_CAST "fake", NULL, NULL);
5593
xmlSAX2EntityDecl(ctxt, name,
5594
XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5595
literal, URI, NULL);
5600
if (ctxt->instate == XML_PARSER_EOF)
5604
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5605
"xmlParseEntityDecl: entity %s not terminated\n", name);
5607
if (input != ctxt->input) {
5608
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5609
"Entity declaration doesn't start and stop in the same entity\n");
5615
* Ugly mechanism to save the raw entity value.
5617
xmlEntityPtr cur = NULL;
5620
if ((ctxt->sax != NULL) &&
5621
(ctxt->sax->getParameterEntity != NULL))
5622
cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5624
if ((ctxt->sax != NULL) &&
5625
(ctxt->sax->getEntity != NULL))
5626
cur = ctxt->sax->getEntity(ctxt->userData, name);
5627
if ((cur == NULL) && (ctxt->userData==ctxt)) {
5628
cur = xmlSAX2GetEntity(ctxt, name);
5632
if (cur->orig != NULL)
5639
if (value != NULL) xmlFree(value);
5640
if (URI != NULL) xmlFree(URI);
5641
if (literal != NULL) xmlFree(literal);
5646
* xmlParseDefaultDecl:
5647
* @ctxt: an XML parser context
5648
* @value: Receive a possible fixed default value for the attribute
5650
* Parse an attribute default declaration
5652
* [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5654
* [ VC: Required Attribute ]
5655
* if the default declaration is the keyword #REQUIRED, then the
5656
* attribute must be specified for all elements of the type in the
5657
* attribute-list declaration.
5659
* [ VC: Attribute Default Legal ]
5660
* The declared default value must meet the lexical constraints of
5661
* the declared attribute type c.f. xmlValidateAttributeDecl()
5663
* [ VC: Fixed Attribute Default ]
5664
* if an attribute has a default value declared with the #FIXED
5665
* keyword, instances of that attribute must match the default value.
5667
* [ WFC: No < in Attribute Values ]
5668
* handled in xmlParseAttValue()
5670
* returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5671
* or XML_ATTRIBUTE_FIXED.
5675
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5680
if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5682
return(XML_ATTRIBUTE_REQUIRED);
5684
if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5686
return(XML_ATTRIBUTE_IMPLIED);
5688
val = XML_ATTRIBUTE_NONE;
5689
if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5691
val = XML_ATTRIBUTE_FIXED;
5692
if (!IS_BLANK_CH(CUR)) {
5693
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5694
"Space required after '#FIXED'\n");
5698
ret = xmlParseAttValue(ctxt);
5699
ctxt->instate = XML_PARSER_DTD;
5701
xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5702
"Attribute default value declaration error\n");
5709
* xmlParseNotationType:
5710
* @ctxt: an XML parser context
5712
* Parse a Notation attribute type.
5714
* Note: the leading 'NOTATION' S part has already been parsed...
5716
* [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5718
* [ VC: Notation Attributes ]
5719
* Values of this type must match one of the notation names included
5720
* in the declaration; all notation names in the declaration must be declared.
5722
* Returns: the notation attribute tree built while parsing
5726
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5727
const xmlChar *name;
5728
xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5731
xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5738
name = xmlParseName(ctxt);
5740
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5741
"Name expected in NOTATION declaration\n");
5742
xmlFreeEnumeration(ret);
5746
while (tmp != NULL) {
5747
if (xmlStrEqual(name, tmp->name)) {
5748
xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5749
"standalone: attribute notation value token %s duplicated\n",
5751
if (!xmlDictOwns(ctxt->dict, name))
5752
xmlFree((xmlChar *) name);
5758
cur = xmlCreateEnumeration(name);
5760
xmlFreeEnumeration(ret);
5763
if (last == NULL) ret = last = cur;
5770
} while (RAW == '|');
5772
xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5773
xmlFreeEnumeration(ret);
5781
* xmlParseEnumerationType:
5782
* @ctxt: an XML parser context
5784
* parse an Enumeration attribute type.
5786
* [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5788
* [ VC: Enumeration ]
5789
* Values of this type must match one of the Nmtoken tokens in
5792
* Returns: the enumeration attribute tree built while parsing
5796
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5798
xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5801
xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5808
name = xmlParseNmtoken(ctxt);
5810
xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5814
while (tmp != NULL) {
5815
if (xmlStrEqual(name, tmp->name)) {
5816
xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5817
"standalone: attribute enumeration value token %s duplicated\n",
5819
if (!xmlDictOwns(ctxt->dict, name))
5826
cur = xmlCreateEnumeration(name);
5827
if (!xmlDictOwns(ctxt->dict, name))
5830
xmlFreeEnumeration(ret);
5833
if (last == NULL) ret = last = cur;
5840
} while (RAW == '|');
5842
xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5850
* xmlParseEnumeratedType:
5851
* @ctxt: an XML parser context
5852
* @tree: the enumeration tree built while parsing
5854
* parse an Enumerated attribute type.
5856
* [57] EnumeratedType ::= NotationType | Enumeration
5858
* [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5861
* Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5865
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5866
if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5868
if (!IS_BLANK_CH(CUR)) {
5869
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5870
"Space required after 'NOTATION'\n");
5874
*tree = xmlParseNotationType(ctxt);
5875
if (*tree == NULL) return(0);
5876
return(XML_ATTRIBUTE_NOTATION);
5878
*tree = xmlParseEnumerationType(ctxt);
5879
if (*tree == NULL) return(0);
5880
return(XML_ATTRIBUTE_ENUMERATION);
5884
* xmlParseAttributeType:
5885
* @ctxt: an XML parser context
5886
* @tree: the enumeration tree built while parsing
5888
* Parse the attribute type (AttType) of an attribute definition
5890
* [54] AttType ::= StringType | TokenizedType | EnumeratedType
5892
* [55] StringType ::= 'CDATA'
5894
* [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5895
* 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5897
* Validity constraints for attribute values syntax are checked in
5898
* xmlValidateAttributeValue()
5901
* Values of type ID must match the Name production. A name must not
5902
* appear more than once in an XML document as a value of this type;
5903
* i.e., ID values must uniquely identify the elements which bear them.
5905
* [ VC: One ID per Element Type ]
5906
* No element type may have more than one ID attribute specified.
5908
* [ VC: ID Attribute Default ]
5909
* An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5912
* Values of type IDREF must match the Name production, and values
5913
* of type IDREFS must match Names; each IDREF Name must match the value
5914
* of an ID attribute on some element in the XML document; i.e. IDREF
5915
* values must match the value of some ID attribute.
5917
* [ VC: Entity Name ]
5918
* Values of type ENTITY must match the Name production, values
5919
* of type ENTITIES must match Names; each Entity Name must match the
5920
* name of an unparsed entity declared in the DTD.
5922
* [ VC: Name Token ]
5923
* Values of type NMTOKEN must match the Nmtoken production; values
5924
* of type NMTOKENS must match Nmtokens.
5926
* Returns the attribute type
5929
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5931
if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5933
return(XML_ATTRIBUTE_CDATA);
5934
} else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5936
return(XML_ATTRIBUTE_IDREFS);
5937
} else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5939
return(XML_ATTRIBUTE_IDREF);
5940
} else if ((RAW == 'I') && (NXT(1) == 'D')) {
5942
return(XML_ATTRIBUTE_ID);
5943
} else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5945
return(XML_ATTRIBUTE_ENTITY);
5946
} else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5948
return(XML_ATTRIBUTE_ENTITIES);
5949
} else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5951
return(XML_ATTRIBUTE_NMTOKENS);
5952
} else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5954
return(XML_ATTRIBUTE_NMTOKEN);
5956
return(xmlParseEnumeratedType(ctxt, tree));
5960
* xmlParseAttributeListDecl:
5961
* @ctxt: an XML parser context
5963
* Parse the attribute list declaration for an element
5965
* [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5967
* [53] AttDef ::= S Name S AttType S DefaultDecl
5971
/*
 * Handle an <!ATTLIST declaration: read the element name, then loop over
 * the attribute definitions until a closing > is reached or the parser
 * enters the EOF state.  Each definition is reported through the SAX
 * attributeDecl callback when one is usable.
 */
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5972
const xmlChar *elemName;
5973
const xmlChar *attrName;
5974
xmlEnumerationPtr tree;
5976
if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5977
/* Input in use when the declaration starts; compared with the current
 * input at the end to detect a declaration that crosses an entity. */
xmlParserInputPtr input = ctxt->input;
5980
if (!IS_BLANK_CH(CUR)) {
5981
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5982
"Space required after '<!ATTLIST'\n");
5985
elemName = xmlParseName(ctxt);
5986
if (elemName == NULL) {
5987
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5988
"ATTLIST: no name for Element\n");
5993
/* One attribute definition (name, type, default) per iteration. */
while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
5994
/* Input position at the start of the iteration, used further down to
 * verify that the iteration consumed something. */
const xmlChar *check = CUR_PTR;
5997
xmlChar *defaultValue = NULL;
6001
attrName = xmlParseName(ctxt);
6002
if (attrName == NULL) {
6003
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6004
"ATTLIST: no name for Attribute\n");
6008
if (!IS_BLANK_CH(CUR)) {
6009
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6010
"Space required after the attribute name\n");
6015
/* tree receives the enumeration list, if the type parser builds one. */
type = xmlParseAttributeType(ctxt, &tree);
6021
if (!IS_BLANK_CH(CUR)) {
6022
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6023
"Space required after the attribute type\n");
6025
xmlFreeEnumeration(tree);
6030
def = xmlParseDefaultDecl(ctxt, &defaultValue);
6032
if (defaultValue != NULL)
6033
xmlFree(defaultValue);
6035
xmlFreeEnumeration(tree);
6038
/* Default values of every type except CDATA go through
 * xmlAttrNormalizeSpace, with the same buffer passed as both arguments. */
if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6039
xmlAttrNormalizeSpace(defaultValue, defaultValue);
6043
if (!IS_BLANK_CH(CUR)) {
6044
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6045
"Space required after the attribute default value\n");
6046
if (defaultValue != NULL)
6047
xmlFree(defaultValue);
6049
xmlFreeEnumeration(tree);
6054
/* Position unchanged since the top of the iteration: nothing was
 * consumed, which is reported as an internal error. */
if (check == CUR_PTR) {
6055
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6056
"in xmlParseAttributeListDecl\n");
6057
if (defaultValue != NULL)
6058
xmlFree(defaultValue);
6060
xmlFreeEnumeration(tree);
6063
/* tree is passed to the attributeDecl callback when SAX is enabled and
 * the callback exists; otherwise it is freed here. */
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6064
(ctxt->sax->attributeDecl != NULL))
6065
ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6066
type, def, defaultValue, tree);
6067
else if (tree != NULL)
6068
xmlFreeEnumeration(tree);
6070
/* Defaults are recorded with xmlAddDefAttrs only when ctxt->sax2 is set
 * and the default kind is neither XML_ATTRIBUTE_IMPLIED nor
 * XML_ATTRIBUTE_REQUIRED. */
if ((ctxt->sax2) && (defaultValue != NULL) &&
6071
(def != XML_ATTRIBUTE_IMPLIED) &&
6072
(def != XML_ATTRIBUTE_REQUIRED)) {
6073
xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6076
xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6078
if (defaultValue != NULL)
6079
xmlFree(defaultValue);
6083
/* A different current input than the one saved at the start means the
 * declaration does not start and stop in the same entity. */
if (input != ctxt->input) {
6084
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6085
"Attribute list declaration doesn't start and stop in the same entity\n",
6094
* xmlParseElementMixedContentDecl:
6095
* @ctxt: an XML parser context
6096
* @inputchk: the input used for the current entity, needed for boundary checks
6098
* parse the declaration for a Mixed Element content
6099
* The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6101
* [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6102
* '(' S? '#PCDATA' S? ')'
6104
* [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6106
* [ VC: No Duplicate Types ]
6107
* The same name must not appear more than once in a single
6108
* mixed-content declaration.
6110
* returns: the list of the xmlElementContentPtr describing the element choices
6112
/*
 * Parse a Mixed content model starting at #PCDATA and build the matching
 * tree of element-content nodes (a PCDATA leaf, OR nodes and ELEMENT
 * leaves), all created with xmlNewDocElementContent against ctxt->myDoc.
 * inputchk is compared with ctxt->input->id to detect a group that closes
 * in a different input than the one it was opened in.
 */
xmlElementContentPtr
6113
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6114
xmlElementContentPtr ret = NULL, cur = NULL, n;
6115
const xmlChar *elem = NULL;
6118
if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6123
/* The entity boundary check is only reported when validating. */
if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6124
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6125
"Element content declaration doesn't start and stop in the same entity\n",
6129
ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6133
ret->ocur = XML_ELEMENT_CONTENT_MULT;
6138
if ((RAW == '(') || (RAW == '|')) {
6139
ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6140
if (ret == NULL) return(NULL);
6142
/* One iteration per | separator following #PCDATA. */
while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6145
ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6146
if (ret == NULL) return(NULL);
6152
n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6153
if (n == NULL) return(NULL);
6154
n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6163
elem = xmlParseName(ctxt);
6165
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6166
"xmlParseElementMixedContentDecl : Name expected\n");
6167
xmlFreeDocElementContent(ctxt->myDoc, cur);
6173
/* A group that listed names must be closed by a ) followed by a star. */
if ((RAW == ')') && (NXT(1) == '*')) {
6175
cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6176
XML_ELEMENT_CONTENT_ELEMENT);
6177
if (cur->c2 != NULL)
6178
cur->c2->parent = cur;
6181
ret->ocur = XML_ELEMENT_CONTENT_MULT;
6182
if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6183
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6184
"Element content declaration doesn't start and stop in the same entity\n",
6189
xmlFreeDocElementContent(ctxt->myDoc, ret);
6190
xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6195
xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6201
* xmlParseElementChildrenContentDeclPriv:
6202
* @ctxt: an XML parser context
6203
* @inputchk: the input used for the current entity, needed for boundary checks
6204
* @depth: the level of recursion
6206
* parse the declaration for an element's children content (choice or seq)
6207
* The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6210
* [47] children ::= (choice | seq) ('?' | '*' | '+')?
6212
* [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6214
* [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6216
* [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6218
* [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6219
* TODO Parameter-entity replacement text must be properly nested
6220
* with parenthesized groups. That is to say, if either of the
6221
* opening or closing parentheses in a choice, seq, or Mixed
6222
* construct is contained in the replacement text for a parameter
6223
* entity, both must be contained in the same replacement text. For
6224
* interoperability, if a parameter-entity reference appears in a
6225
* choice, seq, or Mixed construct, its replacement text should not
6226
* be empty, and neither the first nor last non-blank character of
6227
* the replacement text should be a connector (| or ,).
6229
* Returns the tree of xmlElementContentPtr describing the element
6232
/*
 * Recursive parser for a children content model (choice or seq group).
 * It builds a tree of element-content nodes: nested groups are handled by
 * calling itself, names become ELEMENT leaves, and separators become SEQ
 * or OR nodes.  On the error paths visible below, the partially built
 * tree is released with xmlFreeDocElementContent.
 */
static xmlElementContentPtr
6233
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6235
xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6236
const xmlChar *elem;
6239
/* Depth guard: nesting beyond 128 levels is rejected unless
 * XML_PARSE_HUGE is set (the condition continues with a further clause). */
if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6241
xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6242
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6249
/* Id of the input in which the nested group opens; passed to the
 * recursive call as its inputchk. */
int inputid = ctxt->input->id;
6251
/* Recurse on first child */
6254
cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6259
elem = xmlParseName(ctxt);
6261
xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6264
cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6266
xmlErrMemory(ctxt, NULL);
6271
/* Record the occurrence indicator that follows the first item. */
cur->ocur = XML_ELEMENT_CONTENT_OPT;
6273
} else if (RAW == '*') {
6274
cur->ocur = XML_ELEMENT_CONTENT_MULT;
6276
} else if (RAW == '+') {
6277
cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6280
cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6286
while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6288
* Each loop we parse one separator and one element.
6291
/* type remembers the first separator character seen in this group; a
 * later separator that differs from it is an error. */
if (type == 0) type = CUR;
6294
* Detect "Name | Name , Name" error
6296
else if (type != CUR) {
6297
xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6298
"xmlParseElementChildrenContentDecl : '%c' expected\n",
6300
/* last is freed separately only when it is a distinct node from ret. */
if ((last != NULL) && (last != ret))
6301
xmlFreeDocElementContent(ctxt->myDoc, last);
6303
xmlFreeDocElementContent(ctxt->myDoc, ret);
6308
op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6310
if ((last != NULL) && (last != ret))
6311
xmlFreeDocElementContent(ctxt->myDoc, last);
6312
xmlFreeDocElementContent(ctxt->myDoc, ret);
6330
} else if (RAW == '|') {
6331
if (type == 0) type = CUR;
6334
* Detect "Name , Name | Name" error
6336
else if (type != CUR) {
6337
xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6338
"xmlParseElementChildrenContentDecl : '%c' expected\n",
6340
if ((last != NULL) && (last != ret))
6341
xmlFreeDocElementContent(ctxt->myDoc, last);
6343
xmlFreeDocElementContent(ctxt->myDoc, ret);
6348
op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6350
if ((last != NULL) && (last != ret))
6351
xmlFreeDocElementContent(ctxt->myDoc, last);
6353
xmlFreeDocElementContent(ctxt->myDoc, ret);
6372
xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6373
if ((last != NULL) && (last != ret))
6374
xmlFreeDocElementContent(ctxt->myDoc, last);
6376
xmlFreeDocElementContent(ctxt->myDoc, ret);
6383
int inputid = ctxt->input->id;
6384
/* Recurse on second child */
6387
last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6391
elem = xmlParseName(ctxt);
6393
xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6395
xmlFreeDocElementContent(ctxt->myDoc, ret);
6398
last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6401
xmlFreeDocElementContent(ctxt->myDoc, ret);
6405
/* Occurrence indicator of the item just parsed in this iteration. */
last->ocur = XML_ELEMENT_CONTENT_OPT;
6407
} else if (RAW == '*') {
6408
last->ocur = XML_ELEMENT_CONTENT_MULT;
6410
} else if (RAW == '+') {
6411
last->ocur = XML_ELEMENT_CONTENT_PLUS;
6414
last->ocur = XML_ELEMENT_CONTENT_ONCE;
6420
if ((cur != NULL) && (last != NULL)) {
6425
/* Entity boundary check for the group, reported only when validating. */
if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6426
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6427
"Element content declaration doesn't start and stop in the same entity\n",
6433
/* Occurrence indicator applied to the whole group: it is combined with
 * the occurrence already stored on ret. */
if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6434
(ret->ocur == XML_ELEMENT_CONTENT_MULT))
6435
ret->ocur = XML_ELEMENT_CONTENT_MULT;
6437
ret->ocur = XML_ELEMENT_CONTENT_OPT;
6440
} else if (RAW == '*') {
6442
ret->ocur = XML_ELEMENT_CONTENT_MULT;
6445
* Some normalization:
6446
* (a | b* | c?)* == (a | b | c)*
6448
while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6449
if ((cur->c1 != NULL) &&
6450
((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6451
(cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6452
cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6453
if ((cur->c2 != NULL) &&
6454
((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6455
(cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6456
cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6461
} else if (RAW == '+') {
6465
if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6466
(ret->ocur == XML_ELEMENT_CONTENT_MULT))
6467
ret->ocur = XML_ELEMENT_CONTENT_MULT;
6469
ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6471
* Some normalization:
6472
* (a | b*)+ == (a | b)*
6473
* (a | b?)+ == (a | b)*
6475
while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6476
if ((cur->c1 != NULL) &&
6477
((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6478
(cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6479
cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6482
if ((cur->c2 != NULL) &&
6483
((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6484
(cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6485
cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6491
ret->ocur = XML_ELEMENT_CONTENT_MULT;
6499
* xmlParseElementChildrenContentDecl:
6500
* @ctxt: an XML parser context
6501
* @inputchk: the input used for the current entity, needed for boundary checks
6503
* parse the declaration for an element's children content (choice or seq)
6504
* The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6506
* [47] children ::= (choice | seq) ('?' | '*' | '+')?
6508
* [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6510
* [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6512
* [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6514
* [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6515
* TODO Parameter-entity replacement text must be properly nested
6516
* with parenthesized groups. That is to say, if either of the
6517
* opening or closing parentheses in a choice, seq, or Mixed
6518
* construct is contained in the replacement text for a parameter
6519
* entity, both must be contained in the same replacement text. For
6520
* interoperability, if a parameter-entity reference appears in a
6521
* choice, seq, or Mixed construct, its replacement text should not
6522
* be empty, and neither the first nor last non-blank character of
6523
* the replacement text should be a connector (| or ,).
6525
* Returns the tree of xmlElementContentPtr describing the element
6528
/*
 * Public entry point for parsing a children content model.  It only
 * forwards ctxt and inputchk to xmlParseElementChildrenContentDeclPriv,
 * with 1 as the third argument (the recursion depth according to that
 * function's documentation).
 */
xmlElementContentPtr
6529
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6530
/* stub left for API/ABI compat */
6531
return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6535
* xmlParseElementContentDecl:
6536
* @ctxt: an XML parser context
6537
* @name: the name of the element being defined.
6538
* @result: the Element Content pointer will be stored here if any
6540
* parse the declaration for an Element content either Mixed or Children,
6541
* the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6543
* [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6545
* returns: the type of element content XML_ELEMENT_TYPE_xxx
6549
/*
 * Parse a parenthesized content specification and select between the two
 * content parsers: input starting with #PCDATA goes to the Mixed parser
 * (XML_ELEMENT_TYPE_MIXED), everything else to the children parser
 * (XML_ELEMENT_TYPE_ELEMENT).  name is only used in the error message.
 */
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6550
xmlElementContentPtr *result) {
6552
xmlElementContentPtr tree = NULL;
6553
/* Id of the input in use on entry; handed to the content parsers so they
 * can check that the group ends in the same input. */
int inputid = ctxt->input->id;
6559
xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6560
"xmlParseElementContentDecl : %s '(' expected\n", name);
6565
if (ctxt->instate == XML_PARSER_EOF)
6568
if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6569
tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6570
res = XML_ELEMENT_TYPE_MIXED;
6572
/* The children parser is entered with 1 as its third argument. */
tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6573
res = XML_ELEMENT_TYPE_ELEMENT;
6581
* xmlParseElementDecl:
6582
* @ctxt: an XML parser context
6584
* parse an Element declaration.
6586
* [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6588
* [ VC: Unique Element Type Declaration ]
6589
* No element type may be declared more than once
6591
* Returns the type of the element, or -1 in case of error
6594
/*
 * Parse an <!ELEMENT declaration: the element name followed by EMPTY, ANY
 * or a parenthesized content model.  The result is reported through the
 * SAX elementDecl callback when one is usable, and the content tree is
 * freed here unless the callback took it over (see the parent test below).
 */
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6595
const xmlChar *name;
6597
xmlElementContentPtr content = NULL;
6599
/* GROW; done in the caller */
6600
if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6601
/* Input in use when the declaration starts; compared with the current
 * input once the declaration has been read. */
xmlParserInputPtr input = ctxt->input;
6604
if (!IS_BLANK_CH(CUR)) {
6605
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6606
"Space required after 'ELEMENT'\n");
6609
name = xmlParseName(ctxt);
6611
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6612
"xmlParseElementDecl: no name for Element\n");
6615
while ((RAW == 0) && (ctxt->inputNr > 1))
6617
if (!IS_BLANK_CH(CUR)) {
6618
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6619
"Space required after the element name\n");
6622
/* Content specification: EMPTY, ANY, or a group opened by a parenthesis. */
if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6625
* Element must always be empty.
6627
ret = XML_ELEMENT_TYPE_EMPTY;
6628
} else if ((RAW == 'A') && (NXT(1) == 'N') &&
6632
* Element is a generic container.
6634
ret = XML_ELEMENT_TYPE_ANY;
6635
} else if (RAW == '(') {
6636
ret = xmlParseElementContentDecl(ctxt, name, &content);
6639
* [ WFC: PEs in Internal Subset ] error handling.
6641
/* A percent sign here, in the internal subset and on the top-level
 * input, gets the dedicated PEReference error instead of the generic one. */
if ((RAW == '%') && (ctxt->external == 0) &&
6642
(ctxt->inputNr == 1)) {
6643
xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6644
"PEReference: forbidden within markup decl in internal subset\n");
6646
xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6647
"xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6654
* Pop-up of finished entities.
6656
while ((RAW == 0) && (ctxt->inputNr > 1))
6661
xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6662
if (content != NULL) {
6663
xmlFreeDocElementContent(ctxt->myDoc, content);
6666
if (input != ctxt->input) {
6667
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6668
"Element declaration doesn't start and stop in the same entity\n");
6672
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6673
(ctxt->sax->elementDecl != NULL)) {
6674
/* parent is cleared before the callback so that a parent still NULL
 * afterwards shows the tree was not attached anywhere. */
if (content != NULL)
6675
content->parent = NULL;
6676
ctxt->sax->elementDecl(ctxt->userData, name, ret,
6678
if ((content != NULL) && (content->parent == NULL)) {
6680
* this is a trick: if xmlAddElementDecl is called,
6681
* instead of copying the full tree it is plugged directly
6682
* if called from the parser. Avoid duplicating the
6683
* interfaces or change the API/ABI
6685
xmlFreeDocElementContent(ctxt->myDoc, content);
6687
/* No usable callback: the content tree is freed here. */
} else if (content != NULL) {
6688
xmlFreeDocElementContent(ctxt->myDoc, content);
6696
* xmlParseConditionalSections
6697
* @ctxt: an XML parser context
6699
* [61] conditionalSect ::= includeSect | ignoreSect
6700
* [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6701
* [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6702
* [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6703
* [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6707
/*
 * Parse one conditional section.  An INCLUDE section has its content
 * parsed like external subset declarations (nested conditional sections,
 * blanks, PE references, markup declarations).  An IGNORE section is
 * scanned for its end while tracking nesting in depth.  Any other keyword
 * is reported as XML_ERR_CONDSEC_INVALID_KEYWORD.
 */
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6708
/* Id of the input in which the section starts, used for the entity
 * boundary checks below. */
int id = ctxt->input->id;
6712
if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6716
xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6718
if (ctxt->input->id != id) {
6719
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6720
"All markup of the conditional section is not in the same entity\n",
6725
if (xmlParserDebugEntities) {
6726
if ((ctxt->input != NULL) && (ctxt->input->filename))
6727
xmlGenericError(xmlGenericErrorContext,
6728
"%s(%d): ", ctxt->input->filename,
6730
xmlGenericError(xmlGenericErrorContext,
6731
"Entering INCLUDE Conditional Section\n");
6734
/* Loop until the three-character section terminator, the end of the
 * input, or parser EOF. */
while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6735
(NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
6736
/* Position and consumed counter at the top of the iteration; both
 * unchanged at the bottom means no progress was made. */
const xmlChar *check = CUR_PTR;
6737
unsigned int cons = ctxt->input->consumed;
6739
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6740
xmlParseConditionalSections(ctxt);
6741
} else if (IS_BLANK_CH(CUR)) {
6743
} else if (RAW == '%') {
6744
xmlParsePEReference(ctxt);
6746
xmlParseMarkupDecl(ctxt);
6749
* Pop-up of finished entities.
6751
while ((RAW == 0) && (ctxt->inputNr > 1))
6754
if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6755
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6759
if (xmlParserDebugEntities) {
6760
if ((ctxt->input != NULL) && (ctxt->input->filename))
6761
xmlGenericError(xmlGenericErrorContext,
6762
"%s(%d): ", ctxt->input->filename,
6764
xmlGenericError(xmlGenericErrorContext,
6765
"Leaving INCLUDE Conditional Section\n");
6768
} else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6770
xmlParserInputState instate;
6776
xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6778
if (ctxt->input->id != id) {
6779
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6780
"All markup of the conditional section is not in the same entity\n",
6785
if (xmlParserDebugEntities) {
6786
if ((ctxt->input != NULL) && (ctxt->input->filename))
6787
xmlGenericError(xmlGenericErrorContext,
6788
"%s(%d): ", ctxt->input->filename,
6790
xmlGenericError(xmlGenericErrorContext,
6791
"Entering IGNORE Conditional Section\n");
6795
* Parse up to the end of the conditional section
6796
* But disable SAX event generating DTD building in the meantime
6798
/* Save disableSAX and instate; both are restored after the scan.  SAX
 * is left enabled when ctxt->recovery is set. */
state = ctxt->disableSAX;
6799
instate = ctxt->instate;
6800
if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6801
ctxt->instate = XML_PARSER_IGNORE;
6803
while (((depth >= 0) && (RAW != 0)) &&
6804
(ctxt->instate != XML_PARSER_EOF)) {
6805
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6810
/* Each section terminator decrements depth; SKIP(3) is applied only
 * while depth stays non-negative, so it is not applied to the
 * terminator that drives depth below zero. */
if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6811
if (--depth >= 0) SKIP(3);
6818
ctxt->disableSAX = state;
6819
ctxt->instate = instate;
6821
if (xmlParserDebugEntities) {
6822
if ((ctxt->input != NULL) && (ctxt->input->filename))
6823
xmlGenericError(xmlGenericErrorContext,
6824
"%s(%d): ", ctxt->input->filename,
6826
xmlGenericError(xmlGenericErrorContext,
6827
"Leaving IGNORE Conditional Section\n");
6831
xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6838
xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6840
/* Final entity boundary check against the id saved on entry. */
if (ctxt->input->id != id) {
6841
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6842
"All markup of the conditional section is not in the same entity\n",
6850
* xmlParseMarkupDecl:
6851
* @ctxt: an XML parser context
6853
* parse Markup declarations
6855
* [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6856
* NotationDecl | PI | Comment
6858
* [ VC: Proper Declaration/PE Nesting ]
6859
* Parameter-entity replacement text must be properly nested with
6860
* markup declarations. That is to say, if either the first character
6861
* or the last character of a markup declaration (markupdecl above) is
6862
* contained in the replacement text for a parameter-entity reference,
6863
* both must be contained in the same replacement text.
6865
* [ WFC: PEs in Internal Subset ]
6866
* In the internal DTD subset, parameter-entity references can occur
6867
* only where markup declarations can occur, not within markup declarations.
6868
* (This does not apply to references that occur in external parameter
6869
* entities or to the external subset.)
6872
/*
 * Dispatch one markup declaration to the matching parser (element,
 * entity, attribute list, notation or comment) based on the characters
 * that follow the opening angle bracket, then handle PE references and
 * conditional sections where they are allowed.  The parser state is set
 * to XML_PARSER_DTD at the end.
 */
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6875
if (NXT(1) == '!') {
6879
xmlParseElementDecl(ctxt);
6880
else if (NXT(3) == 'N')
6881
xmlParseEntityDecl(ctxt);
6884
xmlParseAttributeListDecl(ctxt);
6887
xmlParseNotationDecl(ctxt);
6890
xmlParseComment(ctxt);
6893
/* there is an error but it will be detected later */
6896
} else if (NXT(1) == '?') {
6901
* This is only for internal subset. On external entities,
6902
* the replacement is done before parsing stage
6904
/* Internal subset and top-level input only. */
if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6905
xmlParsePEReference(ctxt);
6908
* Conditional sections are allowed from entities included
6909
* by PE References in the internal subset.
6911
/* inputNr > 1 means at least one input is stacked on the initial one. */
if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6912
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6913
xmlParseConditionalSections(ctxt);
6917
ctxt->instate = XML_PARSER_DTD;
6922
* @ctxt: an XML parser context
6924
* parse an XML declaration header for external entities
6926
* [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6930
/*
 * Parse the text declaration at the start of an external entity: an
 * optional version, a required encoding declaration and the closing
 * marker.  The version string is stored on the current input.
 */
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6932
const xmlChar *encoding;
6935
* We know that '<?xml' is here.
6937
/* The five-character opener must be followed by a blank at offset 5. */
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6940
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6944
if (!IS_BLANK_CH(CUR)) {
6945
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6946
"Space needed after '<?xml'\n");
6951
* We may have the VersionInfo here.
6953
/* When no version is parsed, XML_DEFAULT_VERSION is used instead, via
 * xmlCharStrdup. */
version = xmlParseVersionInfo(ctxt);
6954
if (version == NULL)
6955
version = xmlCharStrdup(XML_DEFAULT_VERSION);
6957
if (!IS_BLANK_CH(CUR)) {
6958
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6959
"Space needed here\n");
6962
ctxt->input->version = version;
6965
* We must have the encoding declaration
6967
encoding = xmlParseEncodingDecl(ctxt);
6968
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6970
* The XML REC instructs us to stop parsing right here
6974
/* The missing-encoding error is raised only when no other error has
 * been recorded so far (errNo is still XML_ERR_OK). */
if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6975
xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6976
"Missing encoding in text declaration\n");
6980
if ((RAW == '?') && (NXT(1) == '>')) {
6982
} else if (RAW == '>') {
6983
/* Deprecated old WD ... */
6984
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6987
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6988
MOVETO_ENDTAG(CUR_PTR);
6994
* xmlParseExternalSubset:
6995
* @ctxt: an XML parser context
6996
* @ExternalID: the external identifier
6997
* @SystemID: the system identifier (or URL)
6999
* parse Markup declarations from an external subset
7001
* [30] extSubset ::= textDecl? extSubsetDecl
7003
* [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7006
/*
 * Parse the declarations of an external subset: optional encoding
 * detection and text declaration first, then a loop over conditional
 * sections, blanks, PE references and markup declarations.  A document
 * and its internal subset node are created when they do not exist yet;
 * ExternalID and SystemID are passed to xmlCreateIntSubset.
 */
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7007
const xmlChar *SystemID) {
7008
xmlDetectSAX2(ctxt);
7011
/* Encoding detection runs only when no encoding is set yet and at least
 * 4 bytes are available in the current input. */
if ((ctxt->encoding == NULL) &&
7012
(ctxt->input->end - ctxt->input->cur >= 4)) {
7014
xmlCharEncoding enc;
7020
enc = xmlDetectCharEncoding(start, 4);
7021
if (enc != XML_CHAR_ENCODING_NONE)
7022
xmlSwitchEncoding(ctxt, enc);
7025
if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7026
xmlParseTextDecl(ctxt);
7027
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7029
* The XML REC instructs us to stop parsing right here
7031
ctxt->instate = XML_PARSER_EOF;
7035
/* Create the document on demand and flag it XML_DOC_INTERNAL. */
if (ctxt->myDoc == NULL) {
7036
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7037
if (ctxt->myDoc == NULL) {
7038
xmlErrMemory(ctxt, "New Doc failed");
7041
ctxt->myDoc->properties = XML_DOC_INTERNAL;
7043
if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7044
xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7046
ctxt->instate = XML_PARSER_DTD;
7048
/* Continue while the input starts with a processing instruction or
 * declaration opener, a percent sign, or a blank. */
while (((RAW == '<') && (NXT(1) == '?')) ||
7049
((RAW == '<') && (NXT(1) == '!')) ||
7050
(RAW == '%') || IS_BLANK_CH(CUR)) {
7051
/* Position and consumed counter at the top of the iteration; both
 * unchanged at the bottom means no progress was made. */
const xmlChar *check = CUR_PTR;
7052
unsigned int cons = ctxt->input->consumed;
7055
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7056
xmlParseConditionalSections(ctxt);
7057
} else if (IS_BLANK_CH(CUR)) {
7059
} else if (RAW == '%') {
7060
xmlParsePEReference(ctxt);
7062
xmlParseMarkupDecl(ctxt);
7065
* Pop-up of finished entities.
7067
while ((RAW == 0) && (ctxt->inputNr > 1))
7070
if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7071
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7077
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7083
* xmlParseReference:
7084
* @ctxt: an XML parser context
7086
* parse and handle entity references in content, depending on the SAX
7087
* interface, this may end up in a call to characters() if this is a
7088
* CharRef, a predefined entity, if there is no reference() callback,
7089
* or if the parser was asked to switch to that mode.
7091
* [67] Reference ::= EntityRef | CharRef
7094
xmlParseReference(xmlParserCtxtPtr ctxt) {
7098
xmlNodePtr list = NULL;
7099
xmlParserErrors ret = XML_ERR_OK;
7106
* Simple case of a CharRef
7108
if (NXT(1) == '#') {
7112
int value = xmlParseCharRef(ctxt);
7116
if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7118
* So we are using non-UTF-8 buffers
7119
* Check that the char fit on 8bits, if not
7120
* generate a CharRef.
7122
if (value <= 0xFF) {
7125
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7126
(!ctxt->disableSAX))
7127
ctxt->sax->characters(ctxt->userData, out, 1);
7129
if ((hex == 'x') || (hex == 'X'))
7130
snprintf((char *)out, sizeof(out), "#x%X", value);
7132
snprintf((char *)out, sizeof(out), "#%d", value);
7133
if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7134
(!ctxt->disableSAX))
7135
ctxt->sax->reference(ctxt->userData, out);
7139
* Just encode the value in UTF-8
7141
COPY_BUF(0 ,out, i, value);
7143
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7144
(!ctxt->disableSAX))
7145
ctxt->sax->characters(ctxt->userData, out, i);
7151
* We are seeing an entity reference
7153
ent = xmlParseEntityRef(ctxt);
7154
if (ent == NULL) return;
7155
if (!ctxt->wellFormed)
7157
was_checked = ent->checked;
7159
/* special case of predefined entities */
7160
if ((ent->name == NULL) ||
7161
(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7163
if (val == NULL) return;
7165
* inline the entity.
7167
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7168
(!ctxt->disableSAX))
7169
ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7174
* The first reference to the entity trigger a parsing phase
7175
* where the ent->children is filled with the result from
7177
* Note: external parsed entities will not be loaded, it is not
7178
* required for a non-validating parser, unless the parsing option
7179
* of validating, or substituting entities were given. Doing so is
7180
* far more secure as the parser will only process data coming from
7181
* the document entity by default.
7183
if ((ent->checked == 0) &&
7184
((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7185
(ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7186
unsigned long oldnbent = ctxt->nbentities;
7189
* This is a bit hackish but this seems the best
7190
* way to make sure both SAX and DOM entity support
7194
if (ctxt->userData == ctxt)
7197
user_data = ctxt->userData;
7200
* Check that this entity is well formed
7201
* 4.3.2: An internal general parsed entity is well-formed
7202
* if its replacement text matches the production labeled
7205
if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7207
ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7211
} else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7213
ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7214
user_data, ctxt->depth, ent->URI,
7215
ent->ExternalID, &list);
7218
ret = XML_ERR_ENTITY_PE_INTERNAL;
7219
xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7220
"invalid entity type found\n", NULL);
7224
* Store the number of entities needing parsing for this entity
7225
* content and do checkings
7227
ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7228
if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7230
if (ret == XML_ERR_ENTITY_LOOP) {
7231
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7232
xmlFreeNodeList(list);
7235
if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7236
xmlFreeNodeList(list);
7240
if ((ret == XML_ERR_OK) && (list != NULL)) {
7241
if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7242
(ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7243
(ent->children == NULL)) {
7244
ent->children = list;
7245
if (ctxt->replaceEntities) {
7247
* Prune it directly in the generated document
7248
* except for single text nodes.
7250
if (((list->type == XML_TEXT_NODE) &&
7251
(list->next == NULL)) ||
7252
(ctxt->parseMode == XML_PARSE_READER)) {
7253
list->parent = (xmlNodePtr) ent;
7258
while (list != NULL) {
7259
list->parent = (xmlNodePtr) ctxt->node;
7260
list->doc = ctxt->myDoc;
7261
if (list->next == NULL)
7265
list = ent->children;
7266
#ifdef LIBXML_LEGACY_ENABLED
7267
if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7268
xmlAddEntityReference(ent, list, NULL);
7269
#endif /* LIBXML_LEGACY_ENABLED */
7273
while (list != NULL) {
7274
list->parent = (xmlNodePtr) ent;
7275
xmlSetTreeDoc(list, ent->doc);
7276
if (list->next == NULL)
7282
xmlFreeNodeList(list);
7285
} else if ((ret != XML_ERR_OK) &&
7286
(ret != XML_WAR_UNDECLARED_ENTITY)) {
7287
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7288
"Entity '%s' failed to parse\n", ent->name);
7289
} else if (list != NULL) {
7290
xmlFreeNodeList(list);
7293
if (ent->checked == 0)
7295
} else if (ent->checked != 1) {
7296
ctxt->nbentities += ent->checked / 2;
7300
* Now that the entity content has been gathered
7301
* provide it to the application, this can take different forms based
7302
* on the parsing modes.
7304
if (ent->children == NULL) {
7306
* Probably running in SAX mode and the callbacks don't
7307
* build the entity content. So unless we already went
7308
* through parsing for first checking go through the entity
7309
* content to generate callbacks associated to the entity
7311
if (was_checked != 0) {
7314
* This is a bit hackish but this seems the best
7315
* way to make sure both SAX and DOM entity support
7318
if (ctxt->userData == ctxt)
7321
user_data = ctxt->userData;
7323
if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7325
ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7326
ent->content, user_data, NULL);
7328
} else if (ent->etype ==
7329
XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7331
ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7332
ctxt->sax, user_data, ctxt->depth,
7333
ent->URI, ent->ExternalID, NULL);
7336
ret = XML_ERR_ENTITY_PE_INTERNAL;
7337
xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7338
"invalid entity type found\n", NULL);
7340
if (ret == XML_ERR_ENTITY_LOOP) {
7341
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7345
if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7346
(ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7348
* Entity reference callback comes second, it's somewhat
7349
* superfluous but a compatibility to historical behaviour
7351
ctxt->sax->reference(ctxt->userData, ent->name);
7357
* If we didn't get any children for the entity being built
7359
if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7360
(ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7364
ctxt->sax->reference(ctxt->userData, ent->name);
7368
if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7370
* There is a problem on the handling of _private for entities
7371
* (bug 155816): Should we copy the content of the field from
7372
* the entity (possibly overwriting some value set by the user
7373
* when a copy is created), should we leave it alone, or should
7374
* we try to take care of different situations? The problem
7375
* is exacerbated by the usage of this field by the xmlReader.
7376
* To fix this bug, we look at _private on the created node
7377
* and, if it's NULL, we copy in whatever was in the entity.
7378
* If it's not NULL we leave it alone. This is somewhat of a
7379
* hack - maybe we should have further tests to determine
7382
if ((ctxt->node != NULL) && (ent->children != NULL)) {
7384
* Seems we are generating the DOM content, do
7385
* a simple tree copy for all references except the first
7386
* In the first occurrence list contains the replacement.
7388
if (((list == NULL) && (ent->owner == 0)) ||
7389
(ctxt->parseMode == XML_PARSE_READER)) {
7390
xmlNodePtr nw = NULL, cur, firstChild = NULL;
7393
* We are copying here, make sure there is no abuse
7395
ctxt->sizeentcopy += ent->length;
7396
if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7400
* when operating on a reader, the entities definitions
7401
* are always owning the entities subtree.
7402
if (ctxt->parseMode == XML_PARSE_READER)
7406
cur = ent->children;
7407
while (cur != NULL) {
7408
nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7410
if (nw->_private == NULL)
7411
nw->_private = cur->_private;
7412
if (firstChild == NULL){
7415
nw = xmlAddChild(ctxt->node, nw);
7417
if (cur == ent->last) {
7419
* needed to detect some strange empty
7420
* node cases in the reader tests
7422
if ((ctxt->parseMode == XML_PARSE_READER) &&
7424
(nw->type == XML_ELEMENT_NODE) &&
7425
(nw->children == NULL))
7432
#ifdef LIBXML_LEGACY_ENABLED
7433
if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7434
xmlAddEntityReference(ent, firstChild, nw);
7435
#endif /* LIBXML_LEGACY_ENABLED */
7436
} else if ((list == NULL) || (ctxt->inputNr > 0)) {
7437
xmlNodePtr nw = NULL, cur, next, last,
7441
* We are copying here, make sure there is no abuse
7443
ctxt->sizeentcopy += ent->length;
7444
if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7448
* Copy the entity child list and make it the new
7449
* entity child list. The goal is to make sure any
7450
* ID or REF referenced will be the one from the
7451
* document content and not the entity copy.
7453
cur = ent->children;
7454
ent->children = NULL;
7457
while (cur != NULL) {
7461
nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7463
if (nw->_private == NULL)
7464
nw->_private = cur->_private;
7465
if (firstChild == NULL){
7468
xmlAddChild((xmlNodePtr) ent, nw);
7469
xmlAddChild(ctxt->node, cur);
7475
if (ent->owner == 0)
7477
#ifdef LIBXML_LEGACY_ENABLED
7478
if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7479
xmlAddEntityReference(ent, firstChild, nw);
7480
#endif /* LIBXML_LEGACY_ENABLED */
7482
const xmlChar *nbktext;
7485
* the name change is to avoid coalescing of the
7486
* node with a possible previous text one which
7487
* would make ent->children a dangling pointer
7489
nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7491
if (ent->children->type == XML_TEXT_NODE)
7492
ent->children->name = nbktext;
7493
if ((ent->last != ent->children) &&
7494
(ent->last->type == XML_TEXT_NODE))
7495
ent->last->name = nbktext;
7496
xmlAddChildList(ctxt->node, ent->children);
7500
* This is to avoid a nasty side effect, see
7501
* characters() in SAX.c
7511
* xmlParseEntityRef:
7512
* @ctxt: an XML parser context
7514
* parse ENTITY references declarations
7516
* [68] EntityRef ::= '&' Name ';'
7518
* [ WFC: Entity Declared ]
7519
* In a document without any DTD, a document with only an internal DTD
7520
* subset which contains no parameter entity references, or a document
7521
* with "standalone='yes'", the Name given in the entity reference
7522
* must match that in an entity declaration, except that well-formed
7523
* documents need not declare any of the following entities: amp, lt,
7524
* gt, apos, quot. The declaration of a parameter entity must precede
7525
* any reference to it. Similarly, the declaration of a general entity
7526
* must precede any reference to it which appears in a default value in an
7527
* attribute-list declaration. Note that if entities are declared in the
7528
* external subset or in external parameter entities, a non-validating
7529
* processor is not obligated to read and process their declarations;
7530
* for such documents, the rule that an entity must be declared is a
7531
* well-formedness constraint only if standalone='yes'.
7533
* [ WFC: Parsed Entity ]
7534
* An entity reference must not contain the name of an unparsed entity
7536
* Returns the xmlEntityPtr if found, or NULL otherwise.
7539
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7540
const xmlChar *name;
7541
xmlEntityPtr ent = NULL;
7544
if (ctxt->instate == XML_PARSER_EOF)
7550
name = xmlParseName(ctxt);
7552
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7553
"xmlParseEntityRef: no name\n");
7557
xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7563
* Predefined entities override any extra definition
7565
if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7566
ent = xmlGetPredefinedEntity(name);
7572
* Increase the number of entity references parsed
7577
* Ask first SAX for entity resolution, otherwise try the
7578
* entities which may have been stored in the parser context.
7580
if (ctxt->sax != NULL) {
7581
if (ctxt->sax->getEntity != NULL)
7582
ent = ctxt->sax->getEntity(ctxt->userData, name);
7583
if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7584
(ctxt->options & XML_PARSE_OLDSAX))
7585
ent = xmlGetPredefinedEntity(name);
7586
if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7587
(ctxt->userData==ctxt)) {
7588
ent = xmlSAX2GetEntity(ctxt, name);
7591
if (ctxt->instate == XML_PARSER_EOF)
7594
* [ WFC: Entity Declared ]
7595
* In a document without any DTD, a document with only an
7596
* internal DTD subset which contains no parameter entity
7597
* references, or a document with "standalone='yes'", the
7598
* Name given in the entity reference must match that in an
7599
* entity declaration, except that well-formed documents
7600
* need not declare any of the following entities: amp, lt,
7602
* The declaration of a parameter entity must precede any
7604
* Similarly, the declaration of a general entity must
7605
* precede any reference to it which appears in a default
7606
* value in an attribute-list declaration. Note that if
7607
* entities are declared in the external subset or in
7608
* external parameter entities, a non-validating processor
7609
* is not obligated to read and process their declarations;
7610
* for such documents, the rule that an entity must be
7611
* declared is a well-formedness constraint only if
7615
if ((ctxt->standalone == 1) ||
7616
((ctxt->hasExternalSubset == 0) &&
7617
(ctxt->hasPErefs == 0))) {
7618
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7619
"Entity '%s' not defined\n", name);
7621
xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7622
"Entity '%s' not defined\n", name);
7623
if ((ctxt->inSubset == 0) &&
7624
(ctxt->sax != NULL) &&
7625
(ctxt->sax->reference != NULL)) {
7626
ctxt->sax->reference(ctxt->userData, name);
7633
* [ WFC: Parsed Entity ]
7634
* An entity reference must not contain the name of an
7637
else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7638
xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7639
"Entity reference to unparsed entity %s\n", name);
7643
* [ WFC: No External Entity References ]
7644
* Attribute values cannot contain direct or indirect
7645
* entity references to external entities.
7647
else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7648
(ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7649
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7650
"Attribute references external entity '%s'\n", name);
7653
* [ WFC: No < in Attribute Values ]
7654
* The replacement text of any entity referred to directly or
7655
* indirectly in an attribute value (other than "&lt;") must
7658
else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7660
(ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7661
if ((ent->checked & 1) || ((ent->checked == 0) &&
7662
(ent->content != NULL) &&(xmlStrchr(ent->content, '<')))) {
7663
xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7664
"'<' in entity '%s' is not allowed in attributes values\n", name);
7669
* Internal check, no parameter entities here ...
7672
switch (ent->etype) {
7673
case XML_INTERNAL_PARAMETER_ENTITY:
7674
case XML_EXTERNAL_PARAMETER_ENTITY:
7675
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7676
"Attempt to reference the parameter entity '%s'\n",
7685
* [ WFC: No Recursion ]
7686
* A parsed entity must not contain a recursive reference
7687
* to itself, either directly or indirectly.
7688
* Done somewhere else
7694
* xmlParseStringEntityRef:
7695
* @ctxt: an XML parser context
7696
* @str: a pointer to an index in the string
7698
* parse ENTITY references declarations, but this version parses it from
7701
* [68] EntityRef ::= '&' Name ';'
7703
* [ WFC: Entity Declared ]
7704
* In a document without any DTD, a document with only an internal DTD
7705
* subset which contains no parameter entity references, or a document
7706
* with "standalone='yes'", the Name given in the entity reference
7707
* must match that in an entity declaration, except that well-formed
7708
* documents need not declare any of the following entities: amp, lt,
7709
* gt, apos, quot. The declaration of a parameter entity must precede
7710
* any reference to it. Similarly, the declaration of a general entity
7711
* must precede any reference to it which appears in a default value in an
7712
* attribute-list declaration. Note that if entities are declared in the
7713
* external subset or in external parameter entities, a non-validating
7714
* processor is not obligated to read and process their declarations;
7715
* for such documents, the rule that an entity must be declared is a
7716
* well-formedness constraint only if standalone='yes'.
7718
* [ WFC: Parsed Entity ]
7719
* An entity reference must not contain the name of an unparsed entity
7721
* Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7722
* is updated to the current location in the string.
7725
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7729
xmlEntityPtr ent = NULL;
7731
if ((str == NULL) || (*str == NULL))
7739
name = xmlParseStringName(ctxt, &ptr);
7741
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7742
"xmlParseStringEntityRef: no name\n");
7747
xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7756
* Predefined entities override any extra definition
7758
if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7759
ent = xmlGetPredefinedEntity(name);
7768
* Increase the number of entity references parsed
7773
* Ask first SAX for entity resolution, otherwise try the
7774
* entities which may have been stored in the parser context.
7776
if (ctxt->sax != NULL) {
7777
if (ctxt->sax->getEntity != NULL)
7778
ent = ctxt->sax->getEntity(ctxt->userData, name);
7779
if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7780
ent = xmlGetPredefinedEntity(name);
7781
if ((ent == NULL) && (ctxt->userData==ctxt)) {
7782
ent = xmlSAX2GetEntity(ctxt, name);
7785
if (ctxt->instate == XML_PARSER_EOF) {
7791
* [ WFC: Entity Declared ]
7792
* In a document without any DTD, a document with only an
7793
* internal DTD subset which contains no parameter entity
7794
* references, or a document with "standalone='yes'", the
7795
* Name given in the entity reference must match that in an
7796
* entity declaration, except that well-formed documents
7797
* need not declare any of the following entities: amp, lt,
7799
* The declaration of a parameter entity must precede any
7801
* Similarly, the declaration of a general entity must
7802
* precede any reference to it which appears in a default
7803
* value in an attribute-list declaration. Note that if
7804
* entities are declared in the external subset or in
7805
* external parameter entities, a non-validating processor
7806
* is not obligated to read and process their declarations;
7807
* for such documents, the rule that an entity must be
7808
* declared is a well-formedness constraint only if
7812
if ((ctxt->standalone == 1) ||
7813
((ctxt->hasExternalSubset == 0) &&
7814
(ctxt->hasPErefs == 0))) {
7815
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7816
"Entity '%s' not defined\n", name);
7818
xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7819
"Entity '%s' not defined\n",
7822
/* TODO ? check regressions ctxt->valid = 0; */
7826
* [ WFC: Parsed Entity ]
7827
* An entity reference must not contain the name of an
7830
else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7831
xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7832
"Entity reference to unparsed entity %s\n", name);
7836
* [ WFC: No External Entity References ]
7837
* Attribute values cannot contain direct or indirect
7838
* entity references to external entities.
7840
else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7841
(ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7842
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7843
"Attribute references external entity '%s'\n", name);
7846
* [ WFC: No < in Attribute Values ]
7847
* The replacement text of any entity referred to directly or
7848
* indirectly in an attribute value (other than "&lt;") must
7851
else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7852
(ent != NULL) && (ent->content != NULL) &&
7853
(ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7854
(xmlStrchr(ent->content, '<'))) {
7855
xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7856
"'<' in entity '%s' is not allowed in attributes values\n",
7861
* Internal check, no parameter entities here ...
7864
switch (ent->etype) {
7865
case XML_INTERNAL_PARAMETER_ENTITY:
7866
case XML_EXTERNAL_PARAMETER_ENTITY:
7867
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7868
"Attempt to reference the parameter entity '%s'\n",
7877
* [ WFC: No Recursion ]
7878
* A parsed entity must not contain a recursive reference
7879
* to itself, either directly or indirectly.
7880
* Done somewhere else
7889
* xmlParsePEReference:
7890
* @ctxt: an XML parser context
7892
* parse PEReference declarations
7893
* The entity content is handled directly by pushing its content as
7894
* a new input stream.
7896
* [69] PEReference ::= '%' Name ';'
7898
* [ WFC: No Recursion ]
7899
* A parsed entity must not contain a recursive
7900
* reference to itself, either directly or indirectly.
7902
* [ WFC: Entity Declared ]
7903
* In a document without any DTD, a document with only an internal DTD
7904
* subset which contains no parameter entity references, or a document
7905
* with "standalone='yes'", ... ... The declaration of a parameter
7906
* entity must precede any reference to it...
7908
* [ VC: Entity Declared ]
7909
* In a document with an external subset or external parameter entities
7910
* with "standalone='no'", ... ... The declaration of a parameter entity
7911
* must precede any reference to it...
7914
* Parameter-entity references may only appear in the DTD.
7915
* NOTE: misleading but this is handled.
7918
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7920
const xmlChar *name;
7921
xmlEntityPtr entity = NULL;
7922
xmlParserInputPtr input;
7927
name = xmlParseName(ctxt);
7929
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7930
"xmlParsePEReference: no name\n");
7934
xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7941
* Increase the number of entity references parsed
7946
* Request the entity from SAX
7948
if ((ctxt->sax != NULL) &&
7949
(ctxt->sax->getParameterEntity != NULL))
7950
entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7951
if (ctxt->instate == XML_PARSER_EOF)
7953
if (entity == NULL) {
7955
* [ WFC: Entity Declared ]
7956
* In a document without any DTD, a document with only an
7957
* internal DTD subset which contains no parameter entity
7958
* references, or a document with "standalone='yes'", ...
7959
* ... The declaration of a parameter entity must precede
7960
* any reference to it...
7962
if ((ctxt->standalone == 1) ||
7963
((ctxt->hasExternalSubset == 0) &&
7964
(ctxt->hasPErefs == 0))) {
7965
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7966
"PEReference: %%%s; not found\n",
7970
* [ VC: Entity Declared ]
7971
* In a document with an external subset or external
7972
* parameter entities with "standalone='no'", ...
7973
* ... The declaration of a parameter entity must
7974
* precede any reference to it...
7976
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7977
"PEReference: %%%s; not found\n",
7983
* Internal checking in case the entity quest barfed
7985
if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7986
(entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7987
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7988
"Internal: %%%s; is not a parameter entity\n",
7990
} else if (ctxt->input->free != deallocblankswrapper) {
7991
input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7992
if (xmlPushInput(ctxt, input) < 0)
7997
* handle the extra spaces added before and after
7998
* c.f. http://www.w3.org/TR/REC-xml#as-PE
8000
input = xmlNewEntityInputStream(ctxt, entity);
8001
if (xmlPushInput(ctxt, input) < 0)
8003
if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8004
(CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8005
(IS_BLANK_CH(NXT(5)))) {
8006
xmlParseTextDecl(ctxt);
8008
XML_ERR_UNSUPPORTED_ENCODING) {
8010
* The XML REC instructs us to stop parsing
8013
ctxt->instate = XML_PARSER_EOF;
8019
ctxt->hasPErefs = 1;
8023
* xmlLoadEntityContent:
8024
* @ctxt: an XML parser context
8025
* @entity: an unloaded system entity
8027
* Load the original content of the given system entity from the
8028
* ExternalID/SystemID given. This is to be used for Included in Literal
8029
* http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8031
* Returns 0 in case of success and -1 in case of failure
8034
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8035
xmlParserInputPtr input;
8040
if ((ctxt == NULL) || (entity == NULL) ||
8041
((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8042
(entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8043
(entity->content != NULL)) {
8044
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8045
"xmlLoadEntityContent parameter error");
8049
if (xmlParserDebugEntities)
8050
xmlGenericError(xmlGenericErrorContext,
8051
"Reading %s entity content input\n", entity->name);
8053
buf = xmlBufferCreate();
8055
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8056
"xmlLoadEntityContent parameter error");
8060
input = xmlNewEntityInputStream(ctxt, entity);
8061
if (input == NULL) {
8062
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8063
"xmlLoadEntityContent input error");
8069
* Push the entity as the current input, read char by char
8070
* saving to the buffer until the end of the entity or an error
8072
if (xmlPushInput(ctxt, input) < 0) {
8079
while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8081
xmlBufferAdd(buf, ctxt->input->cur, l);
8082
if (count++ > XML_PARSER_CHUNK_SIZE) {
8085
if (ctxt->instate == XML_PARSER_EOF) {
8095
if (ctxt->instate == XML_PARSER_EOF) {
8103
if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8105
} else if (!IS_CHAR(c)) {
8106
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8107
"xmlLoadEntityContent: invalid char value %d\n",
8112
entity->content = buf->content;
8113
buf->content = NULL;
8120
* xmlParseStringPEReference:
8121
* @ctxt: an XML parser context
8122
* @str: a pointer to an index in the string
8124
* parse PEReference declarations
8126
* [69] PEReference ::= '%' Name ';'
8128
* [ WFC: No Recursion ]
8129
* A parsed entity must not contain a recursive
8130
* reference to itself, either directly or indirectly.
8132
* [ WFC: Entity Declared ]
8133
* In a document without any DTD, a document with only an internal DTD
8134
* subset which contains no parameter entity references, or a document
8135
* with "standalone='yes'", ... ... The declaration of a parameter
8136
* entity must precede any reference to it...
8138
* [ VC: Entity Declared ]
8139
* In a document with an external subset or external parameter entities
8140
* with "standalone='no'", ... ... The declaration of a parameter entity
8141
* must precede any reference to it...
8144
* Parameter-entity references may only appear in the DTD.
8145
* NOTE: misleading but this is handled.
8147
* Returns the string of the entity content.
8148
* str is updated to the current value of the index
8151
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8155
xmlEntityPtr entity = NULL;
8157
if ((str == NULL) || (*str == NULL)) return(NULL);
8163
name = xmlParseStringName(ctxt, &ptr);
8165
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8166
"xmlParseStringPEReference: no name\n");
8172
xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8180
* Increase the number of entity references parsed
8185
* Request the entity from SAX
8187
if ((ctxt->sax != NULL) &&
8188
(ctxt->sax->getParameterEntity != NULL))
8189
entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8190
if (ctxt->instate == XML_PARSER_EOF) {
8194
if (entity == NULL) {
8196
* [ WFC: Entity Declared ]
8197
* In a document without any DTD, a document with only an
8198
* internal DTD subset which contains no parameter entity
8199
* references, or a document with "standalone='yes'", ...
8200
* ... The declaration of a parameter entity must precede
8201
* any reference to it...
8203
if ((ctxt->standalone == 1) ||
8204
((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8205
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8206
"PEReference: %%%s; not found\n", name);
8209
* [ VC: Entity Declared ]
8210
* In a document with an external subset or external
8211
* parameter entities with "standalone='no'", ...
8212
* ... The declaration of a parameter entity must
8213
* precede any reference to it...
8215
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8216
"PEReference: %%%s; not found\n",
8222
* Internal checking in case the entity quest barfed
8224
if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8225
(entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8226
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8227
"%%%s; is not a parameter entity\n",
8231
ctxt->hasPErefs = 1;
8238
* xmlParseDocTypeDecl:
8239
* @ctxt: an XML parser context
8241
* parse a DOCTYPE declaration
8243
* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8244
* ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8246
* [ VC: Root Element Type ]
8247
* The Name in the document type declaration must match the element
8248
* type of the root element.
8252
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8253
const xmlChar *name = NULL;
8254
xmlChar *ExternalID = NULL;
8255
xmlChar *URI = NULL;
8258
* We know that '<!DOCTYPE' has been detected.
8265
* Parse the DOCTYPE name.
8267
name = xmlParseName(ctxt);
8269
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8270
"xmlParseDocTypeDecl : no DOCTYPE name !\n");
8272
ctxt->intSubName = name;
8277
* Check for SystemID and ExternalID
8279
URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8281
if ((URI != NULL) || (ExternalID != NULL)) {
8282
ctxt->hasExternalSubset = 1;
8284
ctxt->extSubURI = URI;
8285
ctxt->extSubSystem = ExternalID;
8290
* Create and update the internal subset.
8292
if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8293
(!ctxt->disableSAX))
8294
ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8295
if (ctxt->instate == XML_PARSER_EOF)
8299
* Is there any internal subset declarations ?
8300
* they are handled separately in xmlParseInternalSubset()
8306
* We should be at the end of the DOCTYPE declaration.
8309
xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8315
* xmlParseInternalSubset:
8316
* @ctxt: an XML parser context
8318
* parse the internal subset declaration
8320
* [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8324
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8326
* Is there any DTD definition ?
8329
ctxt->instate = XML_PARSER_DTD;
8332
* Parse the succession of Markup declarations and
8334
* Subsequence (markupdecl | PEReference | S)*
8336
while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
8337
const xmlChar *check = CUR_PTR;
8338
unsigned int cons = ctxt->input->consumed;
8341
xmlParseMarkupDecl(ctxt);
8342
xmlParsePEReference(ctxt);
8345
* Pop-up of finished entities.
8347
while ((RAW == 0) && (ctxt->inputNr > 1))
8350
if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8351
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8352
"xmlParseInternalSubset: error detected in Markup declaration\n");
8363
* We should be at the end of the DOCTYPE declaration.
8366
xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8371
#ifdef LIBXML_SAX1_ENABLED
8373
* xmlParseAttribute:
8374
* @ctxt: an XML parser context
8375
* @value: a xmlChar ** used to store the value of the attribute
8377
* parse an attribute
8379
* [41] Attribute ::= Name Eq AttValue
8381
* [ WFC: No External Entity References ]
8382
* Attribute values cannot contain direct or indirect entity references
8383
* to external entities.
8385
* [ WFC: No < in Attribute Values ]
8386
* The replacement text of any entity referred to directly or indirectly in
8387
* an attribute value (other than "&lt;") must not contain a <.
8389
* [ VC: Attribute Value Type ]
8390
* The attribute must have been declared; the value must be of the type
8393
* [25] Eq ::= S? '=' S?
8397
* [NS 11] Attribute ::= QName Eq AttValue
8399
* Also the case QName == xmlns:??? is handled independently as a namespace
8402
* Returns the attribute name, and the value in *value.
8406
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8407
const xmlChar *name;
8412
name = xmlParseName(ctxt);
8414
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8415
"error parsing attribute name\n");
8426
val = xmlParseAttValue(ctxt);
8427
ctxt->instate = XML_PARSER_CONTENT;
8429
xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8430
"Specification mandate value for attribute %s\n", name);
8435
* Check that xml:lang conforms to the specification
8436
* No longer registered as an error, just generate a warning now
8437
* since this was deprecated in XML second edition
8439
if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8440
if (!xmlCheckLanguageID(val)) {
8441
xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8442
"Malformed value for xml:lang : %s\n",
8448
* Check that xml:space conforms to the specification
8450
if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8451
if (xmlStrEqual(val, BAD_CAST "default"))
8453
else if (xmlStrEqual(val, BAD_CAST "preserve"))
8456
xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8457
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8468
* @ctxt: an XML parser context
8470
* parse a start of tag either for rule element or
8471
* EmptyElement. In both cases we don't parse the tag closing chars.
8473
* [40] STag ::= '<' Name (S Attribute)* S? '>'
8475
* [ WFC: Unique Att Spec ]
8476
* No attribute name may appear more than once in the same start-tag or
8477
* empty-element tag.
8479
* [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8481
* [ WFC: Unique Att Spec ]
8482
* No attribute name may appear more than once in the same start-tag or
8483
* empty-element tag.
8487
* [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8489
* [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8491
* Returns the element name parsed
8495
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8496
const xmlChar *name;
8497
const xmlChar *attname;
8499
const xmlChar **atts = ctxt->atts;
8501
int maxatts = ctxt->maxatts;
8504
if (RAW != '<') return(NULL);
8507
name = xmlParseName(ctxt);
8509
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8510
"xmlParseStartTag: invalid element name\n");
8515
* Now parse the attributes, it ends up with the ending
8522
while (((RAW != '>') &&
8523
((RAW != '/') || (NXT(1) != '>')) &&
8524
(IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8525
const xmlChar *q = CUR_PTR;
8526
unsigned int cons = ctxt->input->consumed;
8528
attname = xmlParseAttribute(ctxt, &attvalue);
8529
if ((attname != NULL) && (attvalue != NULL)) {
8531
* [ WFC: Unique Att Spec ]
8532
* No attribute name may appear more than once in the same
8533
* start-tag or empty-element tag.
8535
for (i = 0; i < nbatts;i += 2) {
8536
if (xmlStrEqual(atts[i], attname)) {
8537
xmlErrAttributeDup(ctxt, NULL, attname);
8543
* Add the pair to atts
8546
maxatts = 22; /* allow for 10 attrs by default */
8547
atts = (const xmlChar **)
8548
xmlMalloc(maxatts * sizeof(xmlChar *));
8550
xmlErrMemory(ctxt, NULL);
8551
if (attvalue != NULL)
8556
ctxt->maxatts = maxatts;
8557
} else if (nbatts + 4 > maxatts) {
8561
n = (const xmlChar **) xmlRealloc((void *) atts,
8562
maxatts * sizeof(const xmlChar *));
8564
xmlErrMemory(ctxt, NULL);
8565
if (attvalue != NULL)
8571
ctxt->maxatts = maxatts;
8573
atts[nbatts++] = attname;
8574
atts[nbatts++] = attvalue;
8575
atts[nbatts] = NULL;
8576
atts[nbatts + 1] = NULL;
8578
if (attvalue != NULL)
8585
if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8587
if (!IS_BLANK_CH(RAW)) {
8588
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8589
"attributes construct error\n");
8592
if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8593
(attname == NULL) && (attvalue == NULL)) {
8594
xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8595
"xmlParseStartTag: problem parsing attributes\n");
8603
* SAX: Start of Element !
8605
if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8606
(!ctxt->disableSAX)) {
8608
ctxt->sax->startElement(ctxt->userData, name, atts);
8610
ctxt->sax->startElement(ctxt->userData, name, NULL);
8614
/* Free only the content strings */
8615
for (i = 1;i < nbatts;i+=2)
8616
if (atts[i] != NULL)
8617
xmlFree((xmlChar *) atts[i]);
8624
* @ctxt: an XML parser context
8625
* @line: line of the start tag
8626
* @nsNr: number of namespaces on the start tag
8628
* parse an end of tag
8630
* [42] ETag ::= '</' Name S? '>'
8634
* [NS 9] ETag ::= '</' QName S? '>'
8638
/*
 * xmlParseEndTag1:
 * Parse an end tag on the SAX1 path and check it against ctxt->name.
 *
 * @ctxt: an XML parser context
 * @line: line of the start tag, only used in the mismatch error message
 *
 * The name is read with xmlParseNameAndCompare(); any result other than
 * the sentinel (xmlChar*)1 is reported as XML_ERR_TAG_NAME_MISMATCH, and a
 * NULL result is replaced by a placeholder string in that message.
 * When a SAX endElement handler is installed and SAX is not disabled, it
 * is called with ctxt->name.
 */
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8639
const xmlChar *name;
8642
/* The input must start with the end-tag opener. */
if ((RAW != '<') || (NXT(1) != '/')) {
8643
xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8644
"xmlParseEndTag: '</' not found\n");
8649
/* Parse the name and compare it with the currently open element name. */
name = xmlParseNameAndCompare(ctxt,ctxt->name);
8652
* We should definitely be at the ending "S? '>'" part
8656
if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8657
xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8662
* [ WFC: Element Type Match ]
8663
* The Name in an element's end-tag must match the element type in the
8667
if (name != (xmlChar*)1) {
8668
if (name == NULL) name = BAD_CAST "unparseable";
8669
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8670
"Opening and ending tag mismatch: %s line %d and %s\n",
8671
ctxt->name, line, name);
8677
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8678
(!ctxt->disableSAX))
8679
ctxt->sax->endElement(ctxt->userData, ctxt->name);
8688
* @ctxt: an XML parser context
8690
* parse an end of tag
8692
* [42] ETag ::= '</' Name S? '>'
8696
* [NS 9] ETag ::= '</' QName S? '>'
8700
/*
 * Entry point that parses an end tag without start-tag line information:
 * it simply forwards to xmlParseEndTag1() with 0 as the line argument.
 *
 * @ctxt: an XML parser context
 */
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8701
xmlParseEndTag1(ctxt, 0);
8703
#endif /* LIBXML_SAX1_ENABLED */
8705
/************************************************************************
8707
* SAX 2 specific operations *
8709
************************************************************************/
8713
* @ctxt: an XML parser context
8714
* @prefix: the prefix to lookup
8716
* Lookup the namespace name for the @prefix (which can be NULL)
8717
* The prefix must come from the @ctxt->dict dictionary
8719
* Returns the namespace name or NULL if not bound
8721
/*
 * Resolve @prefix to a namespace name using the bindings stored in @ctxt.
 *
 * @ctxt: an XML parser context
 * @prefix: the prefix to resolve; it is compared by pointer, not by content
 *
 * The xml prefix (ctxt->str_xml) always maps to ctxt->str_xml_ns.
 * Otherwise ctxt->nsTab is scanned downwards from index ctxt->nsNr - 2 in
 * steps of two: slot i holds a prefix and slot i + 1 the matching name, so
 * the highest-index matching entry is the one returned.
 */
static const xmlChar *
8722
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8725
if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8726
for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8727
if (ctxt->nsTab[i] == prefix) {
8728
/* Special case: the NULL prefix bound to an empty namespace string. */
if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8730
return(ctxt->nsTab[i + 1]);
8737
* @ctxt: an XML parser context
8738
* @prefix: pointer to store the prefix part
8740
* parse an XML Namespace QName
8742
* [6] QName ::= (Prefix ':')? LocalPart
8743
* [7] Prefix ::= NCName
8744
* [8] LocalPart ::= NCName
8746
* Returns the Name parsed or NULL
8749
/*
 * Parse a namespace qualified name.
 *
 * @ctxt: an XML parser context
 * @prefix: output location for the prefix part
 *
 * The name parts are read with xmlParseNCName(). The recovery paths report
 * XML_NS_ERR_QNAME, read the offending text with xmlParseName() or
 * xmlParseNmtoken(), glue the pieces together with xmlBuildQName() and
 * intern the result in ctxt->dict through xmlDictLookup(); the temporary
 * string returned by xmlBuildQName() is released with xmlFree().
 */
static const xmlChar *
8750
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8751
const xmlChar *l, *p;
8755
l = xmlParseNCName(ctxt);
8758
l = xmlParseName(ctxt);
8760
xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8761
"Failed to parse QName '%s'\n", l, NULL, NULL);
8771
/* Second NCName read; p is the value reported as the prefix in the errors below. */
l = xmlParseNCName(ctxt);
8775
xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8776
"Failed to parse QName '%s:'\n", p, NULL, NULL);
8777
l = xmlParseNmtoken(ctxt);
8779
tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8781
tmp = xmlBuildQName(l, p, NULL, 0);
8784
/* Intern the rebuilt name in the dictionary, then drop the temporary copy. */
p = xmlDictLookup(ctxt->dict, tmp, -1);
8785
if (tmp != NULL) xmlFree(tmp);
8792
xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8793
"Failed to parse QName '%s:%s:'\n", p, l, NULL);
8795
tmp = (xmlChar *) xmlParseName(ctxt);
8797
tmp = xmlBuildQName(tmp, l, NULL, 0);
8798
l = xmlDictLookup(ctxt->dict, tmp, -1);
8799
if (tmp != NULL) xmlFree(tmp);
8803
tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8804
l = xmlDictLookup(ctxt->dict, tmp, -1);
8805
if (tmp != NULL) xmlFree(tmp);
8816
* xmlParseQNameAndCompare:
8817
* @ctxt: an XML parser context
8818
* @name: the localname
8819
* @prefix: the prefix, if any.
8821
* parse an XML name and compares for match
8822
* (specialized for endtag parsing)
8824
* Returns NULL for an illegal name, (xmlChar*) 1 for success
8825
* and the name for mismatch
8828
/*
 * Parse a qualified name and compare it with @prefix and @name
 * (used when matching an end tag).
 *
 * @ctxt: an XML parser context
 * @name: the expected local name
 * @prefix: the expected prefix; when NULL the work is delegated to
 *          xmlParseNameAndCompare()
 *
 * Fast path: the bytes at ctxt->input->cur are compared directly, first up
 * to a colon and then up to a closing bracket or blank; on success the
 * input cursor is moved to that position and (xmlChar*)1 is returned.
 * Slow path: the name is parsed with xmlParseQName() and compared by
 * pointer with @name and @prefix.
 */
static const xmlChar *
8829
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8830
xmlChar const *prefix) {
8834
const xmlChar *prefix2;
8836
if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8839
in = ctxt->input->cur;
8842
/* Walk the input and the expected string in lockstep. */
while (*in != 0 && *in == *cmp) {
8846
/* Expected string exhausted and a colon follows in the input. */
if ((*cmp == 0) && (*in == ':')) {
8849
while (*in != 0 && *in == *cmp) {
8853
if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8855
/* Commit the fast path: advance the input cursor past the name. */
ctxt->input->cur = in;
8856
return((const xmlChar*) 1);
8860
* all strings coms from the dictionary, equality can be done directly
8862
ret = xmlParseQName (ctxt, &prefix2);
8863
if ((ret == name) && (prefix == prefix2))
8864
return((const xmlChar*) 1);
8869
* xmlParseAttValueInternal:
8870
* @ctxt: an XML parser context
8871
* @len: attribute len result
8872
* @alloc: whether the attribute was reallocated as a new string
8873
* @normalize: if 1 then further non-CDATA normalization must be done
8875
* parse a value for an attribute.
8876
* NOTE: if no normalization is needed, the routine will return pointers
8877
* directly from the data buffer.
8879
* 3.3.3 Attribute-Value Normalization:
8880
* Before the value of an attribute is passed to the application or
8881
* checked for validity, the XML processor must normalize it as follows:
8882
* - a character reference is processed by appending the referenced
8883
* character to the attribute value
8884
* - an entity reference is processed by recursively processing the
8885
* replacement text of the entity
8886
* - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8887
* appending #x20 to the normalized value, except that only a single
8888
* #x20 is appended for a "#xD#xA" sequence that is part of an external
8889
* parsed entity or the literal entity value of an internal parsed entity
8890
* - other characters are processed by appending them to the normalized value
8891
* If the declared value is not CDATA, then the XML processor must further
8892
* process the normalized attribute value by discarding any leading and
8893
* trailing space (#x20) characters, and by replacing sequences of space
8894
* (#x20) characters by a single space (#x20) character.
8895
* All attributes for which no declaration has been read should be treated
8896
* by a non-validating parser as if declared CDATA.
8898
* Returns the AttValue parsed or NULL. The value has to be freed by the
8899
* caller if it was copied, this can be detected by val[*len] == 0.
8903
/*
 * Parse an attribute value, avoiding a copy whenever possible.
 *
 * @ctxt: an XML parser context
 * @len: receives the value length (last - start) on the fast path
 * @alloc: when non-NULL, receives 1 if the returned string was allocated
 *         and 0 if it points into the input buffer
 * @normalize: forwarded to xmlParseAttValueComplex() on the slow path
 *
 * The value must start with a quote character, otherwise
 * XML_ERR_ATTRIBUTE_NOT_STARTED is raised. The fast path scans plain
 * ASCII content (no ampersand, no less-than sign, nothing below 0x20 or
 * above 0x7f) directly in the input buffer. Each time the buffer may have
 * been refilled, ctxt->input->base is compared with its previous value and
 * the start (and last) pointers are shifted by the difference. A value
 * longer than XML_MAX_TEXT_LENGTH is rejected with
 * XML_ERR_ATTRIBUTE_NOT_FINISHED unless XML_PARSE_HUGE is set.
 * If the scan does not stop on the closing quote, control jumps to
 * need_complex and xmlParseAttValueComplex() does the work.
 */
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8907
const xmlChar *in = NULL, *start, *end, *last;
8908
xmlChar *ret = NULL;
8911
in = (xmlChar *) CUR_PTR;
8912
if (*in != '"' && *in != '\'') {
8913
xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8916
ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8919
* try to handle in this routine the most common case where no
8920
* allocation of a new string is required and where content is
8924
end = ctxt->input->end;
8927
const xmlChar *oldbase = ctxt->input->base;
8929
if (oldbase != ctxt->input->base) {
8930
long delta = ctxt->input->base - oldbase;
8931
start = start + delta;
8934
end = ctxt->input->end;
8938
* Skip any leading spaces
8940
while ((in < end) && (*in != limit) &&
8941
((*in == 0x20) || (*in == 0x9) ||
8942
(*in == 0xA) || (*in == 0xD))) {
8946
const xmlChar *oldbase = ctxt->input->base;
8948
if (ctxt->instate == XML_PARSER_EOF)
8950
if (oldbase != ctxt->input->base) {
8951
long delta = ctxt->input->base - oldbase;
8952
start = start + delta;
8955
end = ctxt->input->end;
8956
if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8957
((ctxt->options & XML_PARSE_HUGE) == 0)) {
8958
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8959
"AttValue length too long\n");
8964
while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8965
(*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8966
if ((*in++ == 0x20) && (*in == 0x20)) break;
8968
const xmlChar *oldbase = ctxt->input->base;
8970
if (ctxt->instate == XML_PARSER_EOF)
8972
if (oldbase != ctxt->input->base) {
8973
long delta = ctxt->input->base - oldbase;
8974
start = start + delta;
8977
end = ctxt->input->end;
8978
if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8979
((ctxt->options & XML_PARSE_HUGE) == 0)) {
8980
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8981
"AttValue length too long\n");
8988
* skip the trailing blanks
8990
/* Test the bound first so last[-1] is never read when last == start. */
while ((last > start) && (last[-1] == 0x20)) last--;
8991
while ((in < end) && (*in != limit) &&
8992
((*in == 0x20) || (*in == 0x9) ||
8993
(*in == 0xA) || (*in == 0xD))) {
8996
const xmlChar *oldbase = ctxt->input->base;
8998
if (ctxt->instate == XML_PARSER_EOF)
9000
if (oldbase != ctxt->input->base) {
9001
long delta = ctxt->input->base - oldbase;
9002
start = start + delta;
9004
last = last + delta;
9006
end = ctxt->input->end;
9007
if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9008
((ctxt->options & XML_PARSE_HUGE) == 0)) {
9009
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9010
"AttValue length too long\n");
9015
if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9016
((ctxt->options & XML_PARSE_HUGE) == 0)) {
9017
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9018
"AttValue length too long\n");
9021
if (*in != limit) goto need_complex;
9023
while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9024
(*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9027
const xmlChar *oldbase = ctxt->input->base;
9029
if (ctxt->instate == XML_PARSER_EOF)
9031
if (oldbase != ctxt->input->base) {
9032
long delta = ctxt->input->base - oldbase;
9033
start = start + delta;
9036
end = ctxt->input->end;
9037
if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9038
((ctxt->options & XML_PARSE_HUGE) == 0)) {
9039
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9040
"AttValue length too long\n");
9046
if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9047
((ctxt->options & XML_PARSE_HUGE) == 0)) {
9048
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9049
"AttValue length too long\n");
9052
if (*in != limit) goto need_complex;
9056
/* Fast path result: the value is the span between start and last. */
*len = last - start;
9057
ret = (xmlChar *) start;
9059
if (alloc) *alloc = 1;
9060
ret = xmlStrndup(start, last - start);
9063
if (alloc) *alloc = 0;
9066
/* Slow path: the complex parser always returns an allocated string. */
if (alloc) *alloc = 1;
9067
return xmlParseAttValueComplex(ctxt, len, normalize);
9071
* xmlParseAttribute2:
9072
* @ctxt: an XML parser context
9073
* @pref: the element prefix
9074
* @elem: the element name
9075
* @prefix: a xmlChar ** used to store the value of the attribute prefix
9076
* @value: a xmlChar ** used to store the value of the attribute
9077
* @len: an int * to save the length of the attribute
9078
* @alloc: an int * to indicate if the attribute was allocated
9080
* parse an attribute in the new SAX2 framework.
9082
* Returns the attribute name, and the value in *value.
9085
/*
 * Parse one attribute (name and value) for the SAX2 start-tag parser.
 *
 * @ctxt: an XML parser context
 * @pref: prefix of the owning element, used for the attsSpecial lookup
 * @elem: name of the owning element, used for the attsSpecial lookup
 * @prefix: receives the attribute prefix (filled in by xmlParseQName)
 * @value: output location for the attribute value
 * @len: passed to xmlParseAttValueInternal to receive the value length
 * @alloc: passed to xmlParseAttValueInternal to receive the allocation flag
 *
 * Steps visible here: the qualified name is parsed (XML_ERR_NAME_REQUIRED
 * on failure); the attribute type is fetched from ctxt->attsSpecial when
 * that table exists; the value is parsed and may get a second space
 * normalisation pass through xmlAttrNormalizeSpace2(); finally attributes
 * in the xml prefix get extra checks: xml:lang (pedantic mode only, via
 * xmlCheckLanguageID) and xml:space (must be default or preserve), both
 * of which only emit warnings.
 */
static const xmlChar *
9086
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9087
const xmlChar * pref, const xmlChar * elem,
9088
const xmlChar ** prefix, xmlChar ** value,
9089
int *len, int *alloc)
9091
const xmlChar *name;
9092
xmlChar *val, *internal_val = NULL;
9097
name = xmlParseQName(ctxt, prefix);
9099
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9100
"error parsing attribute name\n");
9105
* get the type if needed
9107
if (ctxt->attsSpecial != NULL) {
9110
/* The hash stores the type as a pointer-sized integer. */
type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
9111
pref, elem, *prefix, name);
9123
val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9126
* Sometimes a second normalisation pass for spaces is needed
9127
* but that only happens if charrefs or entities refernces
9128
* have been used in the attribute value, i.e. the attribute
9129
* value have been extracted in an allocated string already.
9132
const xmlChar *val2;
9134
val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9135
/* A different non-NULL pointer means a new normalised string was produced. */
if ((val2 != NULL) && (val2 != val)) {
9137
val = (xmlChar *) val2;
9141
ctxt->instate = XML_PARSER_CONTENT;
9143
xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9144
"Specification mandate value for attribute %s\n",
9149
/* Extra checks for attributes in the reserved xml prefix. */
if (*prefix == ctxt->str_xml) {
9151
* Check that xml:lang conforms to the specification
9152
* No more registered as an error, just generate a warning now
9153
* since this was deprecated in XML second edition
9155
if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9156
/* val is not necessarily zero terminated, so check a bounded copy. */
internal_val = xmlStrndup(val, *len);
9157
if (!xmlCheckLanguageID(internal_val)) {
9158
xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9159
"Malformed value for xml:lang : %s\n",
9160
internal_val, NULL);
9165
* Check that xml:space conforms to the specification
9167
if (xmlStrEqual(name, BAD_CAST "space")) {
9168
internal_val = xmlStrndup(val, *len);
9169
if (xmlStrEqual(internal_val, BAD_CAST "default"))
9171
else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9174
xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9175
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9176
internal_val, NULL);
9180
/* Release the bounded copy made for the xml:lang or xml:space check. */
xmlFree(internal_val);
9188
* xmlParseStartTag2:
9189
* @ctxt: an XML parser context
9191
* parse a start of tag either for rule element or
9192
* EmptyElement. In both cases we don't parse the tag closing chars.
9193
* This routine is called when running SAX2 parsing
9195
* [40] STag ::= '<' Name (S Attribute)* S? '>'
9197
* [ WFC: Unique Att Spec ]
9198
* No attribute name may appear more than once in the same start-tag or
9199
* empty-element tag.
9201
* [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9203
* [ WFC: Unique Att Spec ]
9204
* No attribute name may appear more than once in the same start-tag or
9205
* empty-element tag.
9209
* [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9211
* [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9213
* Returns the element name parsed
9216
/*
 * Parse a start tag (or the opening part of an empty-element tag) on the
 * SAX2 path, without consuming the closing characters.
 *
 * @ctxt: an XML parser context
 * @pref: output location for the element prefix
 * @URI: output location for the element namespace name
 * @tlen: receives the number of input bytes covered by the tag name,
 *        measured from the opening bracket
 *
 * Overview of the visible steps:
 *  - the input base, cursor offset, line and column are saved so that the
 *    whole tag can be parsed again from the base_changed label if the
 *    input buffer moved while attribute values still pointed into it;
 *  - each attribute is read with xmlParseAttribute2(); xmlns and
 *    xmlns:prefix declarations are validated and pushed with nsPush(),
 *    every other attribute takes five consecutive slots in atts (name,
 *    prefix, namespace name filled in later, and two value slots);
 *  - defaulted attributes and defaulted namespace declarations from
 *    ctxt->attsDefault are appended when not already present;
 *  - attribute prefixes are resolved with xmlGetNamespace() and duplicate
 *    attributes are reported;
 *  - the SAX startElementNs callback is invoked, after which values
 *    flagged in ctxt->attallocs are freed.
 *
 * Fix applied in this revision: the defaulted xmlns:prefix branch now
 * compares the current binding with the i-th default
 * (defaults->values[5 * i + 2]) instead of always using values[2], which
 * belongs to the first default only. This matches the sibling branch for
 * the default namespace and the value actually pushed by nsPush().
 */
static const xmlChar *
9217
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9218
const xmlChar **URI, int *tlen) {
9219
const xmlChar *localname;
9220
const xmlChar *prefix;
9221
const xmlChar *attname;
9222
const xmlChar *aprefix;
9223
const xmlChar *nsname;
9225
const xmlChar **atts = ctxt->atts;
9226
int maxatts = ctxt->maxatts;
9227
int nratts, nbatts, nbdef;
9228
int i, j, nbNs, attval, oldline, oldcol;
9229
const xmlChar *base;
9231
int nsNr = ctxt->nsNr;
9233
if (RAW != '<') return(NULL);
9237
* NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9238
* point since the attribute values may be stored as pointers to
9239
* the buffer and calling SHRINK would destroy them !
9240
* The Shrinking is only possible once the full set of attribute
9241
* callbacks have been done.
9245
/* Snapshot of the input state, restored at base_changed for a reparse. */
base = ctxt->input->base;
9246
cur = ctxt->input->cur - ctxt->input->base;
9247
oldline = ctxt->input->line;
9248
oldcol = ctxt->input->col;
9254
/* Forget any namespaces added during an earlier parse of this element. */
9257
localname = xmlParseQName(ctxt, &prefix);
9258
if (localname == NULL) {
9259
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9260
"StartTag: invalid element name\n");
9263
*tlen = ctxt->input->cur - ctxt->input->base - cur;
9266
* Now parse the attributes, it ends up with the ending
9272
if (ctxt->input->base != base) goto base_changed;
9274
while (((RAW != '>') &&
9275
((RAW != '/') || (NXT(1) != '>')) &&
9276
(IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9277
const xmlChar *q = CUR_PTR;
9278
unsigned int cons = ctxt->input->consumed;
9279
int len = -1, alloc = 0;
9281
attname = xmlParseAttribute2(ctxt, prefix, localname,
9282
&aprefix, &attvalue, &len, &alloc);
9283
if (ctxt->input->base != base) {
9284
if ((attvalue != NULL) && (alloc != 0))
9289
if ((attname != NULL) && (attvalue != NULL)) {
9290
if (len < 0) len = xmlStrlen(attvalue);
9291
if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9292
const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9296
uri = xmlParseURI((const char *) URL);
9298
xmlNsErr(ctxt, XML_WAR_NS_URI,
9299
"xmlns: '%s' is not a valid URI\n",
9302
if (uri->scheme == NULL) {
9303
xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9304
"xmlns: URI %s is not absolute\n",
9309
if (URL == ctxt->str_xml_ns) {
9310
if (attname != ctxt->str_xml) {
9311
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9312
"xml namespace URI cannot be the default namespace\n",
9315
goto skip_default_ns;
9319
BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9320
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9321
"reuse of the xmlns namespace name is forbidden\n",
9323
goto skip_default_ns;
9327
* check that it's not a defined namespace
9329
for (j = 1;j <= nbNs;j++)
9330
if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9333
xmlErrAttributeDup(ctxt, NULL, attname);
9335
if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9337
if (alloc != 0) xmlFree(attvalue);
9341
if (aprefix == ctxt->str_xmlns) {
9342
const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9345
if (attname == ctxt->str_xml) {
9346
if (URL != ctxt->str_xml_ns) {
9347
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9348
"xml namespace prefix mapped to wrong URI\n",
9352
* Do not keep a namespace definition node
9356
if (URL == ctxt->str_xml_ns) {
9357
if (attname != ctxt->str_xml) {
9358
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9359
"xml namespace URI mapped to wrong prefix\n",
9364
if (attname == ctxt->str_xmlns) {
9365
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9366
"redefinition of the xmlns prefix is forbidden\n",
9372
BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9373
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9374
"reuse of the xmlns namespace name is forbidden\n",
9378
if ((URL == NULL) || (URL[0] == 0)) {
9379
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9380
"xmlns:%s: Empty XML namespace is not allowed\n",
9381
attname, NULL, NULL);
9384
uri = xmlParseURI((const char *) URL);
9386
xmlNsErr(ctxt, XML_WAR_NS_URI,
9387
"xmlns:%s: '%s' is not a valid URI\n",
9388
attname, URL, NULL);
9390
if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9391
xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9392
"xmlns:%s: URI %s is not absolute\n",
9393
attname, URL, NULL);
9400
* check that it's not a defined namespace
9402
for (j = 1;j <= nbNs;j++)
9403
if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9406
xmlErrAttributeDup(ctxt, aprefix, attname);
9408
if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9410
if (alloc != 0) xmlFree(attvalue);
9412
if (ctxt->input->base != base) goto base_changed;
9417
* Add the pair to atts
9419
if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9420
if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9421
if (attvalue[len] == 0)
9425
maxatts = ctxt->maxatts;
9428
ctxt->attallocs[nratts++] = alloc;
9429
atts[nbatts++] = attname;
9430
atts[nbatts++] = aprefix;
9431
atts[nbatts++] = NULL; /* the URI will be fetched later */
9432
atts[nbatts++] = attvalue;
9434
atts[nbatts++] = attvalue;
9436
* tag if some deallocation is needed
9438
if (alloc != 0) attval = 1;
9440
if ((attvalue != NULL) && (attvalue[len] == 0))
9447
if (ctxt->instate == XML_PARSER_EOF)
9449
if (ctxt->input->base != base) goto base_changed;
9450
if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9452
if (!IS_BLANK_CH(RAW)) {
9453
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9454
"attributes construct error\n");
9458
if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9459
(attname == NULL) && (attvalue == NULL)) {
9460
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9461
"xmlParseStartTag: problem parsing attributes\n");
9465
if (ctxt->input->base != base) goto base_changed;
9469
* The attributes defaulting
9471
if (ctxt->attsDefault != NULL) {
9472
xmlDefAttrsPtr defaults;
9474
defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9475
if (defaults != NULL) {
9476
for (i = 0;i < defaults->nbAttrs;i++) {
9477
attname = defaults->values[5 * i];
9478
aprefix = defaults->values[5 * i + 1];
9481
* special work for namespaces defaulted defs
9483
if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9485
* check that it's not a defined namespace
9487
for (j = 1;j <= nbNs;j++)
9488
if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9490
if (j <= nbNs) continue;
9492
nsname = xmlGetNamespace(ctxt, NULL);
9493
if (nsname != defaults->values[5 * i + 2]) {
9494
if (nsPush(ctxt, NULL,
9495
defaults->values[5 * i + 2]) > 0)
9498
} else if (aprefix == ctxt->str_xmlns) {
9500
* check that it's not a defined namespace
9502
for (j = 1;j <= nbNs;j++)
9503
if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9505
if (j <= nbNs) continue;
9507
nsname = xmlGetNamespace(ctxt, attname);
9508
/* Compare with the i-th default, the same entry that is pushed below. */
if (nsname != defaults->values[5 * i + 2]) {
9509
if (nsPush(ctxt, attname,
9510
defaults->values[5 * i + 2]) > 0)
9515
* check that it's not a defined attribute
9517
for (j = 0;j < nbatts;j+=5) {
9518
if ((attname == atts[j]) && (aprefix == atts[j+1]))
9521
if (j < nbatts) continue;
9523
if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9524
if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9527
maxatts = ctxt->maxatts;
9530
atts[nbatts++] = attname;
9531
atts[nbatts++] = aprefix;
9532
if (aprefix == NULL)
9533
atts[nbatts++] = NULL;
9535
atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9536
atts[nbatts++] = defaults->values[5 * i + 2];
9537
atts[nbatts++] = defaults->values[5 * i + 3];
9538
if ((ctxt->standalone == 1) &&
9539
(defaults->values[5 * i + 4] != NULL)) {
9540
xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9541
"standalone: attribute %s on %s defaulted from external subset\n",
9542
attname, localname);
9551
* The attributes checkings
9553
for (i = 0; i < nbatts;i += 5) {
9555
* The default namespace does not apply to attribute names.
9557
if (atts[i + 1] != NULL) {
9558
nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9559
if (nsname == NULL) {
9560
xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9561
"Namespace prefix %s for %s on %s is not defined\n",
9562
atts[i + 1], atts[i], localname);
9564
atts[i + 2] = nsname;
9568
* [ WFC: Unique Att Spec ]
9569
* No attribute name may appear more than once in the same
9570
* start-tag or empty-element tag.
9571
* As extended by the Namespace in XML REC.
9573
for (j = 0; j < i;j += 5) {
9574
if (atts[i] == atts[j]) {
9575
if (atts[i+1] == atts[j+1]) {
9576
xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9579
if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9580
xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9581
"Namespaced Attribute %s in '%s' redefined\n",
9582
atts[i], nsname, NULL);
9589
nsname = xmlGetNamespace(ctxt, prefix);
9590
if ((prefix != NULL) && (nsname == NULL)) {
9591
xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9592
"Namespace prefix %s on %s is not defined\n",
9593
prefix, localname, NULL);
9599
* SAX: Start of Element !
9601
if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9602
(!ctxt->disableSAX)) {
9604
ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9605
nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9606
nbatts / 5, nbdef, atts);
9608
ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9609
nsname, 0, NULL, nbatts / 5, nbdef, atts);
9613
* Free up attribute allocated strings if needed
9616
for (i = 3,j = 0; j < nratts;i += 5,j++)
9617
if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9618
xmlFree((xmlChar *) atts[i]);
9625
* the attribute strings are valid iif the base didn't changed
9628
for (i = 3,j = 0; j < nratts;i += 5,j++)
9629
if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9630
xmlFree((xmlChar *) atts[i]);
9632
ctxt->input->cur = ctxt->input->base + cur;
9633
ctxt->input->line = oldline;
9634
ctxt->input->col = oldcol;
9635
if (ctxt->wellFormed == 1) {
9643
* @ctxt: an XML parser context
9644
* @line: line of the start tag
9645
* @nsNr: number of namespaces on the start tag
9647
* parse an end of tag
9649
* [42] ETag ::= '</' Name S? '>'
9653
* [NS 9] ETag ::= '</' QName S? '>'
9657
/*
 * Parse an end tag on the SAX2 path and check it against ctxt->name.
 *
 * @ctxt: an XML parser context
 * @prefix: prefix of the element being closed
 * @URI: namespace name handed to the endElementNs callback
 * @line: line of the start tag for the mismatch message; when 0 the line
 *        of ctxt->node is used instead if that node exists
 * @nsNr: number of namespaces on the start tag
 * @tlen: byte length of the start tag name; when positive it enables a
 *        direct xmlStrncmp() of the input against ctxt->name
 *
 * Without the shortcut the name is parsed with xmlParseNameAndCompare() or
 * xmlParseQNameAndCompare(). A result other than the sentinel (xmlChar*)1
 * is reported as XML_ERR_TAG_NAME_MISMATCH. When a SAX endElementNs
 * handler is installed and SAX is not disabled, it is called with
 * ctxt->name, @prefix and @URI.
 */
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9658
const xmlChar *URI, int line, int nsNr, int tlen) {
9659
const xmlChar *name;
9662
if ((RAW != '<') || (NXT(1) != '/')) {
9663
xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9668
/* Shortcut: compare the raw input with the open element name. */
if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9669
if (ctxt->input->cur[tlen] == '>') {
9670
/* Skip the name and the closing bracket in one step. */
ctxt->input->cur += tlen + 1;
9673
ctxt->input->cur += tlen;
9677
name = xmlParseNameAndCompare(ctxt, ctxt->name);
9679
name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9683
* We should definitely be at the ending "S? '>'" part
9686
if (ctxt->instate == XML_PARSER_EOF)
9689
if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9690
xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9695
* [ WFC: Element Type Match ]
9696
* The Name in an element's end-tag must match the element type in the
9700
if (name != (xmlChar*)1) {
9701
if (name == NULL) name = BAD_CAST "unparseable";
9702
if ((line == 0) && (ctxt->node != NULL))
9703
line = ctxt->node->line;
9704
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9705
"Opening and ending tag mismatch: %s line %d and %s\n",
9706
ctxt->name, line, name);
9713
if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9714
(!ctxt->disableSAX))
9715
ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9725
* @ctxt: an XML parser context
9727
* Parse escaped pure raw content.
9729
* [18] CDSect ::= CDStart CData CDEnd
9731
* [19] CDStart ::= '<![CDATA['
9733
* [20] CData ::= (Char* - (Char* ']]>' Char*))
9735
* [21] CDEnd ::= ']]>'
9738
/*
 * Parse a CDATA section and hand its content to the SAX layer.
 *
 * @ctxt: an XML parser context
 *
 * The section must begin with the nine-character CDATA opener. The
 * content is accumulated in a heap buffer that starts at
 * XML_PARSER_BUFFER_SIZE bytes and is reallocated to twice its size when
 * fewer than five bytes of headroom remain. Growing past
 * XML_MAX_TEXT_LENGTH is refused unless XML_PARSE_HUGE is set.
 * XML_ERR_CDATA_NOT_FINISHED is raised on the error paths shown below.
 * The finished buffer goes to the cdataBlock callback, or to the
 * characters callback when no cdataBlock handler is installed.
 */
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9739
xmlChar *buf = NULL;
9741
int size = XML_PARSER_BUFFER_SIZE;
9747
/* Check 2.6.0 was NXT(0) not RAW */
9748
if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9753
ctxt->instate = XML_PARSER_CDATA_SECTION;
9756
xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9757
ctxt->instate = XML_PARSER_CONTENT;
9763
xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9764
ctxt->instate = XML_PARSER_CONTENT;
9769
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9771
xmlErrMemory(ctxt, NULL);
9774
/* r, s and cur form a three-character window used to spot the terminator. */
while (IS_CHAR(cur) &&
9775
((r != ']') || (s != ']') || (cur != '>'))) {
9776
/* Keep at least five bytes of headroom before copying a character. */
if (len + 5 >= size) {
9779
if ((size > XML_MAX_TEXT_LENGTH) &&
9780
((ctxt->options & XML_PARSE_HUGE) == 0)) {
9781
xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9782
"CData section too big found", NULL);
9786
tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9789
xmlErrMemory(ctxt, NULL);
9795
COPY_BUF(rl,buf,len,r);
9803
if (ctxt->instate == XML_PARSER_EOF) {
9813
ctxt->instate = XML_PARSER_CONTENT;
9815
xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9816
"CData section not finished\n%.50s\n", buf);
9823
* OK the buffer is to be consumed as cdata.
9825
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9826
if (ctxt->sax->cdataBlock != NULL)
9827
ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9828
else if (ctxt->sax->characters != NULL)
9829
ctxt->sax->characters(ctxt->userData, buf, len);
9836
* @ctxt: an XML parser context
9840
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9844
/*
 * Parse element content until an end tag opener, the end of input, or the
 * XML_PARSER_EOF state is reached.
 *
 * @ctxt: an XML parser context
 *
 * Each iteration looks at the current input and dispatches on it:
 * processing instruction, CDATA section (xmlParseCDSect), comment
 * (xmlParseComment), child element (xmlParseElement), reference
 * (xmlParseReference) or character data (xmlParseCharData).
 * The consumed counter and the cursor are sampled before the dispatch and
 * compared afterwards; an iteration that made no progress raises
 * XML_ERR_INTERNAL_ERROR and switches the parser to XML_PARSER_EOF so the
 * loop cannot spin forever.
 */
xmlParseContent(xmlParserCtxtPtr ctxt) {
9846
while ((RAW != 0) &&
9847
((RAW != '<') || (NXT(1) != '/')) &&
9848
(ctxt->instate != XML_PARSER_EOF)) {
9849
/* Progress markers, checked again at the bottom of the loop. */
const xmlChar *test = CUR_PTR;
9850
unsigned int cons = ctxt->input->consumed;
9851
const xmlChar *cur = ctxt->input->cur;
9854
* First case : a Processing Instruction.
9856
if ((*cur == '<') && (cur[1] == '?')) {
9861
* Second case : a CDSection
9863
/* 2.6.0 test was *cur not RAW */
9864
else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9865
xmlParseCDSect(ctxt);
9869
* Third case : a comment
9871
else if ((*cur == '<') && (NXT(1) == '!') &&
9872
(NXT(2) == '-') && (NXT(3) == '-')) {
9873
xmlParseComment(ctxt);
9874
ctxt->instate = XML_PARSER_CONTENT;
9878
* Fourth case : a sub-element.
9880
else if (*cur == '<') {
9881
xmlParseElement(ctxt);
9885
* Fifth case : a reference. If if has not been resolved,
9886
* parsing returns it's Name, create the node
9889
else if (*cur == '&') {
9890
xmlParseReference(ctxt);
9894
* Last case, text. Note that References are handled directly.
9897
xmlParseCharData(ctxt, 0);
9902
* Pop-up of finished entities.
9904
while ((RAW == 0) && (ctxt->inputNr > 1))
9908
if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9909
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9910
"detected an error in element content\n");
9911
ctxt->instate = XML_PARSER_EOF;
9919
* @ctxt: an XML parser context
9921
* parse an XML element, this is highly recursive
9923
* [39] element ::= EmptyElemTag | STag content ETag
9925
* [ WFC: Element Type Match ]
9926
* The Name in an element's end-tag must match the element type in the
9932
/*
 * Parse one complete element: start tag, content and end tag.
 * The function is recursive through xmlParseContent().
 *
 * @ctxt: an XML parser context
 *
 * Visible steps:
 *  - refuse to go deeper than xmlParserMaxDepth open elements unless
 *    XML_PARSE_HUGE is set (the parser is then put in XML_PARSER_EOF);
 *  - when ctxt->record_info is set, remember the start position and line
 *    in node_info and register it with xmlParserAddNodeInfo() once the
 *    element is finished;
 *  - push an entry on the space stack with spacePush();
 *  - parse the start tag with xmlParseStartTag2() or xmlParseStartTag()
 *    and push the name with namePush();
 *  - with validation enabled, check the root element type through
 *    xmlValidateRoot() when the current node is the first child of the
 *    document;
 *  - for an empty-element tag, fire endElementNs or endElement right away;
 *  - otherwise parse the content and then the end tag with
 *    xmlParseEndTag2() or xmlParseEndTag1();
 *  - on every exit path shown, namespaces pushed since entry are removed
 *    with nsPop() using the nsNr value saved at entry.
 */
xmlParseElement(xmlParserCtxtPtr ctxt) {
9933
const xmlChar *name;
9934
const xmlChar *prefix = NULL;
9935
const xmlChar *URI = NULL;
9936
xmlParserNodeInfo node_info;
9939
/* Namespace stack depth at entry, used to undo this element bindings. */
int nsNr = ctxt->nsNr;
9941
if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9942
((ctxt->options & XML_PARSE_HUGE) == 0)) {
9943
xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9944
"Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9946
ctxt->instate = XML_PARSER_EOF;
9950
/* Capture start position */
9951
if (ctxt->record_info) {
9952
node_info.begin_pos = ctxt->input->consumed +
9953
(CUR_PTR - ctxt->input->base);
9954
node_info.begin_line = ctxt->input->line;
9957
/* Push -1 when the space stack is empty or its top is -2, else repeat the top. */
if (ctxt->spaceNr == 0)
9958
spacePush(ctxt, -1);
9959
else if (*ctxt->space == -2)
9960
spacePush(ctxt, -1);
9962
spacePush(ctxt, *ctxt->space);
9964
line = ctxt->input->line;
9965
#ifdef LIBXML_SAX1_ENABLED
9967
#endif /* LIBXML_SAX1_ENABLED */
9968
name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9969
#ifdef LIBXML_SAX1_ENABLED
9971
name = xmlParseStartTag(ctxt);
9972
#endif /* LIBXML_SAX1_ENABLED */
9973
if (ctxt->instate == XML_PARSER_EOF)
9979
namePush(ctxt, name);
9982
#ifdef LIBXML_VALID_ENABLED
9984
* [ VC: Root Element Type ]
9985
* The Name in the document type declaration must match the element
9986
* type of the root element.
9988
if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9989
ctxt->node && (ctxt->node == ctxt->myDoc->children))
9990
ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9991
#endif /* LIBXML_VALID_ENABLED */
9994
* Check for an Empty Element.
9996
if ((RAW == '/') && (NXT(1) == '>')) {
9999
if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10000
(!ctxt->disableSAX))
10001
ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10002
#ifdef LIBXML_SAX1_ENABLED
10004
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10005
(!ctxt->disableSAX))
10006
ctxt->sax->endElement(ctxt->userData, name);
10007
#endif /* LIBXML_SAX1_ENABLED */
10011
if (nsNr != ctxt->nsNr)
10012
nsPop(ctxt, ctxt->nsNr - nsNr);
10013
if ( ret != NULL && ctxt->record_info ) {
10014
node_info.end_pos = ctxt->input->consumed +
10015
(CUR_PTR - ctxt->input->base);
10016
node_info.end_line = ctxt->input->line;
10017
node_info.node = ret;
10018
xmlParserAddNodeInfo(ctxt, &node_info);
10025
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10026
"Couldn't find end of Start Tag %s line %d\n",
10030
* end of parsing of this node.
10035
if (nsNr != ctxt->nsNr)
10036
nsPop(ctxt, ctxt->nsNr - nsNr);
10039
* Capture end position and add node
10041
if ( ret != NULL && ctxt->record_info ) {
10042
node_info.end_pos = ctxt->input->consumed +
10043
(CUR_PTR - ctxt->input->base);
10044
node_info.end_line = ctxt->input->line;
10045
node_info.node = ret;
10046
xmlParserAddNodeInfo(ctxt, &node_info);
10052
* Parse the content of the element:
10054
xmlParseContent(ctxt);
10055
if (ctxt->instate == XML_PARSER_EOF)
10057
if (!IS_BYTE_CHAR(RAW)) {
10058
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10059
"Premature end of data in tag %s line %d\n",
10063
* end of parsing of this node.
10068
if (nsNr != ctxt->nsNr)
10069
nsPop(ctxt, ctxt->nsNr - nsNr);
10074
* parse the end of tag: '</' should be here.
10077
xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
10080
#ifdef LIBXML_SAX1_ENABLED
10082
xmlParseEndTag1(ctxt, line);
10083
#endif /* LIBXML_SAX1_ENABLED */
10086
* Capture end position and add node
10088
if ( ret != NULL && ctxt->record_info ) {
10089
node_info.end_pos = ctxt->input->consumed +
10090
(CUR_PTR - ctxt->input->base);
10091
node_info.end_line = ctxt->input->line;
10092
node_info.node = ret;
10093
xmlParserAddNodeInfo(ctxt, &node_info);
10098
* xmlParseVersionNum:
10099
* @ctxt: an XML parser context
10101
* parse the XML version value.
10103
* [26] VersionNum ::= '1.' [0-9]+
10105
* In practice allow [0-9].[0-9]+ at that level
10107
* Returns the string giving the XML version number, or NULL
10110
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10111
xmlChar *buf = NULL;
10116
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10118
xmlErrMemory(ctxt, NULL);
10122
if (!((cur >= '0') && (cur <= '9'))) {
10136
while ((cur >= '0') && (cur <= '9')) {
10137
if (len + 1 >= size) {
10141
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10144
xmlErrMemory(ctxt, NULL);
10158
* xmlParseVersionInfo:
10159
* @ctxt: an XML parser context
10161
* parse the XML version.
10163
* [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10165
* [25] Eq ::= S? '=' S?
10167
* Returns the version string, e.g. "1.0"
10171
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10172
xmlChar *version = NULL;
10174
if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10178
xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10185
version = xmlParseVersionNum(ctxt);
10187
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10190
} else if (RAW == '\''){
10192
version = xmlParseVersionNum(ctxt);
10194
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10198
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10206
* @ctxt: an XML parser context
10208
* parse the XML encoding name
10210
* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10212
* Returns the encoding name value or NULL
10215
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10216
xmlChar *buf = NULL;
10222
if (((cur >= 'a') && (cur <= 'z')) ||
10223
((cur >= 'A') && (cur <= 'Z'))) {
10224
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10226
xmlErrMemory(ctxt, NULL);
10233
while (((cur >= 'a') && (cur <= 'z')) ||
10234
((cur >= 'A') && (cur <= 'Z')) ||
10235
((cur >= '0') && (cur <= '9')) ||
10236
(cur == '.') || (cur == '_') ||
10238
if (len + 1 >= size) {
10242
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10244
xmlErrMemory(ctxt, NULL);
10261
xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10267
* xmlParseEncodingDecl:
10268
* @ctxt: an XML parser context
10270
* parse the XML encoding declaration
10272
* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10274
* this sets up the conversion filters.
10276
* Returns the encoding value or NULL
10280
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10281
xmlChar *encoding = NULL;
10284
if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10288
xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10295
encoding = xmlParseEncName(ctxt);
10297
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10300
} else if (RAW == '\''){
10302
encoding = xmlParseEncName(ctxt);
10304
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10308
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10312
* Non standard parsing, allowing the user to ignore encoding
10314
if (ctxt->options & XML_PARSE_IGNORE_ENC)
10318
* UTF-16 encoding switch has already taken place at this stage,
10319
* moreover the little-endian/big-endian selection is already done
10321
if ((encoding != NULL) &&
10322
((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10323
(!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10325
* If no encoding was passed to the parser, that we are
10326
* using UTF-16 and no decoder is present i.e. the
10327
* document is apparently UTF-8 compatible, then raise an
10328
* encoding mismatch fatal error
10330
if ((ctxt->encoding == NULL) &&
10331
(ctxt->input->buf != NULL) &&
10332
(ctxt->input->buf->encoder == NULL)) {
10333
xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10334
"Document labelled UTF-16 but has UTF-8 content\n");
10336
if (ctxt->encoding != NULL)
10337
xmlFree((xmlChar *) ctxt->encoding);
10338
ctxt->encoding = encoding;
10341
* UTF-8 encoding is handled natively
10343
else if ((encoding != NULL) &&
10344
((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10345
(!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10346
if (ctxt->encoding != NULL)
10347
xmlFree((xmlChar *) ctxt->encoding);
10348
ctxt->encoding = encoding;
10350
else if (encoding != NULL) {
10351
xmlCharEncodingHandlerPtr handler;
10353
if (ctxt->input->encoding != NULL)
10354
xmlFree((xmlChar *) ctxt->input->encoding);
10355
ctxt->input->encoding = encoding;
10357
handler = xmlFindCharEncodingHandler((const char *) encoding);
10358
if (handler != NULL) {
10359
xmlSwitchToEncoding(ctxt, handler);
10361
xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10362
"Unsupported encoding %s\n", encoding);
10372
* @ctxt: an XML parser context
10374
* parse the XML standalone declaration
10376
* [32] SDDecl ::= S 'standalone' Eq
10377
* (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10379
* [ VC: Standalone Document Declaration ]
10380
* TODO The standalone document declaration must have the value "no"
10381
* if any external markup declarations contain declarations of:
10382
* - attributes with default values, if elements to which these
10383
* attributes apply appear in the document without specifications
10384
* of values for these attributes, or
10385
* - entities (other than amp, lt, gt, apos, quot), if references
10386
* to those entities appear in the document, or
10387
* - attributes with values subject to normalization, where the
10388
* attribute appears in the document with a value which will change
10389
* as a result of normalization, or
10390
* - element types with element content, if white space occurs directly
10391
* within any instance of those types.
10394
* 1 if standalone="yes"
10395
* 0 if standalone="no"
10396
* -2 if standalone attribute is missing or invalid
10397
* (A standalone value of -2 means that the XML declaration was found,
10398
* but no value was specified for the standalone attribute).
10402
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10403
int standalone = -2;
10406
if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10410
xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10411
return(standalone);
10417
if ((RAW == 'n') && (NXT(1) == 'o')) {
10420
} else if ((RAW == 'y') && (NXT(1) == 'e') &&
10425
xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10428
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10431
} else if (RAW == '"'){
10433
if ((RAW == 'n') && (NXT(1) == 'o')) {
10436
} else if ((RAW == 'y') && (NXT(1) == 'e') &&
10441
xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10444
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10448
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10451
return(standalone);
10456
* @ctxt: an XML parser context
10458
* parse an XML declaration header
10460
* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10464
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10468
* This value for standalone indicates that the document has an
10469
* XML declaration but it does not have a standalone attribute.
10470
* It will be overwritten later if a standalone attribute is found.
10472
ctxt->input->standalone = -2;
10475
* We know that '<?xml' is here.
10479
if (!IS_BLANK_CH(RAW)) {
10480
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10481
"Blank needed after '<?xml'\n");
10486
* We must have the VersionInfo here.
10488
version = xmlParseVersionInfo(ctxt);
10489
if (version == NULL) {
10490
xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10492
if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10494
* Changed here for XML-1.0 5th edition
10496
if (ctxt->options & XML_PARSE_OLD10) {
10497
xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10498
"Unsupported version '%s'\n",
10501
if ((version[0] == '1') && ((version[1] == '.'))) {
10502
xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10503
"Unsupported version '%s'\n",
10506
xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10507
"Unsupported version '%s'\n",
10512
if (ctxt->version != NULL)
10513
xmlFree((void *) ctxt->version);
10514
ctxt->version = version;
10518
* We may have the encoding declaration
10520
if (!IS_BLANK_CH(RAW)) {
10521
if ((RAW == '?') && (NXT(1) == '>')) {
10525
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10527
xmlParseEncodingDecl(ctxt);
10528
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10530
* The XML REC instructs us to stop parsing right here
10536
* We may have the standalone status.
10538
if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10539
if ((RAW == '?') && (NXT(1) == '>')) {
10543
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10547
* We can grow the input buffer freely at that point
10552
ctxt->input->standalone = xmlParseSDDecl(ctxt);
10555
if ((RAW == '?') && (NXT(1) == '>')) {
10557
} else if (RAW == '>') {
10558
/* Deprecated old WD ... */
10559
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10562
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10563
MOVETO_ENDTAG(CUR_PTR);
10570
* @ctxt: an XML parser context
10572
* parse an XML Misc* optional field.
10574
* [27] Misc ::= Comment | PI | S
10578
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10579
while ((ctxt->instate != XML_PARSER_EOF) &&
10580
(((RAW == '<') && (NXT(1) == '?')) ||
10581
(CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10582
IS_BLANK_CH(CUR))) {
10583
if ((RAW == '<') && (NXT(1) == '?')) {
10585
} else if (IS_BLANK_CH(CUR)) {
10588
xmlParseComment(ctxt);
10593
* xmlParseDocument:
10594
* @ctxt: an XML parser context
10596
* parse an XML document (and build a tree if using the standard SAX
10599
* [1] document ::= prolog element Misc*
10601
* [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10603
* Returns 0, or -1 in case of error. The parser context is augmented
10604
* as a result of the parsing.
10608
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10610
xmlCharEncoding enc;
10614
if ((ctxt == NULL) || (ctxt->input == NULL))
10620
* SAX: detecting the level.
10622
xmlDetectSAX2(ctxt);
10625
* SAX: beginning of the document processing.
10627
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10628
ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10629
if (ctxt->instate == XML_PARSER_EOF)
10632
if ((ctxt->encoding == NULL) &&
10633
((ctxt->input->end - ctxt->input->cur) >= 4)) {
10635
* Get the 4 first bytes and decode the charset
10636
* if enc != XML_CHAR_ENCODING_NONE
10637
* plug some encoding conversion routines.
10643
enc = xmlDetectCharEncoding(&start[0], 4);
10644
if (enc != XML_CHAR_ENCODING_NONE) {
10645
xmlSwitchEncoding(ctxt, enc);
10651
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10655
* Check for the XMLDecl in the Prolog.
10656
* do not GROW here, to keep the detected encoder from decoding more
10657
* than just the first line, unless the amount of data is really
10658
* too small to hold "<?xml version="1.0" encoding="foo"
10660
if ((ctxt->input->end - ctxt->input->cur) < 35) {
10663
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10666
* Note that we will switch encoding on the fly.
10668
xmlParseXMLDecl(ctxt);
10669
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10671
* The XML REC instructs us to stop parsing right here
10675
ctxt->standalone = ctxt->input->standalone;
10678
ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10680
if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10681
ctxt->sax->startDocument(ctxt->userData);
10682
if (ctxt->instate == XML_PARSER_EOF)
10686
* The Misc part of the Prolog
10689
xmlParseMisc(ctxt);
10692
* Then possibly doc type declaration(s) and more Misc
10693
* (doctypedecl Misc*)?
10696
if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10698
ctxt->inSubset = 1;
10699
xmlParseDocTypeDecl(ctxt);
10701
ctxt->instate = XML_PARSER_DTD;
10702
xmlParseInternalSubset(ctxt);
10703
if (ctxt->instate == XML_PARSER_EOF)
10708
* Create and update the external subset.
10710
ctxt->inSubset = 2;
10711
if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10712
(!ctxt->disableSAX))
10713
ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10714
ctxt->extSubSystem, ctxt->extSubURI);
10715
if (ctxt->instate == XML_PARSER_EOF)
10717
ctxt->inSubset = 0;
10719
xmlCleanSpecialAttr(ctxt);
10721
ctxt->instate = XML_PARSER_PROLOG;
10722
xmlParseMisc(ctxt);
10726
* Time to start parsing the tree itself
10730
xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10731
"Start tag expected, '<' not found\n");
10733
ctxt->instate = XML_PARSER_CONTENT;
10734
xmlParseElement(ctxt);
10735
ctxt->instate = XML_PARSER_EPILOG;
10739
* The Misc part at the end
10741
xmlParseMisc(ctxt);
10744
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10746
ctxt->instate = XML_PARSER_EOF;
10750
* SAX: end of the document processing.
10752
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10753
ctxt->sax->endDocument(ctxt->userData);
10756
* Remove locally kept entity definitions if the tree was not built
10758
if ((ctxt->myDoc != NULL) &&
10759
(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10760
xmlFreeDoc(ctxt->myDoc);
10761
ctxt->myDoc = NULL;
10764
if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10765
ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10767
ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10768
if (ctxt->nsWellFormed)
10769
ctxt->myDoc->properties |= XML_DOC_NSVALID;
10770
if (ctxt->options & XML_PARSE_OLD10)
10771
ctxt->myDoc->properties |= XML_DOC_OLD10;
10773
if (! ctxt->wellFormed) {
10781
* xmlParseExtParsedEnt:
10782
* @ctxt: an XML parser context
10784
* parse a general parsed entity
10785
* An external general parsed entity is well-formed if it matches the
10786
* production labeled extParsedEnt.
10788
* [78] extParsedEnt ::= TextDecl? content
10790
* Returns 0, or -1 in case of error. The parser context is augmented
10791
* as a result of the parsing.
10795
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10797
xmlCharEncoding enc;
10799
if ((ctxt == NULL) || (ctxt->input == NULL))
10802
xmlDefaultSAXHandlerInit();
10804
xmlDetectSAX2(ctxt);
10809
* SAX: beginning of the document processing.
10811
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10812
ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10815
* Get the 4 first bytes and decode the charset
10816
* if enc != XML_CHAR_ENCODING_NONE
10817
* plug some encoding conversion routines.
10819
if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10824
enc = xmlDetectCharEncoding(start, 4);
10825
if (enc != XML_CHAR_ENCODING_NONE) {
10826
xmlSwitchEncoding(ctxt, enc);
10832
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10836
* Check for the XMLDecl in the Prolog.
10839
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10842
* Note that we will switch encoding on the fly.
10844
xmlParseXMLDecl(ctxt);
10845
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10847
* The XML REC instructs us to stop parsing right here
10853
ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10855
if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10856
ctxt->sax->startDocument(ctxt->userData);
10857
if (ctxt->instate == XML_PARSER_EOF)
10861
* Doing validity checking on chunk doesn't make sense
10863
ctxt->instate = XML_PARSER_CONTENT;
10864
ctxt->validate = 0;
10865
ctxt->loadsubset = 0;
10868
xmlParseContent(ctxt);
10869
if (ctxt->instate == XML_PARSER_EOF)
10872
if ((RAW == '<') && (NXT(1) == '/')) {
10873
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10874
} else if (RAW != 0) {
10875
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10879
* SAX: end of the document processing.
10881
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10882
ctxt->sax->endDocument(ctxt->userData);
10884
if (! ctxt->wellFormed) return(-1);
10888
#ifdef LIBXML_PUSH_ENABLED
10889
/************************************************************************
10891
* Progressive parsing interfaces *
10893
************************************************************************/
10896
* xmlParseLookupSequence:
10897
* @ctxt: an XML parser context
10898
* @first: the first char to lookup
10899
* @next: the next char to lookup or zero
10900
* @third: the next char to lookup or zero
10902
* Try to find if a sequence (first, next, third) or just (first next) or
10903
* (first) is available in the input stream.
10904
* This function has a side effect of (possibly) incrementing ctxt->checkIndex
10905
* to avoid rescanning sequences of bytes, it DOES change the state of the
10906
* parser, do not use liberally.
10908
* Returns the index to the current parsing point if the full sequence
10909
* is available, -1 otherwise.
10912
xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10913
xmlChar next, xmlChar third) {
10915
xmlParserInputPtr in;
10916
const xmlChar *buf;
10919
if (in == NULL) return(-1);
10920
base = in->cur - in->base;
10921
if (base < 0) return(-1);
10922
if (ctxt->checkIndex > base)
10923
base = ctxt->checkIndex;
10924
if (in->buf == NULL) {
10928
buf = xmlBufContent(in->buf->buffer);
10929
len = xmlBufUse(in->buf->buffer);
10931
/* take into account the sequence length */
10932
if (third) len -= 2;
10933
else if (next) len --;
10934
for (;base < len;base++) {
10935
if (buf[base] == first) {
10937
if ((buf[base + 1] != next) ||
10938
(buf[base + 2] != third)) continue;
10939
} else if (next != 0) {
10940
if (buf[base + 1] != next) continue;
10942
ctxt->checkIndex = 0;
10945
xmlGenericError(xmlGenericErrorContext,
10946
"PP: lookup '%c' found at %d\n",
10948
else if (third == 0)
10949
xmlGenericError(xmlGenericErrorContext,
10950
"PP: lookup '%c%c' found at %d\n",
10951
first, next, base);
10953
xmlGenericError(xmlGenericErrorContext,
10954
"PP: lookup '%c%c%c' found at %d\n",
10955
first, next, third, base);
10957
return(base - (in->cur - in->base));
10960
ctxt->checkIndex = base;
10963
xmlGenericError(xmlGenericErrorContext,
10964
"PP: lookup '%c' failed\n", first);
10965
else if (third == 0)
10966
xmlGenericError(xmlGenericErrorContext,
10967
"PP: lookup '%c%c' failed\n", first, next);
10969
xmlGenericError(xmlGenericErrorContext,
10970
"PP: lookup '%c%c%c' failed\n", first, next, third);
10976
* xmlParseGetLasts:
10977
* @ctxt: an XML parser context
10978
* @lastlt: pointer to store the last '<' from the input
10979
* @lastgt: pointer to store the last '>' from the input
10981
* Lookup the last < and > in the current chunk
10984
xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10985
const xmlChar **lastgt) {
10986
const xmlChar *tmp;
10988
if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10989
xmlGenericError(xmlGenericErrorContext,
10990
"Internal error: xmlParseGetLasts\n");
10993
if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
10994
tmp = ctxt->input->end;
10996
while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
10997
if (tmp < ctxt->input->base) {
11003
while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11004
if (*tmp == '\'') {
11006
while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11007
if (tmp < ctxt->input->end) tmp++;
11008
} else if (*tmp == '"') {
11010
while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11011
if (tmp < ctxt->input->end) tmp++;
11015
if (tmp < ctxt->input->end)
11020
while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11021
if (tmp >= ctxt->input->base)
11033
* xmlCheckCdataPush:
11034
* @utf: pointer to the block of characters
11035
* @len: length of the block in bytes
11037
* Check that the block of characters is okay as CData content [20]
11039
* Returns the number of bytes to pass if okay, a negative index where an
11040
* UTF-8 error occurred otherwise
11043
xmlCheckCdataPush(const xmlChar *utf, int len) {
11048
if ((utf == NULL) || (len <= 0))
11051
for (ix = 0; ix < len;) { /* string is 0-terminated */
11053
if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11056
else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11060
} else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11061
if (ix + 2 > len) return(ix);
11062
if ((utf[ix+1] & 0xc0 ) != 0x80)
11064
codepoint = (utf[ix] & 0x1f) << 6;
11065
codepoint |= utf[ix+1] & 0x3f;
11066
if (!xmlIsCharQ(codepoint))
11069
} else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11070
if (ix + 3 > len) return(ix);
11071
if (((utf[ix+1] & 0xc0) != 0x80) ||
11072
((utf[ix+2] & 0xc0) != 0x80))
11074
codepoint = (utf[ix] & 0xf) << 12;
11075
codepoint |= (utf[ix+1] & 0x3f) << 6;
11076
codepoint |= utf[ix+2] & 0x3f;
11077
if (!xmlIsCharQ(codepoint))
11080
} else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11081
if (ix + 4 > len) return(ix);
11082
if (((utf[ix+1] & 0xc0) != 0x80) ||
11083
((utf[ix+2] & 0xc0) != 0x80) ||
11084
((utf[ix+3] & 0xc0) != 0x80))
11086
codepoint = (utf[ix] & 0x7) << 18;
11087
codepoint |= (utf[ix+1] & 0x3f) << 12;
11088
codepoint |= (utf[ix+2] & 0x3f) << 6;
11089
codepoint |= utf[ix+3] & 0x3f;
11090
if (!xmlIsCharQ(codepoint))
11093
} else /* unknown encoding */
11100
* xmlParseTryOrFinish:
11101
* @ctxt: an XML parser context
11102
* @terminate: last chunk indicator
11104
* Try to progress on parsing
11106
* Returns zero if no parsing was possible
11109
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11113
const xmlChar *lastlt, *lastgt;
11115
if (ctxt->input == NULL)
11119
switch (ctxt->instate) {
11120
case XML_PARSER_EOF:
11121
xmlGenericError(xmlGenericErrorContext,
11122
"PP: try EOF\n"); break;
11123
case XML_PARSER_START:
11124
xmlGenericError(xmlGenericErrorContext,
11125
"PP: try START\n"); break;
11126
case XML_PARSER_MISC:
11127
xmlGenericError(xmlGenericErrorContext,
11128
"PP: try MISC\n");break;
11129
case XML_PARSER_COMMENT:
11130
xmlGenericError(xmlGenericErrorContext,
11131
"PP: try COMMENT\n");break;
11132
case XML_PARSER_PROLOG:
11133
xmlGenericError(xmlGenericErrorContext,
11134
"PP: try PROLOG\n");break;
11135
case XML_PARSER_START_TAG:
11136
xmlGenericError(xmlGenericErrorContext,
11137
"PP: try START_TAG\n");break;
11138
case XML_PARSER_CONTENT:
11139
xmlGenericError(xmlGenericErrorContext,
11140
"PP: try CONTENT\n");break;
11141
case XML_PARSER_CDATA_SECTION:
11142
xmlGenericError(xmlGenericErrorContext,
11143
"PP: try CDATA_SECTION\n");break;
11144
case XML_PARSER_END_TAG:
11145
xmlGenericError(xmlGenericErrorContext,
11146
"PP: try END_TAG\n");break;
11147
case XML_PARSER_ENTITY_DECL:
11148
xmlGenericError(xmlGenericErrorContext,
11149
"PP: try ENTITY_DECL\n");break;
11150
case XML_PARSER_ENTITY_VALUE:
11151
xmlGenericError(xmlGenericErrorContext,
11152
"PP: try ENTITY_VALUE\n");break;
11153
case XML_PARSER_ATTRIBUTE_VALUE:
11154
xmlGenericError(xmlGenericErrorContext,
11155
"PP: try ATTRIBUTE_VALUE\n");break;
11156
case XML_PARSER_DTD:
11157
xmlGenericError(xmlGenericErrorContext,
11158
"PP: try DTD\n");break;
11159
case XML_PARSER_EPILOG:
11160
xmlGenericError(xmlGenericErrorContext,
11161
"PP: try EPILOG\n");break;
11162
case XML_PARSER_PI:
11163
xmlGenericError(xmlGenericErrorContext,
11164
"PP: try PI\n");break;
11165
case XML_PARSER_IGNORE:
11166
xmlGenericError(xmlGenericErrorContext,
11167
"PP: try IGNORE\n");break;
11171
if ((ctxt->input != NULL) &&
11172
(ctxt->input->cur - ctxt->input->base > 4096)) {
11174
ctxt->checkIndex = 0;
11176
xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11178
while (ctxt->instate != XML_PARSER_EOF) {
11179
if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11184
* Pop-up of finished entities.
11186
while ((RAW == 0) && (ctxt->inputNr > 1))
11189
if (ctxt->input == NULL) break;
11190
if (ctxt->input->buf == NULL)
11191
avail = ctxt->input->length -
11192
(ctxt->input->cur - ctxt->input->base);
11195
* If we are operating on converted input, try to flush
11196
* remaining chars to avoid them stalling in the non-converted
11197
* buffer. But do not do this in document start where
11198
* encoding="..." may not have been read and we work on a
11199
* guessed encoding.
11201
if ((ctxt->instate != XML_PARSER_START) &&
11202
(ctxt->input->buf->raw != NULL) &&
11203
(xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11204
size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11206
size_t current = ctxt->input->cur - ctxt->input->base;
11208
xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11209
xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11212
avail = xmlBufUse(ctxt->input->buf->buffer) -
11213
(ctxt->input->cur - ctxt->input->base);
11217
switch (ctxt->instate) {
11218
case XML_PARSER_EOF:
11220
* Document parsing is done !
11223
case XML_PARSER_START:
11224
if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11226
xmlCharEncoding enc;
11229
* Very first chars read from the document flow.
11235
* Get the 4 first bytes and decode the charset
11236
* if enc != XML_CHAR_ENCODING_NONE
11237
* plug some encoding conversion routines,
11238
* else xmlSwitchEncoding will set to (default)
11245
enc = xmlDetectCharEncoding(start, 4);
11246
xmlSwitchEncoding(ctxt, enc);
11252
cur = ctxt->input->cur[0];
11253
next = ctxt->input->cur[1];
11255
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11256
ctxt->sax->setDocumentLocator(ctxt->userData,
11257
&xmlDefaultSAXLocator);
11258
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11259
ctxt->instate = XML_PARSER_EOF;
11261
xmlGenericError(xmlGenericErrorContext,
11262
"PP: entering EOF\n");
11264
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11265
ctxt->sax->endDocument(ctxt->userData);
11268
if ((cur == '<') && (next == '?')) {
11269
/* PI or XML decl */
11270
if (avail < 5) return(ret);
11271
if ((!terminate) &&
11272
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11274
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11275
ctxt->sax->setDocumentLocator(ctxt->userData,
11276
&xmlDefaultSAXLocator);
11277
if ((ctxt->input->cur[2] == 'x') &&
11278
(ctxt->input->cur[3] == 'm') &&
11279
(ctxt->input->cur[4] == 'l') &&
11280
(IS_BLANK_CH(ctxt->input->cur[5]))) {
11283
xmlGenericError(xmlGenericErrorContext,
11284
"PP: Parsing XML Decl\n");
11286
xmlParseXMLDecl(ctxt);
11287
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11289
* The XML REC instructs us to stop parsing right
11292
ctxt->instate = XML_PARSER_EOF;
11295
ctxt->standalone = ctxt->input->standalone;
11296
if ((ctxt->encoding == NULL) &&
11297
(ctxt->input->encoding != NULL))
11298
ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11299
if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11300
(!ctxt->disableSAX))
11301
ctxt->sax->startDocument(ctxt->userData);
11302
ctxt->instate = XML_PARSER_MISC;
11304
xmlGenericError(xmlGenericErrorContext,
11305
"PP: entering MISC\n");
11308
ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11309
if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11310
(!ctxt->disableSAX))
11311
ctxt->sax->startDocument(ctxt->userData);
11312
ctxt->instate = XML_PARSER_MISC;
11314
xmlGenericError(xmlGenericErrorContext,
11315
"PP: entering MISC\n");
11319
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11320
ctxt->sax->setDocumentLocator(ctxt->userData,
11321
&xmlDefaultSAXLocator);
11322
ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11323
if (ctxt->version == NULL) {
11324
xmlErrMemory(ctxt, NULL);
11327
if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11328
(!ctxt->disableSAX))
11329
ctxt->sax->startDocument(ctxt->userData);
11330
ctxt->instate = XML_PARSER_MISC;
11332
xmlGenericError(xmlGenericErrorContext,
11333
"PP: entering MISC\n");
11337
case XML_PARSER_START_TAG: {
11338
const xmlChar *name;
11339
const xmlChar *prefix = NULL;
11340
const xmlChar *URI = NULL;
11341
int nsNr = ctxt->nsNr;
11343
if ((avail < 2) && (ctxt->inputNr == 1))
11345
cur = ctxt->input->cur[0];
11347
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11348
ctxt->instate = XML_PARSER_EOF;
11349
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11350
ctxt->sax->endDocument(ctxt->userData);
11354
if (ctxt->progressive) {
11355
/* > can be found unescaped in attribute values */
11356
if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11358
} else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11362
if (ctxt->spaceNr == 0)
11363
spacePush(ctxt, -1);
11364
else if (*ctxt->space == -2)
11365
spacePush(ctxt, -1);
11367
spacePush(ctxt, *ctxt->space);
11368
#ifdef LIBXML_SAX1_ENABLED
11370
#endif /* LIBXML_SAX1_ENABLED */
11371
name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11372
#ifdef LIBXML_SAX1_ENABLED
11374
name = xmlParseStartTag(ctxt);
11375
#endif /* LIBXML_SAX1_ENABLED */
11376
if (ctxt->instate == XML_PARSER_EOF)
11378
if (name == NULL) {
11380
ctxt->instate = XML_PARSER_EOF;
11381
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11382
ctxt->sax->endDocument(ctxt->userData);
11385
#ifdef LIBXML_VALID_ENABLED
11387
* [ VC: Root Element Type ]
11388
* The Name in the document type declaration must match
11389
* the element type of the root element.
11391
if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11392
ctxt->node && (ctxt->node == ctxt->myDoc->children))
11393
ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11394
#endif /* LIBXML_VALID_ENABLED */
11397
* Check for an Empty Element.
11399
if ((RAW == '/') && (NXT(1) == '>')) {
11403
if ((ctxt->sax != NULL) &&
11404
(ctxt->sax->endElementNs != NULL) &&
11405
(!ctxt->disableSAX))
11406
ctxt->sax->endElementNs(ctxt->userData, name,
11408
if (ctxt->nsNr - nsNr > 0)
11409
nsPop(ctxt, ctxt->nsNr - nsNr);
11410
#ifdef LIBXML_SAX1_ENABLED
11412
if ((ctxt->sax != NULL) &&
11413
(ctxt->sax->endElement != NULL) &&
11414
(!ctxt->disableSAX))
11415
ctxt->sax->endElement(ctxt->userData, name);
11416
#endif /* LIBXML_SAX1_ENABLED */
11418
if (ctxt->instate == XML_PARSER_EOF)
11421
if (ctxt->nameNr == 0) {
11422
ctxt->instate = XML_PARSER_EPILOG;
11424
ctxt->instate = XML_PARSER_CONTENT;
11426
ctxt->progressive = 1;
11432
xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11433
"Couldn't find end of Start Tag %s\n",
11439
nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11440
#ifdef LIBXML_SAX1_ENABLED
11442
namePush(ctxt, name);
11443
#endif /* LIBXML_SAX1_ENABLED */
11445
ctxt->instate = XML_PARSER_CONTENT;
11446
ctxt->progressive = 1;
11449
case XML_PARSER_CONTENT: {
11450
const xmlChar *test;
11452
if ((avail < 2) && (ctxt->inputNr == 1))
11454
cur = ctxt->input->cur[0];
11455
next = ctxt->input->cur[1];
11458
cons = ctxt->input->consumed;
11459
if ((cur == '<') && (next == '/')) {
11460
ctxt->instate = XML_PARSER_END_TAG;
11462
} else if ((cur == '<') && (next == '?')) {
11463
if ((!terminate) &&
11464
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11465
ctxt->progressive = XML_PARSER_PI;
11469
ctxt->instate = XML_PARSER_CONTENT;
11470
ctxt->progressive = 1;
11471
} else if ((cur == '<') && (next != '!')) {
11472
ctxt->instate = XML_PARSER_START_TAG;
11474
} else if ((cur == '<') && (next == '!') &&
11475
(ctxt->input->cur[2] == '-') &&
11476
(ctxt->input->cur[3] == '-')) {
11481
ctxt->input->cur += 4;
11482
term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11483
ctxt->input->cur -= 4;
11484
if ((!terminate) && (term < 0)) {
11485
ctxt->progressive = XML_PARSER_COMMENT;
11488
xmlParseComment(ctxt);
11489
ctxt->instate = XML_PARSER_CONTENT;
11490
ctxt->progressive = 1;
11491
} else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11492
(ctxt->input->cur[2] == '[') &&
11493
(ctxt->input->cur[3] == 'C') &&
11494
(ctxt->input->cur[4] == 'D') &&
11495
(ctxt->input->cur[5] == 'A') &&
11496
(ctxt->input->cur[6] == 'T') &&
11497
(ctxt->input->cur[7] == 'A') &&
11498
(ctxt->input->cur[8] == '[')) {
11500
ctxt->instate = XML_PARSER_CDATA_SECTION;
11502
} else if ((cur == '<') && (next == '!') &&
11505
} else if (cur == '&') {
11506
if ((!terminate) &&
11507
(xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11509
xmlParseReference(ctxt);
11511
/* TODO Avoid the extra copy, handle directly !!! */
11513
* Goal of the following test is:
11514
* - minimize calls to the SAX 'character' callback
11515
* when they are mergeable
11516
* - handle a problem for isBlank when we only parse
11517
* a sequence of blank chars and the next one is
11518
* not available to check against '<' presence.
11519
* - tries to homogenize the differences in SAX
11520
* callbacks between the push and pull versions
11523
if ((ctxt->inputNr == 1) &&
11524
(avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11526
if (ctxt->progressive) {
11527
if ((lastlt == NULL) ||
11528
(ctxt->input->cur > lastlt))
11530
} else if (xmlParseLookupSequence(ctxt,
11536
ctxt->checkIndex = 0;
11537
xmlParseCharData(ctxt, 0);
11540
* Pop-up of finished entities.
11542
while ((RAW == 0) && (ctxt->inputNr > 1))
11544
if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11545
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11546
"detected an error in element content\n");
11547
ctxt->instate = XML_PARSER_EOF;
11552
case XML_PARSER_END_TAG:
11556
if (ctxt->progressive) {
11557
/* > can be found unescaped in attribute values */
11558
if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11560
} else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11565
xmlParseEndTag2(ctxt,
11566
(void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11567
(void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11568
(int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11571
#ifdef LIBXML_SAX1_ENABLED
11573
xmlParseEndTag1(ctxt, 0);
11574
#endif /* LIBXML_SAX1_ENABLED */
11575
if (ctxt->instate == XML_PARSER_EOF) {
11577
} else if (ctxt->nameNr == 0) {
11578
ctxt->instate = XML_PARSER_EPILOG;
11580
ctxt->instate = XML_PARSER_CONTENT;
11583
case XML_PARSER_CDATA_SECTION: {
11585
* The Push mode needs to have the SAX callback for
11586
* cdataBlock merge back contiguous callbacks.
11590
base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11592
if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11595
tmp = xmlCheckCdataPush(ctxt->input->cur,
11596
XML_PARSER_BIG_BUFFER_SIZE);
11599
ctxt->input->cur += tmp;
11600
goto encoding_error;
11602
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11603
if (ctxt->sax->cdataBlock != NULL)
11604
ctxt->sax->cdataBlock(ctxt->userData,
11605
ctxt->input->cur, tmp);
11606
else if (ctxt->sax->characters != NULL)
11607
ctxt->sax->characters(ctxt->userData,
11608
ctxt->input->cur, tmp);
11610
if (ctxt->instate == XML_PARSER_EOF)
11613
ctxt->checkIndex = 0;
11619
tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11620
if ((tmp < 0) || (tmp != base)) {
11622
ctxt->input->cur += tmp;
11623
goto encoding_error;
11625
if ((ctxt->sax != NULL) && (base == 0) &&
11626
(ctxt->sax->cdataBlock != NULL) &&
11627
(!ctxt->disableSAX)) {
11629
* Special case to provide identical behaviour
11630
* between pull and push parsers on empty CDATA
11633
if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11634
(!strncmp((const char *)&ctxt->input->cur[-9],
11636
ctxt->sax->cdataBlock(ctxt->userData,
11638
} else if ((ctxt->sax != NULL) && (base > 0) &&
11639
(!ctxt->disableSAX)) {
11640
if (ctxt->sax->cdataBlock != NULL)
11641
ctxt->sax->cdataBlock(ctxt->userData,
11642
ctxt->input->cur, base);
11643
else if (ctxt->sax->characters != NULL)
11644
ctxt->sax->characters(ctxt->userData,
11645
ctxt->input->cur, base);
11647
if (ctxt->instate == XML_PARSER_EOF)
11650
ctxt->checkIndex = 0;
11651
ctxt->instate = XML_PARSER_CONTENT;
11653
xmlGenericError(xmlGenericErrorContext,
11654
"PP: entering CONTENT\n");
11659
case XML_PARSER_MISC:
11661
if (ctxt->input->buf == NULL)
11662
avail = ctxt->input->length -
11663
(ctxt->input->cur - ctxt->input->base);
11665
avail = xmlBufUse(ctxt->input->buf->buffer) -
11666
(ctxt->input->cur - ctxt->input->base);
11669
cur = ctxt->input->cur[0];
11670
next = ctxt->input->cur[1];
11671
if ((cur == '<') && (next == '?')) {
11672
if ((!terminate) &&
11673
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11674
ctxt->progressive = XML_PARSER_PI;
11678
xmlGenericError(xmlGenericErrorContext,
11679
"PP: Parsing PI\n");
11682
if (ctxt->instate == XML_PARSER_EOF)
11684
ctxt->instate = XML_PARSER_MISC;
11685
ctxt->progressive = 1;
11686
ctxt->checkIndex = 0;
11687
} else if ((cur == '<') && (next == '!') &&
11688
(ctxt->input->cur[2] == '-') &&
11689
(ctxt->input->cur[3] == '-')) {
11690
if ((!terminate) &&
11691
(xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11692
ctxt->progressive = XML_PARSER_COMMENT;
11696
xmlGenericError(xmlGenericErrorContext,
11697
"PP: Parsing Comment\n");
11699
xmlParseComment(ctxt);
11700
if (ctxt->instate == XML_PARSER_EOF)
11702
ctxt->instate = XML_PARSER_MISC;
11703
ctxt->progressive = 1;
11704
ctxt->checkIndex = 0;
11705
} else if ((cur == '<') && (next == '!') &&
11706
(ctxt->input->cur[2] == 'D') &&
11707
(ctxt->input->cur[3] == 'O') &&
11708
(ctxt->input->cur[4] == 'C') &&
11709
(ctxt->input->cur[5] == 'T') &&
11710
(ctxt->input->cur[6] == 'Y') &&
11711
(ctxt->input->cur[7] == 'P') &&
11712
(ctxt->input->cur[8] == 'E')) {
11713
if ((!terminate) &&
11714
(xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11715
ctxt->progressive = XML_PARSER_DTD;
11719
xmlGenericError(xmlGenericErrorContext,
11720
"PP: Parsing internal subset\n");
11722
ctxt->inSubset = 1;
11723
ctxt->progressive = 0;
11724
ctxt->checkIndex = 0;
11725
xmlParseDocTypeDecl(ctxt);
11726
if (ctxt->instate == XML_PARSER_EOF)
11729
ctxt->instate = XML_PARSER_DTD;
11731
xmlGenericError(xmlGenericErrorContext,
11732
"PP: entering DTD\n");
11736
* Create and update the external subset.
11738
ctxt->inSubset = 2;
11739
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11740
(ctxt->sax->externalSubset != NULL))
11741
ctxt->sax->externalSubset(ctxt->userData,
11742
ctxt->intSubName, ctxt->extSubSystem,
11744
ctxt->inSubset = 0;
11745
xmlCleanSpecialAttr(ctxt);
11746
ctxt->instate = XML_PARSER_PROLOG;
11748
xmlGenericError(xmlGenericErrorContext,
11749
"PP: entering PROLOG\n");
11752
} else if ((cur == '<') && (next == '!') &&
11756
ctxt->instate = XML_PARSER_START_TAG;
11757
ctxt->progressive = XML_PARSER_START_TAG;
11758
xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11760
xmlGenericError(xmlGenericErrorContext,
11761
"PP: entering START_TAG\n");
11765
case XML_PARSER_PROLOG:
11767
if (ctxt->input->buf == NULL)
11768
avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11770
avail = xmlBufUse(ctxt->input->buf->buffer) -
11771
(ctxt->input->cur - ctxt->input->base);
11774
cur = ctxt->input->cur[0];
11775
next = ctxt->input->cur[1];
11776
if ((cur == '<') && (next == '?')) {
11777
if ((!terminate) &&
11778
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11779
ctxt->progressive = XML_PARSER_PI;
11783
xmlGenericError(xmlGenericErrorContext,
11784
"PP: Parsing PI\n");
11787
if (ctxt->instate == XML_PARSER_EOF)
11789
ctxt->instate = XML_PARSER_PROLOG;
11790
ctxt->progressive = 1;
11791
} else if ((cur == '<') && (next == '!') &&
11792
(ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11793
if ((!terminate) &&
11794
(xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11795
ctxt->progressive = XML_PARSER_COMMENT;
11799
xmlGenericError(xmlGenericErrorContext,
11800
"PP: Parsing Comment\n");
11802
xmlParseComment(ctxt);
11803
if (ctxt->instate == XML_PARSER_EOF)
11805
ctxt->instate = XML_PARSER_PROLOG;
11806
ctxt->progressive = 1;
11807
} else if ((cur == '<') && (next == '!') &&
11811
ctxt->instate = XML_PARSER_START_TAG;
11812
if (ctxt->progressive == 0)
11813
ctxt->progressive = XML_PARSER_START_TAG;
11814
xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11816
xmlGenericError(xmlGenericErrorContext,
11817
"PP: entering START_TAG\n");
11821
case XML_PARSER_EPILOG:
11823
if (ctxt->input->buf == NULL)
11824
avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11826
avail = xmlBufUse(ctxt->input->buf->buffer) -
11827
(ctxt->input->cur - ctxt->input->base);
11830
cur = ctxt->input->cur[0];
11831
next = ctxt->input->cur[1];
11832
if ((cur == '<') && (next == '?')) {
11833
if ((!terminate) &&
11834
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11835
ctxt->progressive = XML_PARSER_PI;
11839
xmlGenericError(xmlGenericErrorContext,
11840
"PP: Parsing PI\n");
11843
if (ctxt->instate == XML_PARSER_EOF)
11845
ctxt->instate = XML_PARSER_EPILOG;
11846
ctxt->progressive = 1;
11847
} else if ((cur == '<') && (next == '!') &&
11848
(ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11849
if ((!terminate) &&
11850
(xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11851
ctxt->progressive = XML_PARSER_COMMENT;
11855
xmlGenericError(xmlGenericErrorContext,
11856
"PP: Parsing Comment\n");
11858
xmlParseComment(ctxt);
11859
if (ctxt->instate == XML_PARSER_EOF)
11861
ctxt->instate = XML_PARSER_EPILOG;
11862
ctxt->progressive = 1;
11863
} else if ((cur == '<') && (next == '!') &&
11867
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11868
ctxt->instate = XML_PARSER_EOF;
11870
xmlGenericError(xmlGenericErrorContext,
11871
"PP: entering EOF\n");
11873
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11874
ctxt->sax->endDocument(ctxt->userData);
11878
case XML_PARSER_DTD: {
11880
* Sorry but progressive parsing of the internal subset
11881
* is not expected to be supported. We first check that
11882
* the full content of the internal subset is available and
11883
* the parsing is launched only at that point.
11884
* Internal subset ends up with "']' S? '>'" in an unescaped
11885
* section and not in a ']]>' sequence which are conditional
11886
* sections (whoever argued to keep that crap in XML deserve
11887
* a place in hell !).
11894
base = ctxt->input->cur - ctxt->input->base;
11895
if (base < 0) return(0);
11896
if (ctxt->checkIndex > base)
11897
base = ctxt->checkIndex;
11898
buf = xmlBufContent(ctxt->input->buf->buffer);
11899
use = xmlBufUse(ctxt->input->buf->buffer);
11900
for (;(unsigned int) base < use; base++) {
11902
if (buf[base] == quote)
11906
if ((quote == 0) && (buf[base] == '<')) {
11908
/* special handling of comments */
11909
if (((unsigned int) base + 4 < use) &&
11910
(buf[base + 1] == '!') &&
11911
(buf[base + 2] == '-') &&
11912
(buf[base + 3] == '-')) {
11913
for (;(unsigned int) base + 3 < use; base++) {
11914
if ((buf[base] == '-') &&
11915
(buf[base + 1] == '-') &&
11916
(buf[base + 2] == '>')) {
11924
fprintf(stderr, "unfinished comment\n");
11931
if (buf[base] == '"') {
11935
if (buf[base] == '\'') {
11939
if (buf[base] == ']') {
11941
fprintf(stderr, "%c%c%c%c: ", buf[base],
11942
buf[base + 1], buf[base + 2], buf[base + 3]);
11944
if ((unsigned int) base +1 >= use)
11946
if (buf[base + 1] == ']') {
11947
/* conditional crap, skip both ']' ! */
11951
for (i = 1; (unsigned int) base + i < use; i++) {
11952
if (buf[base + i] == '>') {
11954
fprintf(stderr, "found\n");
11956
goto found_end_int_subset;
11958
if (!IS_BLANK_CH(buf[base + i])) {
11960
fprintf(stderr, "not found\n");
11962
goto not_end_of_int_subset;
11966
fprintf(stderr, "end of stream\n");
11971
not_end_of_int_subset:
11972
continue; /* for */
11975
* We didn't find the end of the Internal subset
11978
ctxt->checkIndex = base;
11980
ctxt->checkIndex = 0;
11983
xmlGenericError(xmlGenericErrorContext,
11984
"PP: lookup of int subset end filed\n");
11988
found_end_int_subset:
11989
ctxt->checkIndex = 0;
11990
xmlParseInternalSubset(ctxt);
11991
if (ctxt->instate == XML_PARSER_EOF)
11993
ctxt->inSubset = 2;
11994
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11995
(ctxt->sax->externalSubset != NULL))
11996
ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11997
ctxt->extSubSystem, ctxt->extSubURI);
11998
ctxt->inSubset = 0;
11999
xmlCleanSpecialAttr(ctxt);
12000
if (ctxt->instate == XML_PARSER_EOF)
12002
ctxt->instate = XML_PARSER_PROLOG;
12003
ctxt->checkIndex = 0;
12005
xmlGenericError(xmlGenericErrorContext,
12006
"PP: entering PROLOG\n");
12010
case XML_PARSER_COMMENT:
12011
xmlGenericError(xmlGenericErrorContext,
12012
"PP: internal error, state == COMMENT\n");
12013
ctxt->instate = XML_PARSER_CONTENT;
12015
xmlGenericError(xmlGenericErrorContext,
12016
"PP: entering CONTENT\n");
12019
case XML_PARSER_IGNORE:
12020
xmlGenericError(xmlGenericErrorContext,
12021
"PP: internal error, state == IGNORE");
12022
ctxt->instate = XML_PARSER_DTD;
12024
xmlGenericError(xmlGenericErrorContext,
12025
"PP: entering DTD\n");
12028
case XML_PARSER_PI:
12029
xmlGenericError(xmlGenericErrorContext,
12030
"PP: internal error, state == PI\n");
12031
ctxt->instate = XML_PARSER_CONTENT;
12033
xmlGenericError(xmlGenericErrorContext,
12034
"PP: entering CONTENT\n");
12037
case XML_PARSER_ENTITY_DECL:
12038
xmlGenericError(xmlGenericErrorContext,
12039
"PP: internal error, state == ENTITY_DECL\n");
12040
ctxt->instate = XML_PARSER_DTD;
12042
xmlGenericError(xmlGenericErrorContext,
12043
"PP: entering DTD\n");
12046
case XML_PARSER_ENTITY_VALUE:
12047
xmlGenericError(xmlGenericErrorContext,
12048
"PP: internal error, state == ENTITY_VALUE\n");
12049
ctxt->instate = XML_PARSER_CONTENT;
12051
xmlGenericError(xmlGenericErrorContext,
12052
"PP: entering DTD\n");
12055
case XML_PARSER_ATTRIBUTE_VALUE:
12056
xmlGenericError(xmlGenericErrorContext,
12057
"PP: internal error, state == ATTRIBUTE_VALUE\n");
12058
ctxt->instate = XML_PARSER_START_TAG;
12060
xmlGenericError(xmlGenericErrorContext,
12061
"PP: entering START_TAG\n");
12064
case XML_PARSER_SYSTEM_LITERAL:
12065
xmlGenericError(xmlGenericErrorContext,
12066
"PP: internal error, state == SYSTEM_LITERAL\n");
12067
ctxt->instate = XML_PARSER_START_TAG;
12069
xmlGenericError(xmlGenericErrorContext,
12070
"PP: entering START_TAG\n");
12073
case XML_PARSER_PUBLIC_LITERAL:
12074
xmlGenericError(xmlGenericErrorContext,
12075
"PP: internal error, state == PUBLIC_LITERAL\n");
12076
ctxt->instate = XML_PARSER_START_TAG;
12078
xmlGenericError(xmlGenericErrorContext,
12079
"PP: entering START_TAG\n");
12086
xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12093
snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12094
ctxt->input->cur[0], ctxt->input->cur[1],
12095
ctxt->input->cur[2], ctxt->input->cur[3]);
12096
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12097
"Input is not proper UTF-8, indicate encoding !\n%s",
12098
BAD_CAST buffer, NULL);
12104
* xmlParseCheckTransition:
12105
* @ctxt: an XML parser context
12106
* @chunk: a char array
12107
* @size: the size in byte of the chunk
12109
* Check depending on the current parser state if the chunk given must be
12110
* processed immediately or one need more data to advance on parsing.
12112
* Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12115
xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12116
if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12118
if (ctxt->instate == XML_PARSER_START_TAG) {
12119
if (memchr(chunk, '>', size) != NULL)
12123
if (ctxt->progressive == XML_PARSER_COMMENT) {
12124
if (memchr(chunk, '>', size) != NULL)
12128
if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12129
if (memchr(chunk, '>', size) != NULL)
12133
if (ctxt->progressive == XML_PARSER_PI) {
12134
if (memchr(chunk, '>', size) != NULL)
12138
if (ctxt->instate == XML_PARSER_END_TAG) {
12139
if (memchr(chunk, '>', size) != NULL)
12143
if ((ctxt->progressive == XML_PARSER_DTD) ||
12144
(ctxt->instate == XML_PARSER_DTD)) {
12145
if (memchr(chunk, '>', size) != NULL)
12154
* @ctxt: an XML parser context
12155
* @chunk: an char array
12156
* @size: the size in byte of the chunk
12157
* @terminate: last chunk indicator
12159
* Parse a Chunk of memory
12161
* Returns zero if no error, the xmlParserErrors otherwise.
12164
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12168
size_t old_avail = 0;
12172
return(XML_ERR_INTERNAL_ERROR);
12173
if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12174
return(ctxt->errNo);
12175
if (ctxt->instate == XML_PARSER_EOF)
12177
if (ctxt->instate == XML_PARSER_START)
12178
xmlDetectSAX2(ctxt);
12179
if ((size > 0) && (chunk != NULL) && (!terminate) &&
12180
(chunk[size - 1] == '\r')) {
12187
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12188
(ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
12189
size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12190
size_t cur = ctxt->input->cur - ctxt->input->base;
12193
old_avail = xmlBufUse(ctxt->input->buf->buffer);
12195
* Specific handling if we autodetected an encoding, we should not
12196
* push more than the first line ... which depend on the encoding
12197
* And only push the rest once the final encoding was detected
12199
if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12200
(ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12201
unsigned int len = 45;
12203
if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12204
BAD_CAST "UTF-16")) ||
12205
(xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12206
BAD_CAST "UTF16")))
12208
else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12209
BAD_CAST "UCS-4")) ||
12210
(xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12214
if (ctxt->input->buf->rawconsumed < len)
12215
len -= ctxt->input->buf->rawconsumed;
12218
* Change size for reading the initial declaration only
12219
* if size is greater than len. Otherwise, memmove in xmlBufferAdd
12220
* will blindly copy extra bytes from memory.
12222
if ((unsigned int) size > len) {
12223
remain = size - len;
12229
res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12231
ctxt->errNo = XML_PARSER_EOF;
12232
ctxt->disableSAX = 1;
12233
return (XML_PARSER_EOF);
12235
xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12237
xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12240
} else if (ctxt->instate != XML_PARSER_EOF) {
12241
if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12242
xmlParserInputBufferPtr in = ctxt->input->buf;
12243
if ((in->encoder != NULL) && (in->buffer != NULL) &&
12244
(in->raw != NULL)) {
12246
size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12247
size_t current = ctxt->input->cur - ctxt->input->base;
12249
nbchars = xmlCharEncInput(in, terminate);
12252
xmlGenericError(xmlGenericErrorContext,
12253
"xmlParseChunk: encoder error\n");
12254
return(XML_ERR_INVALID_ENCODING);
12256
xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12261
xmlParseTryOrFinish(ctxt, 0);
12263
if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12264
avail = xmlBufUse(ctxt->input->buf->buffer);
12266
* Depending on the current state it may not be such
12267
* a good idea to try parsing if there is nothing in the chunk
12268
* which would be worth doing a parser state transition and we
12269
* need to wait for more data
12271
if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12272
(old_avail == 0) || (avail == 0) ||
12273
(xmlParseCheckTransition(ctxt,
12274
(const char *)&ctxt->input->base[old_avail],
12275
avail - old_avail)))
12276
xmlParseTryOrFinish(ctxt, terminate);
12278
if (ctxt->instate == XML_PARSER_EOF)
12279
return(ctxt->errNo);
12281
if ((ctxt->input != NULL) &&
12282
(((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12283
((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12284
((ctxt->options & XML_PARSE_HUGE) == 0)) {
12285
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12286
ctxt->instate = XML_PARSER_EOF;
12288
if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12289
return(ctxt->errNo);
12297
if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12298
(ctxt->input->buf != NULL)) {
12299
size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12301
size_t current = ctxt->input->cur - ctxt->input->base;
12303
xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12305
xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12310
* Check for termination
12314
if (ctxt->input != NULL) {
12315
if (ctxt->input->buf == NULL)
12316
cur_avail = ctxt->input->length -
12317
(ctxt->input->cur - ctxt->input->base);
12319
cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12320
(ctxt->input->cur - ctxt->input->base);
12323
if ((ctxt->instate != XML_PARSER_EOF) &&
12324
(ctxt->instate != XML_PARSER_EPILOG)) {
12325
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12327
if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12328
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12330
if (ctxt->instate != XML_PARSER_EOF) {
12331
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12332
ctxt->sax->endDocument(ctxt->userData);
12334
ctxt->instate = XML_PARSER_EOF;
12336
if (ctxt->wellFormed == 0)
12337
return((xmlParserErrors) ctxt->errNo);
12342
/************************************************************************
12344
* I/O front end functions to the parser *
12346
************************************************************************/
12349
* xmlCreatePushParserCtxt:
12350
* @sax: a SAX handler
12351
* @user_data: The user data returned on SAX callbacks
12352
* @chunk: a pointer to an array of chars
12353
* @size: number of chars in the array
12354
* @filename: an optional file name or URI
12356
* Create a parser context for using the XML parser in push mode.
12357
* If @buffer and @size are non-NULL, the data is used to detect
12358
* the encoding. The remaining characters will be parsed so they
12359
* don't need to be fed in again through xmlParseChunk.
12360
* To allow content encoding detection, @size should be >= 4
12361
* The value of @filename is used for fetching external entities
12362
* and error/warning reports.
12364
* Returns the new parser context or NULL
12368
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12369
const char *chunk, int size, const char *filename) {
12370
xmlParserCtxtPtr ctxt;
12371
xmlParserInputPtr inputStream;
12372
xmlParserInputBufferPtr buf;
12373
xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12376
* plug some encoding conversion routines
12378
if ((chunk != NULL) && (size >= 4))
12379
enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12381
buf = xmlAllocParserInputBuffer(enc);
12382
if (buf == NULL) return(NULL);
12384
ctxt = xmlNewParserCtxt();
12385
if (ctxt == NULL) {
12386
xmlErrMemory(NULL, "creating parser: out of memory\n");
12387
xmlFreeParserInputBuffer(buf);
12390
ctxt->dictNames = 1;
12391
ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12392
if (ctxt->pushTab == NULL) {
12393
xmlErrMemory(ctxt, NULL);
12394
xmlFreeParserInputBuffer(buf);
12395
xmlFreeParserCtxt(ctxt);
12399
#ifdef LIBXML_SAX1_ENABLED
12400
if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12401
#endif /* LIBXML_SAX1_ENABLED */
12402
xmlFree(ctxt->sax);
12403
ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12404
if (ctxt->sax == NULL) {
12405
xmlErrMemory(ctxt, NULL);
12406
xmlFreeParserInputBuffer(buf);
12407
xmlFreeParserCtxt(ctxt);
12410
memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12411
if (sax->initialized == XML_SAX2_MAGIC)
12412
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12414
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12415
if (user_data != NULL)
12416
ctxt->userData = user_data;
12418
if (filename == NULL) {
12419
ctxt->directory = NULL;
12421
ctxt->directory = xmlParserGetDirectory(filename);
12424
inputStream = xmlNewInputStream(ctxt);
12425
if (inputStream == NULL) {
12426
xmlFreeParserCtxt(ctxt);
12427
xmlFreeParserInputBuffer(buf);
12431
if (filename == NULL)
12432
inputStream->filename = NULL;
12434
inputStream->filename = (char *)
12435
xmlCanonicPath((const xmlChar *) filename);
12436
if (inputStream->filename == NULL) {
12437
xmlFreeParserCtxt(ctxt);
12438
xmlFreeParserInputBuffer(buf);
12442
inputStream->buf = buf;
12443
xmlBufResetInput(inputStream->buf->buffer, inputStream);
12444
inputPush(ctxt, inputStream);
12447
* If the caller didn't provide an initial 'chunk' for determining
12448
* the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12449
* that it can be automatically determined later
12451
if ((size == 0) || (chunk == NULL)) {
12452
ctxt->charset = XML_CHAR_ENCODING_NONE;
12453
} else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12454
size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12455
size_t cur = ctxt->input->cur - ctxt->input->base;
12457
xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12459
xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12461
xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12465
if (enc != XML_CHAR_ENCODING_NONE) {
12466
xmlSwitchEncoding(ctxt, enc);
12471
#endif /* LIBXML_PUSH_ENABLED */
12475
* @ctxt: an XML parser context
12477
* Blocks further parser processing
12480
xmlStopParser(xmlParserCtxtPtr ctxt) {
12483
ctxt->instate = XML_PARSER_EOF;
12484
ctxt->errNo = XML_ERR_USER_STOP;
12485
ctxt->disableSAX = 1;
12486
if (ctxt->input != NULL) {
12487
ctxt->input->cur = BAD_CAST"";
12488
ctxt->input->base = ctxt->input->cur;
12493
* xmlCreateIOParserCtxt:
12494
* @sax: a SAX handler
12495
* @user_data: The user data returned on SAX callbacks
12496
* @ioread: an I/O read function
12497
* @ioclose: an I/O close function
12498
* @ioctx: an I/O handler
12499
* @enc: the charset encoding if known
12501
* Create a parser context for using the XML parser with an existing
12504
* Returns the new parser context or NULL
12507
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12508
xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12509
void *ioctx, xmlCharEncoding enc) {
12510
xmlParserCtxtPtr ctxt;
12511
xmlParserInputPtr inputStream;
12512
xmlParserInputBufferPtr buf;
12514
if (ioread == NULL) return(NULL);
12516
buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12518
if (ioclose != NULL)
12523
ctxt = xmlNewParserCtxt();
12524
if (ctxt == NULL) {
12525
xmlFreeParserInputBuffer(buf);
12529
#ifdef LIBXML_SAX1_ENABLED
12530
if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12531
#endif /* LIBXML_SAX1_ENABLED */
12532
xmlFree(ctxt->sax);
12533
ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12534
if (ctxt->sax == NULL) {
12535
xmlErrMemory(ctxt, NULL);
12536
xmlFreeParserCtxt(ctxt);
12539
memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12540
if (sax->initialized == XML_SAX2_MAGIC)
12541
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12543
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12544
if (user_data != NULL)
12545
ctxt->userData = user_data;
12548
inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12549
if (inputStream == NULL) {
12550
xmlFreeParserCtxt(ctxt);
12553
inputPush(ctxt, inputStream);
12558
#ifdef LIBXML_VALID_ENABLED
12559
/************************************************************************
12561
* Front ends when parsing a DTD *
12563
************************************************************************/
12567
* @sax: the SAX handler block or NULL
12568
* @input: an Input Buffer
12569
* @enc: the charset encoding if known
12571
* Load and parse a DTD
12573
* Returns the resulting xmlDtdPtr or NULL in case of error.
12574
* @input will be freed by the function in any case.
12578
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12579
xmlCharEncoding enc) {
12580
xmlDtdPtr ret = NULL;
12581
xmlParserCtxtPtr ctxt;
12582
xmlParserInputPtr pinput = NULL;
12588
ctxt = xmlNewParserCtxt();
12589
if (ctxt == NULL) {
12590
xmlFreeParserInputBuffer(input);
12595
* Set-up the SAX context
12598
if (ctxt->sax != NULL)
12599
xmlFree(ctxt->sax);
12601
ctxt->userData = ctxt;
12603
xmlDetectSAX2(ctxt);
12606
* generate a parser input from the I/O handler
12609
pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12610
if (pinput == NULL) {
12611
if (sax != NULL) ctxt->sax = NULL;
12612
xmlFreeParserInputBuffer(input);
12613
xmlFreeParserCtxt(ctxt);
12618
* plug some encoding conversion routines here.
12620
if (xmlPushInput(ctxt, pinput) < 0) {
12621
if (sax != NULL) ctxt->sax = NULL;
12622
xmlFreeParserCtxt(ctxt);
12625
if (enc != XML_CHAR_ENCODING_NONE) {
12626
xmlSwitchEncoding(ctxt, enc);
12629
pinput->filename = NULL;
12632
pinput->base = ctxt->input->cur;
12633
pinput->cur = ctxt->input->cur;
12634
pinput->free = NULL;
12637
* let's parse that entity knowing it's an external subset.
12639
ctxt->inSubset = 2;
12640
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12641
if (ctxt->myDoc == NULL) {
12642
xmlErrMemory(ctxt, "New Doc failed");
12645
ctxt->myDoc->properties = XML_DOC_INTERNAL;
12646
ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12647
BAD_CAST "none", BAD_CAST "none");
12649
if ((enc == XML_CHAR_ENCODING_NONE) &&
12650
((ctxt->input->end - ctxt->input->cur) >= 4)) {
12652
* Get the 4 first bytes and decode the charset
12653
* if enc != XML_CHAR_ENCODING_NONE
12654
* plug some encoding conversion routines.
12660
enc = xmlDetectCharEncoding(start, 4);
12661
if (enc != XML_CHAR_ENCODING_NONE) {
12662
xmlSwitchEncoding(ctxt, enc);
12666
xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12668
if (ctxt->myDoc != NULL) {
12669
if (ctxt->wellFormed) {
12670
ret = ctxt->myDoc->extSubset;
12671
ctxt->myDoc->extSubset = NULL;
12676
tmp = ret->children;
12677
while (tmp != NULL) {
12685
xmlFreeDoc(ctxt->myDoc);
12686
ctxt->myDoc = NULL;
12688
if (sax != NULL) ctxt->sax = NULL;
12689
xmlFreeParserCtxt(ctxt);
12696
* @sax: the SAX handler block
12697
* @ExternalID: a NAME* containing the External ID of the DTD
12698
* @SystemID: a NAME* containing the URL to the DTD
12700
* Load and parse an external subset.
12702
* Returns the resulting xmlDtdPtr or NULL in case of error.
12706
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12707
const xmlChar *SystemID) {
12708
xmlDtdPtr ret = NULL;
12709
xmlParserCtxtPtr ctxt;
12710
xmlParserInputPtr input = NULL;
12711
xmlCharEncoding enc;
12712
xmlChar* systemIdCanonic;
12714
if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12716
ctxt = xmlNewParserCtxt();
12717
if (ctxt == NULL) {
12722
* Set-up the SAX context
12725
if (ctxt->sax != NULL)
12726
xmlFree(ctxt->sax);
12728
ctxt->userData = ctxt;
12732
* Canonicalise the system ID
12734
systemIdCanonic = xmlCanonicPath(SystemID);
12735
if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12736
xmlFreeParserCtxt(ctxt);
12741
* Ask the Entity resolver to load the damn thing
12744
if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12745
input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12747
if (input == NULL) {
12748
if (sax != NULL) ctxt->sax = NULL;
12749
xmlFreeParserCtxt(ctxt);
12750
if (systemIdCanonic != NULL)
12751
xmlFree(systemIdCanonic);
12756
* plug some encoding conversion routines here.
12758
if (xmlPushInput(ctxt, input) < 0) {
12759
if (sax != NULL) ctxt->sax = NULL;
12760
xmlFreeParserCtxt(ctxt);
12761
if (systemIdCanonic != NULL)
12762
xmlFree(systemIdCanonic);
12765
if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12766
enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12767
xmlSwitchEncoding(ctxt, enc);
12770
if (input->filename == NULL)
12771
input->filename = (char *) systemIdCanonic;
12773
xmlFree(systemIdCanonic);
12776
input->base = ctxt->input->cur;
12777
input->cur = ctxt->input->cur;
12778
input->free = NULL;
12781
* let's parse that entity knowing it's an external subset.
12783
ctxt->inSubset = 2;
12784
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12785
if (ctxt->myDoc == NULL) {
12786
xmlErrMemory(ctxt, "New Doc failed");
12787
if (sax != NULL) ctxt->sax = NULL;
12788
xmlFreeParserCtxt(ctxt);
12791
ctxt->myDoc->properties = XML_DOC_INTERNAL;
12792
ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12793
ExternalID, SystemID);
12794
xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12796
if (ctxt->myDoc != NULL) {
12797
if (ctxt->wellFormed) {
12798
ret = ctxt->myDoc->extSubset;
12799
ctxt->myDoc->extSubset = NULL;
12804
tmp = ret->children;
12805
while (tmp != NULL) {
12813
xmlFreeDoc(ctxt->myDoc);
12814
ctxt->myDoc = NULL;
12816
if (sax != NULL) ctxt->sax = NULL;
12817
xmlFreeParserCtxt(ctxt);
12825
* @ExternalID: a NAME* containing the External ID of the DTD
12826
* @SystemID: a NAME* containing the URL to the DTD
12828
* Load and parse an external subset.
12830
* Returns the resulting xmlDtdPtr or NULL in case of error.
12834
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12835
return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12837
#endif /* LIBXML_VALID_ENABLED */
12839
/************************************************************************
12841
* Front ends when parsing an Entity *
12843
************************************************************************/
12846
* xmlParseCtxtExternalEntity:
12847
* @ctx: the existing parsing context
12848
* @URL: the URL for the entity to load
12849
* @ID: the System ID for the entity to load
12850
* @lst: the return value for the set of parsed nodes
12852
* Parse an external general entity within an existing parsing context
12853
* An external general parsed entity is well-formed if it matches the
12854
* production labeled extParsedEnt.
12856
* [78] extParsedEnt ::= TextDecl? content
12858
* Returns 0 if the entity is well formed, -1 in case of args problem and
12859
* the parser error code otherwise
12863
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12864
const xmlChar *ID, xmlNodePtr *lst) {
12865
xmlParserCtxtPtr ctxt;
12867
xmlNodePtr newRoot;
12868
xmlSAXHandlerPtr oldsax = NULL;
12871
xmlCharEncoding enc;
12873
if (ctx == NULL) return(-1);
12875
if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12876
(ctx->depth > 1024)) {
12877
return(XML_ERR_ENTITY_LOOP);
12882
if ((URL == NULL) && (ID == NULL))
12884
if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12887
ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
12888
if (ctxt == NULL) {
12892
oldsax = ctxt->sax;
12893
ctxt->sax = ctx->sax;
12894
xmlDetectSAX2(ctxt);
12895
newDoc = xmlNewDoc(BAD_CAST "1.0");
12896
if (newDoc == NULL) {
12897
xmlFreeParserCtxt(ctxt);
12900
newDoc->properties = XML_DOC_INTERNAL;
12901
if (ctx->myDoc->dict) {
12902
newDoc->dict = ctx->myDoc->dict;
12903
xmlDictReference(newDoc->dict);
12905
if (ctx->myDoc != NULL) {
12906
newDoc->intSubset = ctx->myDoc->intSubset;
12907
newDoc->extSubset = ctx->myDoc->extSubset;
12909
if (ctx->myDoc->URL != NULL) {
12910
newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12912
newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12913
if (newRoot == NULL) {
12914
ctxt->sax = oldsax;
12915
xmlFreeParserCtxt(ctxt);
12916
newDoc->intSubset = NULL;
12917
newDoc->extSubset = NULL;
12918
xmlFreeDoc(newDoc);
12921
xmlAddChild((xmlNodePtr) newDoc, newRoot);
12922
nodePush(ctxt, newDoc->children);
12923
if (ctx->myDoc == NULL) {
12924
ctxt->myDoc = newDoc;
12926
ctxt->myDoc = ctx->myDoc;
12927
newDoc->children->doc = ctx->myDoc;
12931
* Get the first 4 bytes and decode the charset
12932
* if enc != XML_CHAR_ENCODING_NONE
12933
* plug some encoding conversion routines.
12936
if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12941
enc = xmlDetectCharEncoding(start, 4);
12942
if (enc != XML_CHAR_ENCODING_NONE) {
12943
xmlSwitchEncoding(ctxt, enc);
12948
* Parse a possible text declaration first
12950
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12951
xmlParseTextDecl(ctxt);
12953
* An XML-1.0 document can't reference an entity that is not XML-1.0
12955
if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12956
(!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12957
xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12958
"Version mismatch between document and entity\n");
12963
* If the user provided its own SAX callbacks then reuse the
12964
* userData callback field, otherwise the expected setup in a
12965
* DOM builder is to have userData == ctxt
12967
if (ctx->userData == ctx)
12968
ctxt->userData = ctxt;
12970
ctxt->userData = ctx->userData;
12973
* Doing validity checking on a chunk doesn't make sense
12975
ctxt->instate = XML_PARSER_CONTENT;
12976
ctxt->validate = ctx->validate;
12977
ctxt->valid = ctx->valid;
12978
ctxt->loadsubset = ctx->loadsubset;
12979
ctxt->depth = ctx->depth + 1;
12980
ctxt->replaceEntities = ctx->replaceEntities;
12981
if (ctxt->validate) {
12982
ctxt->vctxt.error = ctx->vctxt.error;
12983
ctxt->vctxt.warning = ctx->vctxt.warning;
12985
ctxt->vctxt.error = NULL;
12986
ctxt->vctxt.warning = NULL;
12988
ctxt->vctxt.nodeTab = NULL;
12989
ctxt->vctxt.nodeNr = 0;
12990
ctxt->vctxt.nodeMax = 0;
12991
ctxt->vctxt.node = NULL;
12992
if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12993
ctxt->dict = ctx->dict;
12994
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12995
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12996
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12997
ctxt->dictNames = ctx->dictNames;
12998
ctxt->attsDefault = ctx->attsDefault;
12999
ctxt->attsSpecial = ctx->attsSpecial;
13000
ctxt->linenumbers = ctx->linenumbers;
13002
xmlParseContent(ctxt);
13004
ctx->validate = ctxt->validate;
13005
ctx->valid = ctxt->valid;
13006
if ((RAW == '<') && (NXT(1) == '/')) {
13007
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13008
} else if (RAW != 0) {
13009
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13011
if (ctxt->node != newDoc->children) {
13012
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13015
if (!ctxt->wellFormed) {
13016
if (ctxt->errNo == 0)
13025
* Return the newly created nodeset after unlinking it from
13026
* the pseudo parent.
13028
cur = newDoc->children->children;
13030
while (cur != NULL) {
13031
cur->parent = NULL;
13034
newDoc->children->children = NULL;
13038
ctxt->sax = oldsax;
13040
ctxt->attsDefault = NULL;
13041
ctxt->attsSpecial = NULL;
13042
xmlFreeParserCtxt(ctxt);
13043
newDoc->intSubset = NULL;
13044
newDoc->extSubset = NULL;
13045
xmlFreeDoc(newDoc);
13051
* xmlParseExternalEntityPrivate:
13052
* @doc: the document the chunk pertains to
13053
* @oldctxt: the previous parser context if available
13054
* @sax: the SAX handler block (possibly NULL)
13055
* @user_data: The user data returned on SAX callbacks (possibly NULL)
13056
* @depth: Used for loop detection, use 0
13057
* @URL: the URL for the entity to load
13058
* @ID: the System ID for the entity to load
13059
* @list: the return value for the set of parsed nodes
13061
* Private version of xmlParseExternalEntity()
13063
* Returns 0 if the entity is well formed, -1 in case of args problem and
13064
* the parser error code otherwise
13067
static xmlParserErrors
13068
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13069
xmlSAXHandlerPtr sax,
13070
void *user_data, int depth, const xmlChar *URL,
13071
const xmlChar *ID, xmlNodePtr *list) {
13072
xmlParserCtxtPtr ctxt;
13074
xmlNodePtr newRoot;
13075
xmlSAXHandlerPtr oldsax = NULL;
13076
xmlParserErrors ret = XML_ERR_OK;
13078
xmlCharEncoding enc;
13080
if (((depth > 40) &&
13081
((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13083
return(XML_ERR_ENTITY_LOOP);
13088
if ((URL == NULL) && (ID == NULL))
13089
return(XML_ERR_INTERNAL_ERROR);
13091
return(XML_ERR_INTERNAL_ERROR);
13094
ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13095
if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13096
ctxt->userData = ctxt;
13097
if (oldctxt != NULL) {
13098
ctxt->_private = oldctxt->_private;
13099
ctxt->loadsubset = oldctxt->loadsubset;
13100
ctxt->validate = oldctxt->validate;
13101
ctxt->external = oldctxt->external;
13102
ctxt->record_info = oldctxt->record_info;
13103
ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13104
ctxt->node_seq.length = oldctxt->node_seq.length;
13105
ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13108
* Doing validity checking on a chunk without context
13109
* doesn't make sense
13111
ctxt->_private = NULL;
13112
ctxt->validate = 0;
13113
ctxt->external = 2;
13114
ctxt->loadsubset = 0;
13117
oldsax = ctxt->sax;
13119
if (user_data != NULL)
13120
ctxt->userData = user_data;
13122
xmlDetectSAX2(ctxt);
13123
newDoc = xmlNewDoc(BAD_CAST "1.0");
13124
if (newDoc == NULL) {
13125
ctxt->node_seq.maximum = 0;
13126
ctxt->node_seq.length = 0;
13127
ctxt->node_seq.buffer = NULL;
13128
xmlFreeParserCtxt(ctxt);
13129
return(XML_ERR_INTERNAL_ERROR);
13131
newDoc->properties = XML_DOC_INTERNAL;
13132
newDoc->intSubset = doc->intSubset;
13133
newDoc->extSubset = doc->extSubset;
13134
newDoc->dict = doc->dict;
13135
xmlDictReference(newDoc->dict);
13137
if (doc->URL != NULL) {
13138
newDoc->URL = xmlStrdup(doc->URL);
13140
newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13141
if (newRoot == NULL) {
13143
ctxt->sax = oldsax;
13144
ctxt->node_seq.maximum = 0;
13145
ctxt->node_seq.length = 0;
13146
ctxt->node_seq.buffer = NULL;
13147
xmlFreeParserCtxt(ctxt);
13148
newDoc->intSubset = NULL;
13149
newDoc->extSubset = NULL;
13150
xmlFreeDoc(newDoc);
13151
return(XML_ERR_INTERNAL_ERROR);
13153
xmlAddChild((xmlNodePtr) newDoc, newRoot);
13154
nodePush(ctxt, newDoc->children);
13156
newRoot->doc = doc;
13159
* Get the first 4 bytes and decode the charset
13160
* if enc != XML_CHAR_ENCODING_NONE
13161
* plug some encoding conversion routines.
13164
if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13169
enc = xmlDetectCharEncoding(start, 4);
13170
if (enc != XML_CHAR_ENCODING_NONE) {
13171
xmlSwitchEncoding(ctxt, enc);
13176
* Parse a possible text declaration first
13178
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13179
xmlParseTextDecl(ctxt);
13182
ctxt->instate = XML_PARSER_CONTENT;
13183
ctxt->depth = depth;
13185
xmlParseContent(ctxt);
13187
if ((RAW == '<') && (NXT(1) == '/')) {
13188
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13189
} else if (RAW != 0) {
13190
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13192
if (ctxt->node != newDoc->children) {
13193
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13196
if (!ctxt->wellFormed) {
13197
if (ctxt->errNo == 0)
13198
ret = XML_ERR_INTERNAL_ERROR;
13200
ret = (xmlParserErrors)ctxt->errNo;
13202
if (list != NULL) {
13206
* Return the newly created nodeset after unlinking it from
13207
* the pseudo parent.
13209
cur = newDoc->children->children;
13211
while (cur != NULL) {
13212
cur->parent = NULL;
13215
newDoc->children->children = NULL;
13221
* Record in the parent context the number of entity replacements
13222
* done when parsing that reference.
13224
if (oldctxt != NULL)
13225
oldctxt->nbentities += ctxt->nbentities;
13228
* Also record the size of the entity parsed
13230
if (ctxt->input != NULL) {
13231
oldctxt->sizeentities += ctxt->input->consumed;
13232
oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13235
* And record the last error if any
13237
if (ctxt->lastError.code != XML_ERR_OK)
13238
xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13241
ctxt->sax = oldsax;
13242
oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13243
oldctxt->node_seq.length = ctxt->node_seq.length;
13244
oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13245
ctxt->node_seq.maximum = 0;
13246
ctxt->node_seq.length = 0;
13247
ctxt->node_seq.buffer = NULL;
13248
xmlFreeParserCtxt(ctxt);
13249
newDoc->intSubset = NULL;
13250
newDoc->extSubset = NULL;
13251
xmlFreeDoc(newDoc);
13256
#ifdef LIBXML_SAX1_ENABLED
13258
* xmlParseExternalEntity:
13259
* @doc: the document the chunk pertains to
13260
* @sax: the SAX handler block (possibly NULL)
13261
* @user_data: The user data returned on SAX callbacks (possibly NULL)
13262
* @depth: Used for loop detection, use 0
13263
* @URL: the URL for the entity to load
13264
* @ID: the System ID for the entity to load
13265
* @lst: the return value for the set of parsed nodes
13267
* Parse an external general entity
13268
* An external general parsed entity is well-formed if it matches the
13269
* production labeled extParsedEnt.
13271
* [78] extParsedEnt ::= TextDecl? content
13273
* Returns 0 if the entity is well formed, -1 in case of args problem and
13274
* the parser error code otherwise
13278
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13279
int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13280
return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13285
* xmlParseBalancedChunkMemory:
13286
* @doc: the document the chunk pertains to
13287
* @sax: the SAX handler block (possibly NULL)
13288
* @user_data: The user data returned on SAX callbacks (possibly NULL)
13289
* @depth: Used for loop detection, use 0
13290
* @string: the input string in UTF8 or ISO-Latin (zero terminated)
13291
* @lst: the return value for the set of parsed nodes
13293
* Parse a well-balanced chunk of an XML document
13294
* called by the parser
13295
* The allowed sequence for the Well Balanced Chunk is the one defined by
13296
* the content production in the XML grammar:
13298
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13300
* Returns 0 if the chunk is well balanced, -1 in case of args problem and
13301
* the parser error code otherwise
13305
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13306
void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13307
/* Non-recovering variant: forwards every argument unchanged to
 * xmlParseBalancedChunkMemoryRecover() and passes 0 as the trailing
 * argument (documented there as @recover, "use 0"), then returns
 * whatever that call returns. */
return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13308
depth, string, lst, 0 );
13310
#endif /* LIBXML_SAX1_ENABLED */
13313
* xmlParseBalancedChunkMemoryInternal:
13314
* @oldctxt: the existing parsing context
13315
* @string: the input string in UTF8 or ISO-Latin (zero terminated)
13316
* @user_data: the user data field for the parser context
13317
* @lst: the return value for the set of parsed nodes
13320
* Parse a well-balanced chunk of an XML document
13321
* called by the parser
13322
* The allowed sequence for the Well Balanced Chunk is the one defined by
13323
* the content production in the XML grammar:
13325
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13327
* Returns XML_ERR_OK if the chunk is well balanced, and the parser
13328
* error code otherwise
13330
* In case recover is set to 1, the nodelist will not be empty even if
13331
* the parsed chunk is not well balanced.
13333
static xmlParserErrors
13334
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13335
const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13336
xmlParserCtxtPtr ctxt;
13337
xmlDocPtr newDoc = NULL;
13338
xmlNodePtr newRoot;
13339
xmlSAXHandlerPtr oldsax = NULL;
13340
xmlNodePtr content = NULL;
13341
xmlNodePtr last = NULL;
13343
xmlParserErrors ret = XML_ERR_OK;
13348
if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13349
(oldctxt->depth > 1024)) {
13350
return(XML_ERR_ENTITY_LOOP);
13356
if (string == NULL)
13357
return(XML_ERR_INTERNAL_ERROR);
13359
size = xmlStrlen(string);
13361
ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13362
if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13363
if (user_data != NULL)
13364
ctxt->userData = user_data;
13366
ctxt->userData = ctxt;
13367
if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13368
ctxt->dict = oldctxt->dict;
13369
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13370
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13371
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13374
/* propagate namespaces down the entity */
13375
for (i = 0;i < oldctxt->nsNr;i += 2) {
13376
nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13380
oldsax = ctxt->sax;
13381
ctxt->sax = oldctxt->sax;
13382
xmlDetectSAX2(ctxt);
13383
ctxt->replaceEntities = oldctxt->replaceEntities;
13384
ctxt->options = oldctxt->options;
13386
ctxt->_private = oldctxt->_private;
13387
if (oldctxt->myDoc == NULL) {
13388
newDoc = xmlNewDoc(BAD_CAST "1.0");
13389
if (newDoc == NULL) {
13390
ctxt->sax = oldsax;
13392
xmlFreeParserCtxt(ctxt);
13393
return(XML_ERR_INTERNAL_ERROR);
13395
newDoc->properties = XML_DOC_INTERNAL;
13396
newDoc->dict = ctxt->dict;
13397
xmlDictReference(newDoc->dict);
13398
ctxt->myDoc = newDoc;
13400
ctxt->myDoc = oldctxt->myDoc;
13401
content = ctxt->myDoc->children;
13402
last = ctxt->myDoc->last;
13404
newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13405
if (newRoot == NULL) {
13406
ctxt->sax = oldsax;
13408
xmlFreeParserCtxt(ctxt);
13409
if (newDoc != NULL) {
13410
xmlFreeDoc(newDoc);
13412
return(XML_ERR_INTERNAL_ERROR);
13414
ctxt->myDoc->children = NULL;
13415
ctxt->myDoc->last = NULL;
13416
xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13417
nodePush(ctxt, ctxt->myDoc->children);
13418
ctxt->instate = XML_PARSER_CONTENT;
13419
ctxt->depth = oldctxt->depth + 1;
13421
ctxt->validate = 0;
13422
ctxt->loadsubset = oldctxt->loadsubset;
13423
if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13425
* ID/IDREF registration will be done in xmlValidateElement below
13427
ctxt->loadsubset |= XML_SKIP_IDS;
13429
ctxt->dictNames = oldctxt->dictNames;
13430
ctxt->attsDefault = oldctxt->attsDefault;
13431
ctxt->attsSpecial = oldctxt->attsSpecial;
13433
xmlParseContent(ctxt);
13434
if ((RAW == '<') && (NXT(1) == '/')) {
13435
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13436
} else if (RAW != 0) {
13437
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13439
if (ctxt->node != ctxt->myDoc->children) {
13440
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13443
if (!ctxt->wellFormed) {
13444
if (ctxt->errNo == 0)
13445
ret = XML_ERR_INTERNAL_ERROR;
13447
ret = (xmlParserErrors)ctxt->errNo;
13452
if ((lst != NULL) && (ret == XML_ERR_OK)) {
13456
* Return the newly created nodeset after unlinking it from
13457
* the pseudo parent.
13459
cur = ctxt->myDoc->children->children;
13461
while (cur != NULL) {
13462
#ifdef LIBXML_VALID_ENABLED
13463
if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13464
(oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13465
(cur->type == XML_ELEMENT_NODE)) {
13466
oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13467
oldctxt->myDoc, cur);
13469
#endif /* LIBXML_VALID_ENABLED */
13470
cur->parent = NULL;
13473
ctxt->myDoc->children->children = NULL;
13475
if (ctxt->myDoc != NULL) {
13476
xmlFreeNode(ctxt->myDoc->children);
13477
ctxt->myDoc->children = content;
13478
ctxt->myDoc->last = last;
13482
* Record in the parent context the number of entity replacements
13483
* done when parsing that reference.
13485
if (oldctxt != NULL)
13486
oldctxt->nbentities += ctxt->nbentities;
13489
* Also record the last error if any
13491
if (ctxt->lastError.code != XML_ERR_OK)
13492
xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13494
ctxt->sax = oldsax;
13496
ctxt->attsDefault = NULL;
13497
ctxt->attsSpecial = NULL;
13498
xmlFreeParserCtxt(ctxt);
13499
if (newDoc != NULL) {
13500
xmlFreeDoc(newDoc);
13507
* xmlParseInNodeContext:
13508
* @node: the context node
13509
* @data: the input string
13510
* @datalen: the input string length in bytes
13511
* @options: a combination of xmlParserOption
13512
* @lst: the return value for the set of parsed nodes
13514
* Parse a well-balanced chunk of an XML document
13515
* within the context (DTD, namespaces, etc ...) of the given node.
13517
* The allowed sequence for the data is a Well Balanced Chunk defined by
13518
* the content production in the XML grammar:
13520
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13522
* Returns XML_ERR_OK if the chunk is well balanced, and the parser
13523
* error code otherwise
13526
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13527
int options, xmlNodePtr *lst) {
13529
xmlParserCtxtPtr ctxt;
13530
xmlDocPtr doc = NULL;
13531
xmlNodePtr fake, cur;
13534
xmlParserErrors ret = XML_ERR_OK;
13537
* check all input parameters, grab the document
13539
if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13540
return(XML_ERR_INTERNAL_ERROR);
13541
switch (node->type) {
13542
case XML_ELEMENT_NODE:
13543
case XML_ATTRIBUTE_NODE:
13544
case XML_TEXT_NODE:
13545
case XML_CDATA_SECTION_NODE:
13546
case XML_ENTITY_REF_NODE:
13548
case XML_COMMENT_NODE:
13549
case XML_DOCUMENT_NODE:
13550
case XML_HTML_DOCUMENT_NODE:
13553
return(XML_ERR_INTERNAL_ERROR);
13556
while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13557
(node->type != XML_DOCUMENT_NODE) &&
13558
(node->type != XML_HTML_DOCUMENT_NODE))
13559
node = node->parent;
13561
return(XML_ERR_INTERNAL_ERROR);
13562
if (node->type == XML_ELEMENT_NODE)
13565
doc = (xmlDocPtr) node;
13567
return(XML_ERR_INTERNAL_ERROR);
13570
* allocate a context and set-up everything not related to the
13571
* node position in the tree
13573
if (doc->type == XML_DOCUMENT_NODE)
13574
ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13575
#ifdef LIBXML_HTML_ENABLED
13576
else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13577
ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13579
* When parsing in context, it makes no sense to add implied
13580
* elements like html/body/etc...
13582
options |= HTML_PARSE_NOIMPLIED;
13586
return(XML_ERR_INTERNAL_ERROR);
13589
return(XML_ERR_NO_MEMORY);
13592
* Use input doc's dict if present, else ensure XML_PARSE_NODICT is set.
13593
* We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13594
* we must wait until the last moment to free the original one.
13596
if (doc->dict != NULL) {
13597
if (ctxt->dict != NULL)
13598
xmlDictFree(ctxt->dict);
13599
ctxt->dict = doc->dict;
13601
options |= XML_PARSE_NODICT;
13603
if (doc->encoding != NULL) {
13604
xmlCharEncodingHandlerPtr hdlr;
13606
if (ctxt->encoding != NULL)
13607
xmlFree((xmlChar *) ctxt->encoding);
13608
ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13610
hdlr = xmlFindCharEncodingHandler(doc->encoding);
13611
if (hdlr != NULL) {
13612
xmlSwitchToEncoding(ctxt, hdlr);
13614
return(XML_ERR_UNSUPPORTED_ENCODING);
13618
xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13619
xmlDetectSAX2(ctxt);
13622
fake = xmlNewComment(NULL);
13623
if (fake == NULL) {
13624
xmlFreeParserCtxt(ctxt);
13625
return(XML_ERR_NO_MEMORY);
13627
xmlAddChild(node, fake);
13629
if (node->type == XML_ELEMENT_NODE) {
13630
nodePush(ctxt, node);
13632
* initialize the SAX2 namespaces stack
13635
while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13636
xmlNsPtr ns = cur->nsDef;
13637
const xmlChar *iprefix, *ihref;
13639
while (ns != NULL) {
13641
iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13642
ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13644
iprefix = ns->prefix;
13648
if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13649
nsPush(ctxt, iprefix, ihref);
13656
ctxt->instate = XML_PARSER_CONTENT;
13659
if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13661
* ID/IDREF registration will be done in xmlValidateElement below
13663
ctxt->loadsubset |= XML_SKIP_IDS;
13666
#ifdef LIBXML_HTML_ENABLED
13667
if (doc->type == XML_HTML_DOCUMENT_NODE)
13668
__htmlParseContent(ctxt);
13671
xmlParseContent(ctxt);
13674
if ((RAW == '<') && (NXT(1) == '/')) {
13675
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13676
} else if (RAW != 0) {
13677
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13679
if ((ctxt->node != NULL) && (ctxt->node != node)) {
13680
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13681
ctxt->wellFormed = 0;
13684
if (!ctxt->wellFormed) {
13685
if (ctxt->errNo == 0)
13686
ret = XML_ERR_INTERNAL_ERROR;
13688
ret = (xmlParserErrors)ctxt->errNo;
13694
* Return the newly created nodeset after unlinking it from
13695
* the pseudo sibling.
13708
while (cur != NULL) {
13709
cur->parent = NULL;
13713
xmlUnlinkNode(fake);
13717
if (ret != XML_ERR_OK) {
13718
xmlFreeNodeList(*lst);
13722
if (doc->dict != NULL)
13724
xmlFreeParserCtxt(ctxt);
13728
return(XML_ERR_INTERNAL_ERROR);
13732
#ifdef LIBXML_SAX1_ENABLED
13734
* xmlParseBalancedChunkMemoryRecover:
13735
* @doc: the document the chunk pertains to
13736
* @sax: the SAX handler block (possibly NULL)
13737
* @user_data: The user data returned on SAX callbacks (possibly NULL)
13738
* @depth: Used for loop detection, use 0
13739
* @string: the input string in UTF8 or ISO-Latin (zero terminated)
13740
* @lst: the return value for the set of parsed nodes
13741
* @recover: return nodes even if the data is broken (use 0)
13744
* Parse a well-balanced chunk of an XML document
13745
* called by the parser
13746
* The allowed sequence for the Well Balanced Chunk is the one defined by
13747
* the content production in the XML grammar:
13749
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13751
* Returns 0 if the chunk is well balanced, -1 in case of args problem and
13752
* the parser error code otherwise
13754
* In case recover is set to 1, the nodelist will not be empty even if
13755
* the parsed chunk is not well balanced, assuming the parsing succeeded to
13759
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13760
void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13762
xmlParserCtxtPtr ctxt;
13764
xmlSAXHandlerPtr oldsax = NULL;
13765
xmlNodePtr content, newRoot;
13770
return(XML_ERR_ENTITY_LOOP);
13776
if (string == NULL)
13779
size = xmlStrlen(string);
13781
ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13782
if (ctxt == NULL) return(-1);
13783
ctxt->userData = ctxt;
13785
oldsax = ctxt->sax;
13787
if (user_data != NULL)
13788
ctxt->userData = user_data;
13790
newDoc = xmlNewDoc(BAD_CAST "1.0");
13791
if (newDoc == NULL) {
13792
xmlFreeParserCtxt(ctxt);
13795
newDoc->properties = XML_DOC_INTERNAL;
13796
if ((doc != NULL) && (doc->dict != NULL)) {
13797
xmlDictFree(ctxt->dict);
13798
ctxt->dict = doc->dict;
13799
xmlDictReference(ctxt->dict);
13800
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13801
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13802
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13803
ctxt->dictNames = 1;
13805
xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13808
newDoc->intSubset = doc->intSubset;
13809
newDoc->extSubset = doc->extSubset;
13811
newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13812
if (newRoot == NULL) {
13814
ctxt->sax = oldsax;
13815
xmlFreeParserCtxt(ctxt);
13816
newDoc->intSubset = NULL;
13817
newDoc->extSubset = NULL;
13818
xmlFreeDoc(newDoc);
13821
xmlAddChild((xmlNodePtr) newDoc, newRoot);
13822
nodePush(ctxt, newRoot);
13824
ctxt->myDoc = newDoc;
13826
ctxt->myDoc = newDoc;
13827
newDoc->children->doc = doc;
13828
/* Ensure that doc has XML spec namespace */
13829
xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13830
newDoc->oldNs = doc->oldNs;
13832
ctxt->instate = XML_PARSER_CONTENT;
13833
ctxt->depth = depth;
13836
* Doing validity checking on a chunk doesn't make sense
13838
ctxt->validate = 0;
13839
ctxt->loadsubset = 0;
13840
xmlDetectSAX2(ctxt);
13842
if ( doc != NULL ){
13843
content = doc->children;
13844
doc->children = NULL;
13845
xmlParseContent(ctxt);
13846
doc->children = content;
13849
xmlParseContent(ctxt);
13851
if ((RAW == '<') && (NXT(1) == '/')) {
13852
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13853
} else if (RAW != 0) {
13854
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13856
if (ctxt->node != newDoc->children) {
13857
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13860
if (!ctxt->wellFormed) {
13861
if (ctxt->errNo == 0)
13869
if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13873
* Return the newly created nodeset after unlinking it from
13874
* the pseudo parent.
13876
cur = newDoc->children->children;
13878
while (cur != NULL) {
13879
xmlSetTreeDoc(cur, doc);
13880
cur->parent = NULL;
13883
newDoc->children->children = NULL;
13887
ctxt->sax = oldsax;
13888
xmlFreeParserCtxt(ctxt);
13889
newDoc->intSubset = NULL;
13890
newDoc->extSubset = NULL;
13891
newDoc->oldNs = NULL;
13892
xmlFreeDoc(newDoc);
13898
* xmlSAXParseEntity:
13899
* @sax: the SAX handler block
13900
* @filename: the filename
13902
* parse an XML external entity out of context and build a tree.
13903
* It uses the given SAX function block to handle the parsing callbacks.
13904
* If sax is NULL, fall back to the default DOM tree building routines.
13906
* [78] extParsedEnt ::= TextDecl? content
13908
* This corresponds to a "Well Balanced" chunk
13910
* Returns the resulting document tree
13914
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13916
xmlParserCtxtPtr ctxt;
13918
ctxt = xmlCreateFileParserCtxt(filename);
13919
if (ctxt == NULL) {
13923
if (ctxt->sax != NULL)
13924
xmlFree(ctxt->sax);
13926
ctxt->userData = NULL;
13929
xmlParseExtParsedEnt(ctxt);
13931
if (ctxt->wellFormed)
13935
xmlFreeDoc(ctxt->myDoc);
13936
ctxt->myDoc = NULL;
13940
xmlFreeParserCtxt(ctxt);
13947
* @filename: the filename
13949
* parse an XML external entity out of context and build a tree.
13951
* [78] extParsedEnt ::= TextDecl? content
13953
* This corresponds to a "Well Balanced" chunk
13955
* Returns the resulting document tree
13959
xmlParseEntity(const char *filename) {
13960
/* Thin wrapper: calls xmlSAXParseEntity() with a NULL SAX handler, which
 * that function's header comment documents as falling back to the
 * default DOM tree building routines, and returns its result. */
return(xmlSAXParseEntity(NULL, filename));
13962
#endif /* LIBXML_SAX1_ENABLED */
13965
* xmlCreateEntityParserCtxtInternal:
13966
* @URL: the entity URL
13967
* @ID: the entity PUBLIC ID
13968
* @base: a possible base for the target URI
13969
* @pctx: parser context used to set options on new context
13971
* Create a parser context for an external entity
13972
* Automatic support for ZLIB/Compress compressed document is provided
13973
* by default if found at compile-time.
13975
* Returns the new parser context or NULL
13977
static xmlParserCtxtPtr
13978
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13979
const xmlChar *base, xmlParserCtxtPtr pctx) {
13980
xmlParserCtxtPtr ctxt;
13981
xmlParserInputPtr inputStream;
13982
char *directory = NULL;
13985
ctxt = xmlNewParserCtxt();
13986
if (ctxt == NULL) {
13990
if (pctx != NULL) {
13991
ctxt->options = pctx->options;
13992
ctxt->_private = pctx->_private;
13995
uri = xmlBuildURI(URL, base);
13998
inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13999
if (inputStream == NULL) {
14000
xmlFreeParserCtxt(ctxt);
14004
inputPush(ctxt, inputStream);
14006
if ((ctxt->directory == NULL) && (directory == NULL))
14007
directory = xmlParserGetDirectory((char *)URL);
14008
if ((ctxt->directory == NULL) && (directory != NULL))
14009
ctxt->directory = directory;
14011
inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14012
if (inputStream == NULL) {
14014
xmlFreeParserCtxt(ctxt);
14018
inputPush(ctxt, inputStream);
14020
if ((ctxt->directory == NULL) && (directory == NULL))
14021
directory = xmlParserGetDirectory((char *)uri);
14022
if ((ctxt->directory == NULL) && (directory != NULL))
14023
ctxt->directory = directory;
14030
* xmlCreateEntityParserCtxt:
14031
* @URL: the entity URL
14032
* @ID: the entity PUBLIC ID
14033
* @base: a possible base for the target URI
14035
* Create a parser context for an external entity
14036
* Automatic support for ZLIB/Compress compressed document is provided
14037
* by default if found at compile-time.
14039
* Returns the new parser context or NULL
14042
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14043
const xmlChar *base) {
14044
/* Public entry point: delegates to xmlCreateEntityParserCtxtInternal()
 * with a NULL pctx, so the visible `pctx != NULL` branch there (which
 * copies options and _private from a parent context) is not taken. */
return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14048
/************************************************************************
14050
* Front ends when parsing from a file *
14052
************************************************************************/
14055
* xmlCreateURLParserCtxt:
14056
* @filename: the filename or URL
14057
* @options: a combination of xmlParserOption
14059
* Create a parser context for a file or URL content.
14060
* Automatic support for ZLIB/Compress compressed document is provided
14061
* by default if found at compile-time and for file accesses
14063
* Returns the new parser context or NULL
14066
xmlCreateURLParserCtxt(const char *filename, int options)
14068
xmlParserCtxtPtr ctxt;
14069
xmlParserInputPtr inputStream;
14070
char *directory = NULL;
14072
ctxt = xmlNewParserCtxt();
14073
if (ctxt == NULL) {
14074
xmlErrMemory(NULL, "cannot allocate parser context");
14079
xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14080
ctxt->linenumbers = 1;
14082
inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14083
if (inputStream == NULL) {
14084
xmlFreeParserCtxt(ctxt);
14088
inputPush(ctxt, inputStream);
14089
if ((ctxt->directory == NULL) && (directory == NULL))
14090
directory = xmlParserGetDirectory(filename);
14091
if ((ctxt->directory == NULL) && (directory != NULL))
14092
ctxt->directory = directory;
14098
* xmlCreateFileParserCtxt:
14099
* @filename: the filename
14101
* Create a parser context for a file content.
14102
* Automatic support for ZLIB/Compress compressed document is provided
14103
* by default if found at compile-time.
14105
* Returns the new parser context or NULL
14108
xmlCreateFileParserCtxt(const char *filename)
14110
/* Same as xmlCreateURLParserCtxt() with its options argument fixed to 0;
 * the result of that call is returned as-is. */
return(xmlCreateURLParserCtxt(filename, 0));
14113
#ifdef LIBXML_SAX1_ENABLED
14115
* xmlSAXParseFileWithData:
14116
* @sax: the SAX handler block
14117
* @filename: the filename
14118
* @recovery: work in recovery mode, i.e. tries to read no Well Formed
14120
* @data: the userdata
14122
* parse an XML file and build a tree. Automatic support for ZLIB/Compress
14123
* compressed document is provided by default if found at compile-time.
14124
* It use the given SAX function block to handle the parsing callback.
14125
* If sax is NULL, fallback to the default DOM tree building routines.
14127
* User data (void *) is stored within the parser context in the
14128
* context's _private member, so it is available nearly everywhere in libxml
14130
* Returns the resulting document tree
14134
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14135
int recovery, void *data) {
14137
xmlParserCtxtPtr ctxt;
14141
ctxt = xmlCreateFileParserCtxt(filename);
14142
if (ctxt == NULL) {
14146
if (ctxt->sax != NULL)
14147
xmlFree(ctxt->sax);
14150
xmlDetectSAX2(ctxt);
14152
ctxt->_private = data;
14155
if (ctxt->directory == NULL)
14156
ctxt->directory = xmlParserGetDirectory(filename);
14158
ctxt->recovery = recovery;
14160
xmlParseDocument(ctxt);
14162
if ((ctxt->wellFormed) || recovery) {
14165
if (ctxt->input->buf->compressed > 0)
14166
ret->compression = 9;
14168
ret->compression = ctxt->input->buf->compressed;
14173
xmlFreeDoc(ctxt->myDoc);
14174
ctxt->myDoc = NULL;
14178
xmlFreeParserCtxt(ctxt);
14185
* @sax: the SAX handler block
14186
* @filename: the filename
14187
* @recovery: work in recovery mode, i.e. tries to read no Well Formed
14190
* parse an XML file and build a tree. Automatic support for ZLIB/Compress
14191
* compressed document is provided by default if found at compile-time.
14192
* It use the given SAX function block to handle the parsing callback.
14193
* If sax is NULL, fallback to the default DOM tree building routines.
14195
* Returns the resulting document tree
14199
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14201
return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14206
* @cur: a pointer to an array of xmlChar
14208
* parse an XML in-memory document and build a tree.
14209
* In the case the document is not Well Formed, a attempt to build a
14210
* tree is tried anyway
14212
* Returns the resulting document tree or NULL in case of failure
14216
xmlRecoverDoc(const xmlChar *cur) {
14217
return(xmlSAXParseDoc(NULL, cur, 1));
14222
* @filename: the filename
14224
* parse an XML file and build a tree. Automatic support for ZLIB/Compress
14225
* compressed document is provided by default if found at compile-time.
14227
* Returns the resulting document tree if the file was wellformed,
14232
xmlParseFile(const char *filename) {
14233
return(xmlSAXParseFile(NULL, filename, 0));
14238
* @filename: the filename
14240
* parse an XML file and build a tree. Automatic support for ZLIB/Compress
14241
* compressed document is provided by default if found at compile-time.
14242
* In the case the document is not Well Formed, it attempts to build
14245
* Returns the resulting document tree or NULL in case of failure
14249
xmlRecoverFile(const char *filename) {
14250
return(xmlSAXParseFile(NULL, filename, 1));
14255
* xmlSetupParserForBuffer:
14256
* @ctxt: an XML parser context
14257
* @buffer: a xmlChar * buffer
14258
* @filename: a file name
14260
* Setup the parser context to parse a new buffer; Clears any prior
14261
* contents from the parser context. The buffer parameter must not be
14262
* NULL, but the filename parameter can be
14265
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14266
const char* filename)
14268
xmlParserInputPtr input;
14270
if ((ctxt == NULL) || (buffer == NULL))
14273
input = xmlNewInputStream(ctxt);
14274
if (input == NULL) {
14275
xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14276
xmlClearParserCtxt(ctxt);
14280
xmlClearParserCtxt(ctxt);
14281
if (filename != NULL)
14282
input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14283
input->base = buffer;
14284
input->cur = buffer;
14285
input->end = &buffer[xmlStrlen(buffer)];
14286
inputPush(ctxt, input);
14290
* xmlSAXUserParseFile:
14291
* @sax: a SAX handler
14292
* @user_data: The user data returned on SAX callbacks
14293
* @filename: a file name
14295
* parse an XML file and call the given SAX handler routines.
14296
* Automatic support for ZLIB/Compress compressed document is provided
14298
* Returns 0 in case of success or a error number otherwise
14301
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14302
const char *filename) {
14304
xmlParserCtxtPtr ctxt;
14306
ctxt = xmlCreateFileParserCtxt(filename);
14307
if (ctxt == NULL) return -1;
14308
if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14309
xmlFree(ctxt->sax);
14311
xmlDetectSAX2(ctxt);
14313
if (user_data != NULL)
14314
ctxt->userData = user_data;
14316
xmlParseDocument(ctxt);
14318
if (ctxt->wellFormed)
14321
if (ctxt->errNo != 0)
14328
if (ctxt->myDoc != NULL) {
14329
xmlFreeDoc(ctxt->myDoc);
14330
ctxt->myDoc = NULL;
14332
xmlFreeParserCtxt(ctxt);
14336
#endif /* LIBXML_SAX1_ENABLED */
14338
/************************************************************************
14340
* Front ends when parsing from memory *
14342
************************************************************************/
14345
* xmlCreateMemoryParserCtxt:
14346
* @buffer: a pointer to a char array
14347
* @size: the size of the array
14349
* Create a parser context for an XML in-memory document.
14351
* Returns the new parser context or NULL
14354
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14355
xmlParserCtxtPtr ctxt;
14356
xmlParserInputPtr input;
14357
xmlParserInputBufferPtr buf;
14359
if (buffer == NULL)
14364
ctxt = xmlNewParserCtxt();
14368
/* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14369
buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14371
xmlFreeParserCtxt(ctxt);
14375
input = xmlNewInputStream(ctxt);
14376
if (input == NULL) {
14377
xmlFreeParserInputBuffer(buf);
14378
xmlFreeParserCtxt(ctxt);
14382
input->filename = NULL;
14384
xmlBufResetInput(input->buf->buffer, input);
14386
inputPush(ctxt, input);
14390
#ifdef LIBXML_SAX1_ENABLED
14392
* xmlSAXParseMemoryWithData:
14393
* @sax: the SAX handler block
14394
* @buffer: an pointer to a char array
14395
* @size: the size of the array
14396
* @recovery: work in recovery mode, i.e. tries to read no Well Formed
14398
* @data: the userdata
14400
* parse an XML in-memory block and use the given SAX function block
14401
* to handle the parsing callback. If sax is NULL, fallback to the default
14402
* DOM tree building routines.
14404
* User data (void *) is stored within the parser context in the
14405
* context's _private member, so it is available nearly everywhere in libxml
14407
* Returns the resulting document tree
14411
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14412
int size, int recovery, void *data) {
14414
xmlParserCtxtPtr ctxt;
14418
ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14419
if (ctxt == NULL) return(NULL);
14421
if (ctxt->sax != NULL)
14422
xmlFree(ctxt->sax);
14425
xmlDetectSAX2(ctxt);
14427
ctxt->_private=data;
14430
ctxt->recovery = recovery;
14432
xmlParseDocument(ctxt);
14434
if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14437
xmlFreeDoc(ctxt->myDoc);
14438
ctxt->myDoc = NULL;
14442
xmlFreeParserCtxt(ctxt);
14448
* xmlSAXParseMemory:
14449
* @sax: the SAX handler block
14450
* @buffer: an pointer to a char array
14451
* @size: the size of the array
14452
* @recovery: work in recovery mode, i.e. tries to read not Well Formed
14455
* parse an XML in-memory block and use the given SAX function block
14456
* to handle the parsing callback. If sax is NULL, fallback to the default
14457
* DOM tree building routines.
14459
* Returns the resulting document tree
14462
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14463
int size, int recovery) {
14464
return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14469
* @buffer: an pointer to a char array
14470
* @size: the size of the array
14472
* parse an XML in-memory block and build a tree.
14474
* Returns the resulting document tree
14477
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14478
return(xmlSAXParseMemory(NULL, buffer, size, 0));
14482
* xmlRecoverMemory:
14483
* @buffer: an pointer to a char array
14484
* @size: the size of the array
14486
* parse an XML in-memory block and build a tree.
14487
* In the case the document is not Well Formed, an attempt to
14488
* build a tree is tried anyway
14490
* Returns the resulting document tree or NULL in case of error
14493
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14494
return(xmlSAXParseMemory(NULL, buffer, size, 1));
14498
* xmlSAXUserParseMemory:
14499
* @sax: a SAX handler
14500
* @user_data: The user data returned on SAX callbacks
14501
* @buffer: an in-memory XML document input
14502
* @size: the length of the XML document in bytes
14504
* A better SAX parsing routine.
14505
* parse an XML in-memory buffer and call the given SAX handler routines.
14507
* Returns 0 in case of success or a error number otherwise
14509
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14510
const char *buffer, int size) {
14512
xmlParserCtxtPtr ctxt;
14516
ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14517
if (ctxt == NULL) return -1;
14518
if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14519
xmlFree(ctxt->sax);
14521
xmlDetectSAX2(ctxt);
14523
if (user_data != NULL)
14524
ctxt->userData = user_data;
14526
xmlParseDocument(ctxt);
14528
if (ctxt->wellFormed)
14531
if (ctxt->errNo != 0)
14538
if (ctxt->myDoc != NULL) {
14539
xmlFreeDoc(ctxt->myDoc);
14540
ctxt->myDoc = NULL;
14542
xmlFreeParserCtxt(ctxt);
14546
#endif /* LIBXML_SAX1_ENABLED */
14549
* xmlCreateDocParserCtxt:
14550
* @cur: a pointer to an array of xmlChar
14552
* Creates a parser context for an XML in-memory document.
14554
* Returns the new parser context or NULL
14557
xmlCreateDocParserCtxt(const xmlChar *cur) {
14562
len = xmlStrlen(cur);
14563
return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14566
#ifdef LIBXML_SAX1_ENABLED
14569
* @sax: the SAX handler block
14570
* @cur: a pointer to an array of xmlChar
14571
* @recovery: work in recovery mode, i.e. tries to read no Well Formed
14574
* parse an XML in-memory document and build a tree.
14575
* It use the given SAX function block to handle the parsing callback.
14576
* If sax is NULL, fallback to the default DOM tree building routines.
14578
* Returns the resulting document tree
14582
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14584
xmlParserCtxtPtr ctxt;
14585
xmlSAXHandlerPtr oldsax = NULL;
14587
if (cur == NULL) return(NULL);
14590
ctxt = xmlCreateDocParserCtxt(cur);
14591
if (ctxt == NULL) return(NULL);
14593
oldsax = ctxt->sax;
14595
ctxt->userData = NULL;
14597
xmlDetectSAX2(ctxt);
14599
xmlParseDocument(ctxt);
14600
if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14603
xmlFreeDoc(ctxt->myDoc);
14604
ctxt->myDoc = NULL;
14607
ctxt->sax = oldsax;
14608
xmlFreeParserCtxt(ctxt);
14615
* @cur: a pointer to an array of xmlChar
14617
* parse an XML in-memory document and build a tree.
14619
* Returns the resulting document tree
14623
xmlParseDoc(const xmlChar *cur) {
14624
return(xmlSAXParseDoc(NULL, cur, 0));
14626
#endif /* LIBXML_SAX1_ENABLED */
14628
#ifdef LIBXML_LEGACY_ENABLED
14629
/************************************************************************
14631
* Specific function to keep track of entities references *
14632
* and used by the XSLT debugger *
14634
************************************************************************/
14636
/* Callback invoked on entity references; NULL means none is installed. */
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14639
* xmlAddEntityReference:
14640
* @ent : A valid entity
14641
* @firstNode : A valid first node for children of entity
14642
* @lastNode : A valid last node of children entity
14644
* Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14647
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14648
xmlNodePtr lastNode)
14650
if (xmlEntityRefFunc != NULL) {
14651
(*xmlEntityRefFunc) (ent, firstNode, lastNode);
14657
* xmlSetEntityReferenceFunc:
14658
* @func: A valid function
14660
* Set the function to call call back when a xml reference has been made
14663
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14665
xmlEntityRefFunc = func;
14667
#endif /* LIBXML_LEGACY_ENABLED */
14669
/************************************************************************
14673
************************************************************************/
14675
#ifdef LIBXML_XPATH_ENABLED
14676
#include <libxml/xpath.h>
14679
/* Declared here so xmlInitParser() can compare xmlGenericError against it. */
extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14680
/* Set to 1 by xmlInitParser() and back to 0 by xmlCleanupParser(). */
static int xmlParserInitialized = 0;
14685
* Initialization function for the XML parser.
14686
* This is not reentrant. Call once before processing in case of
14687
* use in multithreaded programs.
14691
xmlInitParser(void) {
14692
if (xmlParserInitialized != 0)
14695
#ifdef LIBXML_THREAD_ENABLED
14696
__xmlGlobalInitMutexLock();
14697
if (xmlParserInitialized == 0) {
14701
if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14702
(xmlGenericError == NULL))
14703
initGenericErrorDefaultFunc(NULL);
14705
xmlInitializeDict();
14706
xmlInitCharEncodingHandlers();
14707
xmlDefaultSAXHandlerInit();
14708
xmlRegisterDefaultInputCallbacks();
14709
#ifdef LIBXML_OUTPUT_ENABLED
14710
xmlRegisterDefaultOutputCallbacks();
14711
#endif /* LIBXML_OUTPUT_ENABLED */
14712
#ifdef LIBXML_HTML_ENABLED
14713
htmlInitAutoClose();
14714
htmlDefaultSAXHandlerInit();
14716
#ifdef LIBXML_XPATH_ENABLED
14719
xmlParserInitialized = 1;
14720
#ifdef LIBXML_THREAD_ENABLED
14722
__xmlGlobalInitMutexUnlock();
14727
* xmlCleanupParser:
14729
* This function name is somewhat misleading. It does not clean up
14730
* parser state, it cleans up memory allocated by the library itself.
14731
* It is a cleanup function for the XML library. It tries to reclaim all
14732
* related global memory allocated for the library processing.
14733
* It doesn't deallocate any document related memory. One should
14734
* call xmlCleanupParser() only when the process has finished using
14735
* the library and all XML/HTML documents built with it.
14736
* See also xmlInitParser() which has the opposite function of preparing
14737
* the library for operations.
14739
* WARNING: if your application is multithreaded or has plugin support
14740
* calling this may crash the application if another thread or
14741
* a plugin is still using libxml2. It's sometimes very hard to
14742
* guess if libxml2 is in use in the application, some libraries
14743
* or plugins may use it without notice. In case of doubt abstain
14744
* from calling this function or do it just before calling exit()
14745
* to avoid leak reports from valgrind !
14749
xmlCleanupParser(void) {
14750
if (!xmlParserInitialized)
14753
xmlCleanupCharEncodingHandlers();
14754
#ifdef LIBXML_CATALOG_ENABLED
14755
xmlCatalogCleanup();
14758
xmlCleanupInputCallbacks();
14759
#ifdef LIBXML_OUTPUT_ENABLED
14760
xmlCleanupOutputCallbacks();
14762
#ifdef LIBXML_SCHEMAS_ENABLED
14763
xmlSchemaCleanupTypes();
14764
xmlRelaxNGCleanupTypes();
14766
xmlCleanupGlobals();
14767
xmlResetLastError();
14768
xmlCleanupThreads(); /* must be last if called not from the main thread */
14769
xmlCleanupMemory();
14770
xmlParserInitialized = 0;
14773
/************************************************************************
14775
* New set (2.6.0) of simpler and more flexible APIs *
14777
************************************************************************/
14783
* Free a string if it is not owned by the "dict" dictionary in the
14786
/*
 * DICT_FREE(str): release @str with xmlFree() unless it is NULL or owned
 * by the dictionary named "dict" in the enclosing scope (a NULL "dict"
 * owns nothing).  Wrapped in do/while(0) so it behaves as one statement,
 * including inside an unbraced if/else.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
14793
* @ctxt: an XML parser context
14795
* Reset a parser context
14798
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14800
xmlParserInputPtr input;
14808
while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14809
xmlFreeInputStream(input);
14812
ctxt->input = NULL;
14815
if (ctxt->spaceTab != NULL) {
14816
ctxt->spaceTab[0] = -1;
14817
ctxt->space = &ctxt->spaceTab[0];
14819
ctxt->space = NULL;
14829
DICT_FREE(ctxt->version);
14830
ctxt->version = NULL;
14831
DICT_FREE(ctxt->encoding);
14832
ctxt->encoding = NULL;
14833
DICT_FREE(ctxt->directory);
14834
ctxt->directory = NULL;
14835
DICT_FREE(ctxt->extSubURI);
14836
ctxt->extSubURI = NULL;
14837
DICT_FREE(ctxt->extSubSystem);
14838
ctxt->extSubSystem = NULL;
14839
if (ctxt->myDoc != NULL)
14840
xmlFreeDoc(ctxt->myDoc);
14841
ctxt->myDoc = NULL;
14843
ctxt->standalone = -1;
14844
ctxt->hasExternalSubset = 0;
14845
ctxt->hasPErefs = 0;
14847
ctxt->external = 0;
14848
ctxt->instate = XML_PARSER_START;
14851
ctxt->wellFormed = 1;
14852
ctxt->nsWellFormed = 1;
14853
ctxt->disableSAX = 0;
14856
ctxt->vctxt.userData = ctxt;
14857
ctxt->vctxt.error = xmlParserValidityError;
14858
ctxt->vctxt.warning = xmlParserValidityWarning;
14860
ctxt->record_info = 0;
14862
ctxt->checkIndex = 0;
14863
ctxt->inSubset = 0;
14864
ctxt->errNo = XML_ERR_OK;
14866
ctxt->charset = XML_CHAR_ENCODING_UTF8;
14867
ctxt->catalogs = NULL;
14868
ctxt->nbentities = 0;
14869
ctxt->sizeentities = 0;
14870
ctxt->sizeentcopy = 0;
14871
xmlInitNodeInfoSeq(&ctxt->node_seq);
14873
if (ctxt->attsDefault != NULL) {
14874
xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14875
ctxt->attsDefault = NULL;
14877
if (ctxt->attsSpecial != NULL) {
14878
xmlHashFree(ctxt->attsSpecial, NULL);
14879
ctxt->attsSpecial = NULL;
14882
#ifdef LIBXML_CATALOG_ENABLED
14883
if (ctxt->catalogs != NULL)
14884
xmlCatalogFreeLocal(ctxt->catalogs);
14886
if (ctxt->lastError.code != XML_ERR_OK)
14887
xmlResetError(&ctxt->lastError);
14891
* xmlCtxtResetPush:
14892
* @ctxt: an XML parser context
14893
* @chunk: a pointer to an array of chars
14894
* @size: number of chars in the array
14895
* @filename: an optional file name or URI
14896
* @encoding: the document encoding, or NULL
14898
* Reset a push parser context
14900
* Returns 0 in case of success and 1 in case of error
14903
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14904
int size, const char *filename, const char *encoding)
14906
xmlParserInputPtr inputStream;
14907
xmlParserInputBufferPtr buf;
14908
xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14913
if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14914
enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14916
buf = xmlAllocParserInputBuffer(enc);
14920
if (ctxt == NULL) {
14921
xmlFreeParserInputBuffer(buf);
14925
xmlCtxtReset(ctxt);
14927
if (ctxt->pushTab == NULL) {
14928
ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14929
sizeof(xmlChar *));
14930
if (ctxt->pushTab == NULL) {
14931
xmlErrMemory(ctxt, NULL);
14932
xmlFreeParserInputBuffer(buf);
14937
if (filename == NULL) {
14938
ctxt->directory = NULL;
14940
ctxt->directory = xmlParserGetDirectory(filename);
14943
inputStream = xmlNewInputStream(ctxt);
14944
if (inputStream == NULL) {
14945
xmlFreeParserInputBuffer(buf);
14949
if (filename == NULL)
14950
inputStream->filename = NULL;
14952
inputStream->filename = (char *)
14953
xmlCanonicPath((const xmlChar *) filename);
14954
inputStream->buf = buf;
14955
xmlBufResetInput(buf->buffer, inputStream);
14957
inputPush(ctxt, inputStream);
14959
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14960
(ctxt->input->buf != NULL)) {
14961
size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14962
size_t cur = ctxt->input->cur - ctxt->input->base;
14964
xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14966
xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14968
xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14972
if (encoding != NULL) {
14973
xmlCharEncodingHandlerPtr hdlr;
14975
if (ctxt->encoding != NULL)
14976
xmlFree((xmlChar *) ctxt->encoding);
14977
ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14979
hdlr = xmlFindCharEncodingHandler(encoding);
14980
if (hdlr != NULL) {
14981
xmlSwitchToEncoding(ctxt, hdlr);
14983
xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14984
"Unsupported encoding %s\n", BAD_CAST encoding);
14986
} else if (enc != XML_CHAR_ENCODING_NONE) {
14987
xmlSwitchEncoding(ctxt, enc);
14995
* xmlCtxtUseOptionsInternal:
14996
* @ctxt: an XML parser context
14997
* @options: a combination of xmlParserOption
14998
* @encoding: the user provided encoding to use
15000
* Applies the options to the parser context
15002
* Returns 0 in case of success, the set of unknown or unimplemented options
15003
* in case of error.
15006
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15010
if (encoding != NULL) {
15011
if (ctxt->encoding != NULL)
15012
xmlFree((xmlChar *) ctxt->encoding);
15013
ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15015
if (options & XML_PARSE_RECOVER) {
15016
ctxt->recovery = 1;
15017
options -= XML_PARSE_RECOVER;
15018
ctxt->options |= XML_PARSE_RECOVER;
15020
ctxt->recovery = 0;
15021
if (options & XML_PARSE_DTDLOAD) {
15022
ctxt->loadsubset = XML_DETECT_IDS;
15023
options -= XML_PARSE_DTDLOAD;
15024
ctxt->options |= XML_PARSE_DTDLOAD;
15026
ctxt->loadsubset = 0;
15027
if (options & XML_PARSE_DTDATTR) {
15028
ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15029
options -= XML_PARSE_DTDATTR;
15030
ctxt->options |= XML_PARSE_DTDATTR;
15032
if (options & XML_PARSE_NOENT) {
15033
ctxt->replaceEntities = 1;
15034
/* ctxt->loadsubset |= XML_DETECT_IDS; */
15035
options -= XML_PARSE_NOENT;
15036
ctxt->options |= XML_PARSE_NOENT;
15038
ctxt->replaceEntities = 0;
15039
if (options & XML_PARSE_PEDANTIC) {
15040
ctxt->pedantic = 1;
15041
options -= XML_PARSE_PEDANTIC;
15042
ctxt->options |= XML_PARSE_PEDANTIC;
15044
ctxt->pedantic = 0;
15045
if (options & XML_PARSE_NOBLANKS) {
15046
ctxt->keepBlanks = 0;
15047
ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15048
options -= XML_PARSE_NOBLANKS;
15049
ctxt->options |= XML_PARSE_NOBLANKS;
15051
ctxt->keepBlanks = 1;
15052
if (options & XML_PARSE_DTDVALID) {
15053
ctxt->validate = 1;
15054
if (options & XML_PARSE_NOWARNING)
15055
ctxt->vctxt.warning = NULL;
15056
if (options & XML_PARSE_NOERROR)
15057
ctxt->vctxt.error = NULL;
15058
options -= XML_PARSE_DTDVALID;
15059
ctxt->options |= XML_PARSE_DTDVALID;
15061
ctxt->validate = 0;
15062
if (options & XML_PARSE_NOWARNING) {
15063
ctxt->sax->warning = NULL;
15064
options -= XML_PARSE_NOWARNING;
15066
if (options & XML_PARSE_NOERROR) {
15067
ctxt->sax->error = NULL;
15068
ctxt->sax->fatalError = NULL;
15069
options -= XML_PARSE_NOERROR;
15071
#ifdef LIBXML_SAX1_ENABLED
15072
if (options & XML_PARSE_SAX1) {
15073
ctxt->sax->startElement = xmlSAX2StartElement;
15074
ctxt->sax->endElement = xmlSAX2EndElement;
15075
ctxt->sax->startElementNs = NULL;
15076
ctxt->sax->endElementNs = NULL;
15077
ctxt->sax->initialized = 1;
15078
options -= XML_PARSE_SAX1;
15079
ctxt->options |= XML_PARSE_SAX1;
15081
#endif /* LIBXML_SAX1_ENABLED */
15082
if (options & XML_PARSE_NODICT) {
15083
ctxt->dictNames = 0;
15084
options -= XML_PARSE_NODICT;
15085
ctxt->options |= XML_PARSE_NODICT;
15087
ctxt->dictNames = 1;
15089
if (options & XML_PARSE_NOCDATA) {
15090
ctxt->sax->cdataBlock = NULL;
15091
options -= XML_PARSE_NOCDATA;
15092
ctxt->options |= XML_PARSE_NOCDATA;
15094
if (options & XML_PARSE_NSCLEAN) {
15095
ctxt->options |= XML_PARSE_NSCLEAN;
15096
options -= XML_PARSE_NSCLEAN;
15098
if (options & XML_PARSE_NONET) {
15099
ctxt->options |= XML_PARSE_NONET;
15100
options -= XML_PARSE_NONET;
15102
if (options & XML_PARSE_COMPACT) {
15103
ctxt->options |= XML_PARSE_COMPACT;
15104
options -= XML_PARSE_COMPACT;
15106
if (options & XML_PARSE_OLD10) {
15107
ctxt->options |= XML_PARSE_OLD10;
15108
options -= XML_PARSE_OLD10;
15110
if (options & XML_PARSE_NOBASEFIX) {
15111
ctxt->options |= XML_PARSE_NOBASEFIX;
15112
options -= XML_PARSE_NOBASEFIX;
15114
if (options & XML_PARSE_HUGE) {
15115
ctxt->options |= XML_PARSE_HUGE;
15116
options -= XML_PARSE_HUGE;
15117
if (ctxt->dict != NULL)
15118
xmlDictSetLimit(ctxt->dict, 0);
15120
if (options & XML_PARSE_OLDSAX) {
15121
ctxt->options |= XML_PARSE_OLDSAX;
15122
options -= XML_PARSE_OLDSAX;
15124
if (options & XML_PARSE_IGNORE_ENC) {
15125
ctxt->options |= XML_PARSE_IGNORE_ENC;
15126
options -= XML_PARSE_IGNORE_ENC;
15128
if (options & XML_PARSE_BIG_LINES) {
15129
ctxt->options |= XML_PARSE_BIG_LINES;
15130
options -= XML_PARSE_BIG_LINES;
15132
ctxt->linenumbers = 1;
15137
* xmlCtxtUseOptions:
15138
* @ctxt: an XML parser context
15139
* @options: a combination of xmlParserOption
15141
* Applies the options to the parser context
15143
* Returns 0 in case of success, the set of unknown or unimplemented options
15144
* in case of error.
15147
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15149
return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15154
* @ctxt: an XML parser context
15155
* @URL: the base URL to use for the document
15156
* @encoding: the document encoding, or NULL
15157
* @options: a combination of xmlParserOption
15158
* @reuse: keep the context for reuse
15160
* Common front-end for the xmlRead functions
15162
* Returns the resulting document tree or NULL
15165
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15166
int options, int reuse)
15170
xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15171
if (encoding != NULL) {
15172
xmlCharEncodingHandlerPtr hdlr;
15174
hdlr = xmlFindCharEncodingHandler(encoding);
15176
xmlSwitchToEncoding(ctxt, hdlr);
15178
if ((URL != NULL) && (ctxt->input != NULL) &&
15179
(ctxt->input->filename == NULL))
15180
ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15181
xmlParseDocument(ctxt);
15182
if ((ctxt->wellFormed) || ctxt->recovery)
15186
if (ctxt->myDoc != NULL) {
15187
xmlFreeDoc(ctxt->myDoc);
15190
ctxt->myDoc = NULL;
15192
xmlFreeParserCtxt(ctxt);
15200
* @cur: a pointer to a zero terminated string
15201
* @URL: the base URL to use for the document
15202
* @encoding: the document encoding, or NULL
15203
* @options: a combination of xmlParserOption
15205
* parse an XML in-memory document and build a tree.
15207
* Returns the resulting document tree
15210
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15212
xmlParserCtxtPtr ctxt;
15217
ctxt = xmlCreateDocParserCtxt(cur);
15220
return (xmlDoRead(ctxt, URL, encoding, options, 0));
15225
* @filename: a file or URL
15226
* @encoding: the document encoding, or NULL
15227
* @options: a combination of xmlParserOption
15229
* parse an XML file from the filesystem or the network.
15231
* Returns the resulting document tree
15234
xmlReadFile(const char *filename, const char *encoding, int options)
15236
xmlParserCtxtPtr ctxt;
15238
ctxt = xmlCreateURLParserCtxt(filename, options);
15241
return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15246
* @buffer: a pointer to a char array
15247
* @size: the size of the array
15248
* @URL: the base URL to use for the document
15249
* @encoding: the document encoding, or NULL
15250
* @options: a combination of xmlParserOption
15252
* parse an XML in-memory document and build a tree.
15254
* Returns the resulting document tree
15257
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15259
xmlParserCtxtPtr ctxt;
15261
ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15264
return (xmlDoRead(ctxt, URL, encoding, options, 0));
15269
* @fd: an open file descriptor
15270
* @URL: the base URL to use for the document
15271
* @encoding: the document encoding, or NULL
15272
* @options: a combination of xmlParserOption
15274
* parse an XML from a file descriptor and build a tree.
15275
* NOTE that the file descriptor will not be closed when the
15276
* reader is closed or reset.
15278
* Returns the resulting document tree
15281
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15283
xmlParserCtxtPtr ctxt;
15284
xmlParserInputBufferPtr input;
15285
xmlParserInputPtr stream;
15290
input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15293
input->closecallback = NULL;
15294
ctxt = xmlNewParserCtxt();
15295
if (ctxt == NULL) {
15296
xmlFreeParserInputBuffer(input);
15299
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15300
if (stream == NULL) {
15301
xmlFreeParserInputBuffer(input);
15302
xmlFreeParserCtxt(ctxt);
15305
inputPush(ctxt, stream);
15306
return (xmlDoRead(ctxt, URL, encoding, options, 0));
15311
* @ioread: an I/O read function
15312
* @ioclose: an I/O close function
15313
* @ioctx: an I/O handler
15314
* @URL: the base URL to use for the document
15315
* @encoding: the document encoding, or NULL
15316
* @options: a combination of xmlParserOption
15318
* parse an XML document from I/O functions and source and build a tree.
15320
* Returns the resulting document tree
15323
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15324
void *ioctx, const char *URL, const char *encoding, int options)
15326
xmlParserCtxtPtr ctxt;
15327
xmlParserInputBufferPtr input;
15328
xmlParserInputPtr stream;
15330
if (ioread == NULL)
15333
input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15334
XML_CHAR_ENCODING_NONE);
15335
if (input == NULL) {
15336
if (ioclose != NULL)
15340
ctxt = xmlNewParserCtxt();
15341
if (ctxt == NULL) {
15342
xmlFreeParserInputBuffer(input);
15345
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15346
if (stream == NULL) {
15347
xmlFreeParserInputBuffer(input);
15348
xmlFreeParserCtxt(ctxt);
15351
inputPush(ctxt, stream);
15352
return (xmlDoRead(ctxt, URL, encoding, options, 0));
15357
* @ctxt: an XML parser context
15358
* @cur: a pointer to a zero terminated string
15359
* @URL: the base URL to use for the document
15360
* @encoding: the document encoding, or NULL
15361
* @options: a combination of xmlParserOption
15363
* parse an XML in-memory document and build a tree.
15364
* This reuses the existing @ctxt parser context
15366
* Returns the resulting document tree
15369
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15370
const char *URL, const char *encoding, int options)
15372
xmlParserInputPtr stream;
15379
xmlCtxtReset(ctxt);
15381
stream = xmlNewStringInputStream(ctxt, cur);
15382
if (stream == NULL) {
15385
inputPush(ctxt, stream);
15386
return (xmlDoRead(ctxt, URL, encoding, options, 1));
15391
* @ctxt: an XML parser context
15392
* @filename: a file or URL
15393
* @encoding: the document encoding, or NULL
15394
* @options: a combination of xmlParserOption
15396
* parse an XML file from the filesystem or the network.
15397
* This reuses the existing @ctxt parser context
15399
* Returns the resulting document tree
15402
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15403
const char *encoding, int options)
15405
xmlParserInputPtr stream;
15407
if (filename == NULL)
15412
xmlCtxtReset(ctxt);
15414
stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15415
if (stream == NULL) {
15418
inputPush(ctxt, stream);
15419
return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15423
* xmlCtxtReadMemory:
15424
* @ctxt: an XML parser context
15425
* @buffer: a pointer to a char array
15426
* @size: the size of the array
15427
* @URL: the base URL to use for the document
15428
* @encoding: the document encoding, or NULL
15429
* @options: a combination of xmlParserOption
15431
* parse an XML in-memory document and build a tree.
15432
* This reuses the existing @ctxt parser context
15434
* Returns the resulting document tree
15437
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15438
const char *URL, const char *encoding, int options)
15440
xmlParserInputBufferPtr input;
15441
xmlParserInputPtr stream;
15445
if (buffer == NULL)
15448
xmlCtxtReset(ctxt);
15450
input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15451
if (input == NULL) {
15455
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15456
if (stream == NULL) {
15457
xmlFreeParserInputBuffer(input);
15461
inputPush(ctxt, stream);
15462
return (xmlDoRead(ctxt, URL, encoding, options, 1));
15467
* @ctxt: an XML parser context
15468
* @fd: an open file descriptor
15469
* @URL: the base URL to use for the document
15470
* @encoding: the document encoding, or NULL
15471
* @options: a combination of xmlParserOption
15473
* parse an XML from a file descriptor and build a tree.
15474
* This reuses the existing @ctxt parser context
15475
* NOTE that the file descriptor will not be closed when the
15476
* reader is closed or reset.
15478
* Returns the resulting document tree
15481
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15482
const char *URL, const char *encoding, int options)
15484
xmlParserInputBufferPtr input;
15485
xmlParserInputPtr stream;
15492
xmlCtxtReset(ctxt);
15495
input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15498
input->closecallback = NULL;
15499
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15500
if (stream == NULL) {
15501
xmlFreeParserInputBuffer(input);
15504
inputPush(ctxt, stream);
15505
return (xmlDoRead(ctxt, URL, encoding, options, 1));
15510
* @ctxt: an XML parser context
15511
* @ioread: an I/O read function
15512
* @ioclose: an I/O close function
15513
* @ioctx: an I/O handler
15514
* @URL: the base URL to use for the document
15515
* @encoding: the document encoding, or NULL
15516
* @options: a combination of xmlParserOption
15518
* parse an XML document from I/O functions and source and build a tree.
15519
* This reuses the existing @ctxt parser context
15521
* Returns the resulting document tree
15524
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15525
xmlInputCloseCallback ioclose, void *ioctx,
15527
const char *encoding, int options)
15529
xmlParserInputBufferPtr input;
15530
xmlParserInputPtr stream;
15532
if (ioread == NULL)
15537
xmlCtxtReset(ctxt);
15539
input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15540
XML_CHAR_ENCODING_NONE);
15541
if (input == NULL) {
15542
if (ioclose != NULL)
15546
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15547
if (stream == NULL) {
15548
xmlFreeParserInputBuffer(input);
15551
inputPush(ctxt, stream);
15552
return (xmlDoRead(ctxt, URL, encoding, options, 1));
15555
#define bottom_parser
15556
#include "elfgcchack.h"