2
* parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
* implemented on top of the SAX interfaces
6
* The XML specification:
7
* http://www.w3.org/TR/REC-xml
8
* Original 1.0 version:
9
* http://www.w3.org/TR/1998/REC-xml-19980210
10
* XML second edition working draft
11
* http://www.w3.org/TR/2000/WD-xml-2e-20000814
13
* Okay this is a big file, the parser core is around 7000 lines, then it
14
* is followed by the progressive parser top routines, then the various
15
* high level APIs to call the parser and a few miscellaneous functions.
16
* A number of helper functions and deprecated ones have been moved to
17
* parserInternals.c to reduce this file size.
18
* As much as possible the functions are associated with their relative
19
* production in the XML specification. A few productions defining the
20
* different ranges of characters are actually implemented either in
21
* parserInternals.h or parserInternals.c
22
* The DOM tree build is realized from the default SAX callbacks in
24
* The routines doing the validation checks are in valid.c and called either
25
* from the SAX callbacks or as standalone functions using a preparsed
28
* See Copyright for the status of this software.
36
#if defined(WIN32) && !defined (__CYGWIN__)
37
#define XML_DIR_SEP '\\'
39
#define XML_DIR_SEP '/'
46
#include <libxml/xmlmemory.h>
47
#include <libxml/threads.h>
48
#include <libxml/globals.h>
49
#include <libxml/tree.h>
50
#include <libxml/parser.h>
51
#include <libxml/parserInternals.h>
52
#include <libxml/valid.h>
53
#include <libxml/entities.h>
54
#include <libxml/xmlerror.h>
55
#include <libxml/encoding.h>
56
#include <libxml/xmlIO.h>
57
#include <libxml/uri.h>
58
#ifdef LIBXML_CATALOG_ENABLED
59
#include <libxml/catalog.h>
61
#ifdef LIBXML_SCHEMAS_ENABLED
62
#include <libxml/xmlschemastypes.h>
63
#include <libxml/relaxng.h>
71
#ifdef HAVE_SYS_STAT_H
91
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
93
static xmlParserCtxtPtr
94
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95
const xmlChar *base, xmlParserCtxtPtr pctx);
97
/************************************************************************
99
* Arbitrary limits set in the parser. See XML_PARSE_HUGE *
101
************************************************************************/
103
#define XML_PARSER_BIG_ENTITY 1000
104
#define XML_PARSER_LOT_ENTITY 5000
107
* XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
108
* replacement over the size in byte of the input indicates that you have
109
* an exponential behaviour. A value of 10 corresponds to at least 3 entity
110
* replacement per byte of input.
112
#define XML_PARSER_NON_LINEAR 10
115
* xmlParserEntityCheck
117
* Function to check non-linear entity expansion behaviour
118
* This is here to detect and stop exponential (non-linear) entity expansion
119
* This is not a limitation of the parser but a safety
120
* boundary feature. It can be disabled with the XML_PARSE_HUGE
124
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
125
xmlEntityPtr ent, size_t replacement)
129
if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
131
if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
133
if (replacement != 0) {
134
if (replacement < XML_MAX_TEXT_LENGTH)
138
* If the volume of entity copy reaches 10 times the
139
* amount of parsed data and over the large text threshold
140
* then that's very likely to be an abuse.
142
if (ctxt->input != NULL) {
143
consumed = ctxt->input->consumed +
144
(ctxt->input->cur - ctxt->input->base);
146
consumed += ctxt->sizeentities;
148
if (replacement < XML_PARSER_NON_LINEAR * consumed)
150
} else if (size != 0) {
152
* Do the check based on the replacement size of the entity
154
if (size < XML_PARSER_BIG_ENTITY)
158
* A limit on the amount of text data reasonably used
160
if (ctxt->input != NULL) {
161
consumed = ctxt->input->consumed +
162
(ctxt->input->cur - ctxt->input->base);
164
consumed += ctxt->sizeentities;
166
if ((size < XML_PARSER_NON_LINEAR * consumed) &&
167
(ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
169
} else if (ent != NULL) {
171
* use the number of parsed entities in the replacement
173
size = ent->checked / 2;
176
* The amount of data parsed counting entities size only once
178
if (ctxt->input != NULL) {
179
consumed = ctxt->input->consumed +
180
(ctxt->input->cur - ctxt->input->base);
182
consumed += ctxt->sizeentities;
185
* Check the density of entities for the amount of data
186
* knowing an entity reference will take at least 3 bytes
188
if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
192
* strange we got no data for checking just return
196
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
203
* arbitrary depth limit for the XML documents that we allow to
204
* process. This is not a limitation of the parser but a safety
205
* boundary feature. It can be disabled with the XML_PARSE_HUGE
208
unsigned int xmlParserMaxDepth = 256;
213
#define XML_PARSER_BIG_BUFFER_SIZE 300
214
#define XML_PARSER_BUFFER_SIZE 100
215
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
218
* XML_PARSER_CHUNK_SIZE
220
* When calling GROW that's the minimal amount of data
221
* the parser expected to have received. It is not a hard
222
* limit but an optimization when reading strings like Names
223
* It is not strictly needed as long as the input's available characters
224
* are followed by 0, which should be provided by the I/O level
226
#define XML_PARSER_CHUNK_SIZE 100
229
* List of XML prefixed PI allowed by W3C specs
232
static const char *xmlW3CPIs[] = {
239
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
240
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
241
const xmlChar **str);
243
static xmlParserErrors
244
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
245
xmlSAXHandlerPtr sax,
246
void *user_data, int depth, const xmlChar *URL,
247
const xmlChar *ID, xmlNodePtr *list);
250
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
251
const char *encoding);
252
#ifdef LIBXML_LEGACY_ENABLED
254
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
255
xmlNodePtr lastNode);
256
#endif /* LIBXML_LEGACY_ENABLED */
258
static xmlParserErrors
259
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
260
const xmlChar *string, void *user_data, xmlNodePtr *lst);
263
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
265
/************************************************************************
267
* Some factorized error routines *
269
************************************************************************/
272
* xmlErrAttributeDup:
273
* @ctxt: an XML parser context
274
* @prefix: the attribute prefix
275
* @localname: the attribute localname
277
* Handle a redefinition of attribute error
280
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
281
const xmlChar * localname)
283
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
284
(ctxt->instate == XML_PARSER_EOF))
287
ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
290
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
291
XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
292
(const char *) localname, NULL, NULL, 0, 0,
293
"Attribute %s redefined\n", localname);
295
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
296
XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
297
(const char *) prefix, (const char *) localname,
298
NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
301
ctxt->wellFormed = 0;
302
if (ctxt->recovery == 0)
303
ctxt->disableSAX = 1;
309
* @ctxt: an XML parser context
310
* @error: the error number
311
* @info: extra information string
313
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
316
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
319
char errstr[129] = "";
321
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
322
(ctxt->instate == XML_PARSER_EOF))
325
case XML_ERR_INVALID_HEX_CHARREF:
326
errmsg = "CharRef: invalid hexadecimal value";
328
case XML_ERR_INVALID_DEC_CHARREF:
329
errmsg = "CharRef: invalid decimal value";
331
case XML_ERR_INVALID_CHARREF:
332
errmsg = "CharRef: invalid value";
334
case XML_ERR_INTERNAL_ERROR:
335
errmsg = "internal error";
337
case XML_ERR_PEREF_AT_EOF:
338
errmsg = "PEReference at end of document";
340
case XML_ERR_PEREF_IN_PROLOG:
341
errmsg = "PEReference in prolog";
343
case XML_ERR_PEREF_IN_EPILOG:
344
errmsg = "PEReference in epilog";
346
case XML_ERR_PEREF_NO_NAME:
347
errmsg = "PEReference: no name";
349
case XML_ERR_PEREF_SEMICOL_MISSING:
350
errmsg = "PEReference: expecting ';'";
352
case XML_ERR_ENTITY_LOOP:
353
errmsg = "Detected an entity reference loop";
355
case XML_ERR_ENTITY_NOT_STARTED:
356
errmsg = "EntityValue: \" or ' expected";
358
case XML_ERR_ENTITY_PE_INTERNAL:
359
errmsg = "PEReferences forbidden in internal subset";
361
case XML_ERR_ENTITY_NOT_FINISHED:
362
errmsg = "EntityValue: \" or ' expected";
364
case XML_ERR_ATTRIBUTE_NOT_STARTED:
365
errmsg = "AttValue: \" or ' expected";
367
case XML_ERR_LT_IN_ATTRIBUTE:
368
errmsg = "Unescaped '<' not allowed in attributes values";
370
case XML_ERR_LITERAL_NOT_STARTED:
371
errmsg = "SystemLiteral \" or ' expected";
373
case XML_ERR_LITERAL_NOT_FINISHED:
374
errmsg = "Unfinished System or Public ID \" or ' expected";
376
case XML_ERR_MISPLACED_CDATA_END:
377
errmsg = "Sequence ']]>' not allowed in content";
379
case XML_ERR_URI_REQUIRED:
380
errmsg = "SYSTEM or PUBLIC, the URI is missing";
382
case XML_ERR_PUBID_REQUIRED:
383
errmsg = "PUBLIC, the Public Identifier is missing";
385
case XML_ERR_HYPHEN_IN_COMMENT:
386
errmsg = "Comment must not contain '--' (double-hyphen)";
388
case XML_ERR_PI_NOT_STARTED:
389
errmsg = "xmlParsePI : no target name";
391
case XML_ERR_RESERVED_XML_NAME:
392
errmsg = "Invalid PI name";
394
case XML_ERR_NOTATION_NOT_STARTED:
395
errmsg = "NOTATION: Name expected here";
397
case XML_ERR_NOTATION_NOT_FINISHED:
398
errmsg = "'>' required to close NOTATION declaration";
400
case XML_ERR_VALUE_REQUIRED:
401
errmsg = "Entity value required";
403
case XML_ERR_URI_FRAGMENT:
404
errmsg = "Fragment not allowed";
406
case XML_ERR_ATTLIST_NOT_STARTED:
407
errmsg = "'(' required to start ATTLIST enumeration";
409
case XML_ERR_NMTOKEN_REQUIRED:
410
errmsg = "NmToken expected in ATTLIST enumeration";
412
case XML_ERR_ATTLIST_NOT_FINISHED:
413
errmsg = "')' required to finish ATTLIST enumeration";
415
case XML_ERR_MIXED_NOT_STARTED:
416
errmsg = "MixedContentDecl : '|' or ')*' expected";
418
case XML_ERR_PCDATA_REQUIRED:
419
errmsg = "MixedContentDecl : '#PCDATA' expected";
421
case XML_ERR_ELEMCONTENT_NOT_STARTED:
422
errmsg = "ContentDecl : Name or '(' expected";
424
case XML_ERR_ELEMCONTENT_NOT_FINISHED:
425
errmsg = "ContentDecl : ',' '|' or ')' expected";
427
case XML_ERR_PEREF_IN_INT_SUBSET:
429
"PEReference: forbidden within markup decl in internal subset";
431
case XML_ERR_GT_REQUIRED:
432
errmsg = "expected '>'";
434
case XML_ERR_CONDSEC_INVALID:
435
errmsg = "XML conditional section '[' expected";
437
case XML_ERR_EXT_SUBSET_NOT_FINISHED:
438
errmsg = "Content error in the external subset";
440
case XML_ERR_CONDSEC_INVALID_KEYWORD:
442
"conditional section INCLUDE or IGNORE keyword expected";
444
case XML_ERR_CONDSEC_NOT_FINISHED:
445
errmsg = "XML conditional section not closed";
447
case XML_ERR_XMLDECL_NOT_STARTED:
448
errmsg = "Text declaration '<?xml' required";
450
case XML_ERR_XMLDECL_NOT_FINISHED:
451
errmsg = "parsing XML declaration: '?>' expected";
453
case XML_ERR_EXT_ENTITY_STANDALONE:
454
errmsg = "external parsed entities cannot be standalone";
456
case XML_ERR_ENTITYREF_SEMICOL_MISSING:
457
errmsg = "EntityRef: expecting ';'";
459
case XML_ERR_DOCTYPE_NOT_FINISHED:
460
errmsg = "DOCTYPE improperly terminated";
462
case XML_ERR_LTSLASH_REQUIRED:
463
errmsg = "EndTag: '</' not found";
465
case XML_ERR_EQUAL_REQUIRED:
466
errmsg = "expected '='";
468
case XML_ERR_STRING_NOT_CLOSED:
469
errmsg = "String not closed expecting \" or '";
471
case XML_ERR_STRING_NOT_STARTED:
472
errmsg = "String not started expecting ' or \"";
474
case XML_ERR_ENCODING_NAME:
475
errmsg = "Invalid XML encoding name";
477
case XML_ERR_STANDALONE_VALUE:
478
errmsg = "standalone accepts only 'yes' or 'no'";
480
case XML_ERR_DOCUMENT_EMPTY:
481
errmsg = "Document is empty";
483
case XML_ERR_DOCUMENT_END:
484
errmsg = "Extra content at the end of the document";
486
case XML_ERR_NOT_WELL_BALANCED:
487
errmsg = "chunk is not well balanced";
489
case XML_ERR_EXTRA_CONTENT:
490
errmsg = "extra content at the end of well balanced chunk";
492
case XML_ERR_VERSION_MISSING:
493
errmsg = "Malformed declaration expecting version";
495
case XML_ERR_NAME_TOO_LONG:
496
errmsg = "Name too long use XML_PARSE_HUGE option";
504
errmsg = "Unregistered error message";
507
snprintf(errstr, 128, "%s\n", errmsg);
509
snprintf(errstr, 128, "%s: %%s\n", errmsg);
512
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
513
XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0],
516
ctxt->wellFormed = 0;
517
if (ctxt->recovery == 0)
518
ctxt->disableSAX = 1;
524
* @ctxt: an XML parser context
525
* @error: the error number
526
* @msg: the error message
528
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
531
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
534
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
535
(ctxt->instate == XML_PARSER_EOF))
539
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
540
XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
542
ctxt->wellFormed = 0;
543
if (ctxt->recovery == 0)
544
ctxt->disableSAX = 1;
550
* @ctxt: an XML parser context
551
* @error: the error number
552
* @msg: the error message
559
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
560
const char *msg, const xmlChar *str1, const xmlChar *str2)
562
xmlStructuredErrorFunc schannel = NULL;
564
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
565
(ctxt->instate == XML_PARSER_EOF))
567
if ((ctxt != NULL) && (ctxt->sax != NULL) &&
568
(ctxt->sax->initialized == XML_SAX2_MAGIC))
569
schannel = ctxt->sax->serror;
571
__xmlRaiseError(schannel,
572
(ctxt->sax) ? ctxt->sax->warning : NULL,
574
ctxt, NULL, XML_FROM_PARSER, error,
575
XML_ERR_WARNING, NULL, 0,
576
(const char *) str1, (const char *) str2, NULL, 0, 0,
577
msg, (const char *) str1, (const char *) str2);
579
__xmlRaiseError(schannel, NULL, NULL,
580
ctxt, NULL, XML_FROM_PARSER, error,
581
XML_ERR_WARNING, NULL, 0,
582
(const char *) str1, (const char *) str2, NULL, 0, 0,
583
msg, (const char *) str1, (const char *) str2);
589
* @ctxt: an XML parser context
590
* @error: the error number
591
* @msg: the error message
594
* Handle a validity error.
597
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
598
const char *msg, const xmlChar *str1, const xmlChar *str2)
600
xmlStructuredErrorFunc schannel = NULL;
602
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
603
(ctxt->instate == XML_PARSER_EOF))
607
if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
608
schannel = ctxt->sax->serror;
611
__xmlRaiseError(schannel,
612
ctxt->vctxt.error, ctxt->vctxt.userData,
613
ctxt, NULL, XML_FROM_DTD, error,
614
XML_ERR_ERROR, NULL, 0, (const char *) str1,
615
(const char *) str2, NULL, 0, 0,
616
msg, (const char *) str1, (const char *) str2);
619
__xmlRaiseError(schannel, NULL, NULL,
620
ctxt, NULL, XML_FROM_DTD, error,
621
XML_ERR_ERROR, NULL, 0, (const char *) str1,
622
(const char *) str2, NULL, 0, 0,
623
msg, (const char *) str1, (const char *) str2);
629
* @ctxt: an XML parser context
630
* @error: the error number
631
* @msg: the error message
632
* @val: an integer value
634
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
637
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
638
const char *msg, int val)
640
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
641
(ctxt->instate == XML_PARSER_EOF))
645
__xmlRaiseError(NULL, NULL, NULL,
646
ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
647
NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
649
ctxt->wellFormed = 0;
650
if (ctxt->recovery == 0)
651
ctxt->disableSAX = 1;
656
* xmlFatalErrMsgStrIntStr:
657
* @ctxt: an XML parser context
658
* @error: the error number
659
* @msg: the error message
660
* @str1: a string info
661
* @val: an integer value
662
* @str2: a string info
664
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
667
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
668
const char *msg, const xmlChar *str1, int val,
671
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
672
(ctxt->instate == XML_PARSER_EOF))
676
__xmlRaiseError(NULL, NULL, NULL,
677
ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
678
NULL, 0, (const char *) str1, (const char *) str2,
679
NULL, val, 0, msg, str1, val, str2);
681
ctxt->wellFormed = 0;
682
if (ctxt->recovery == 0)
683
ctxt->disableSAX = 1;
689
* @ctxt: an XML parser context
690
* @error: the error number
691
* @msg: the error message
692
* @val: a string value
694
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
697
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
698
const char *msg, const xmlChar * val)
700
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
701
(ctxt->instate == XML_PARSER_EOF))
705
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
706
XML_FROM_PARSER, error, XML_ERR_FATAL,
707
NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
710
ctxt->wellFormed = 0;
711
if (ctxt->recovery == 0)
712
ctxt->disableSAX = 1;
718
* @ctxt: an XML parser context
719
* @error: the error number
720
* @msg: the error message
721
* @val: a string value
723
* Handle a non fatal parser error
726
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
727
const char *msg, const xmlChar * val)
729
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
730
(ctxt->instate == XML_PARSER_EOF))
734
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
735
XML_FROM_PARSER, error, XML_ERR_ERROR,
736
NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
742
* @ctxt: an XML parser context
743
* @error: the error number
745
* @info1: extra information string
746
* @info2: extra information string
748
* Handle a namespace error, i.e. violating namespace well-formedness constraints
751
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
753
const xmlChar * info1, const xmlChar * info2,
754
const xmlChar * info3)
756
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
757
(ctxt->instate == XML_PARSER_EOF))
761
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
762
XML_ERR_ERROR, NULL, 0, (const char *) info1,
763
(const char *) info2, (const char *) info3, 0, 0, msg,
764
info1, info2, info3);
766
ctxt->nsWellFormed = 0;
771
* @ctxt: an XML parser context
772
* @error: the error number
774
* @info1: extra information string
775
* @info2: extra information string
777
* Handle a namespace warning error
780
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
782
const xmlChar * info1, const xmlChar * info2,
783
const xmlChar * info3)
785
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
786
(ctxt->instate == XML_PARSER_EOF))
788
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
789
XML_ERR_WARNING, NULL, 0, (const char *) info1,
790
(const char *) info2, (const char *) info3, 0, 0, msg,
791
info1, info2, info3);
794
/************************************************************************
796
* Library wide options *
798
************************************************************************/
802
* @feature: the feature to be examined
804
* Examines if the library has been compiled with a given feature.
806
* Returns a non-zero value if the feature exists, otherwise zero.
807
* Returns zero (0) if the feature does not exist or an unknown
808
* feature is requested, non-zero otherwise.
811
xmlHasFeature(xmlFeature feature)
814
case XML_WITH_THREAD:
815
#ifdef LIBXML_THREAD_ENABLED
821
#ifdef LIBXML_TREE_ENABLED
826
case XML_WITH_OUTPUT:
827
#ifdef LIBXML_OUTPUT_ENABLED
833
#ifdef LIBXML_PUSH_ENABLED
838
case XML_WITH_READER:
839
#ifdef LIBXML_READER_ENABLED
844
case XML_WITH_PATTERN:
845
#ifdef LIBXML_PATTERN_ENABLED
850
case XML_WITH_WRITER:
851
#ifdef LIBXML_WRITER_ENABLED
857
#ifdef LIBXML_SAX1_ENABLED
863
#ifdef LIBXML_FTP_ENABLED
869
#ifdef LIBXML_HTTP_ENABLED
875
#ifdef LIBXML_VALID_ENABLED
881
#ifdef LIBXML_HTML_ENABLED
886
case XML_WITH_LEGACY:
887
#ifdef LIBXML_LEGACY_ENABLED
893
#ifdef LIBXML_C14N_ENABLED
898
case XML_WITH_CATALOG:
899
#ifdef LIBXML_CATALOG_ENABLED
905
#ifdef LIBXML_XPATH_ENABLED
911
#ifdef LIBXML_XPTR_ENABLED
916
case XML_WITH_XINCLUDE:
917
#ifdef LIBXML_XINCLUDE_ENABLED
923
#ifdef LIBXML_ICONV_ENABLED
928
case XML_WITH_ISO8859X:
929
#ifdef LIBXML_ISO8859X_ENABLED
934
case XML_WITH_UNICODE:
935
#ifdef LIBXML_UNICODE_ENABLED
940
case XML_WITH_REGEXP:
941
#ifdef LIBXML_REGEXP_ENABLED
946
case XML_WITH_AUTOMATA:
947
#ifdef LIBXML_AUTOMATA_ENABLED
953
#ifdef LIBXML_EXPR_ENABLED
958
case XML_WITH_SCHEMAS:
959
#ifdef LIBXML_SCHEMAS_ENABLED
964
case XML_WITH_SCHEMATRON:
965
#ifdef LIBXML_SCHEMATRON_ENABLED
970
case XML_WITH_MODULES:
971
#ifdef LIBXML_MODULES_ENABLED
977
#ifdef LIBXML_DEBUG_ENABLED
982
case XML_WITH_DEBUG_MEM:
983
#ifdef DEBUG_MEMORY_LOCATION
988
case XML_WITH_DEBUG_RUN:
989
#ifdef LIBXML_DEBUG_RUNTIME
995
#ifdef LIBXML_ZLIB_ENABLED
1001
#ifdef LIBXML_LZMA_ENABLED
1007
#ifdef LIBXML_ICU_ENABLED
1018
/************************************************************************
1020
* SAX2 defaulted attributes handling *
1022
************************************************************************/
1026
* @ctxt: an XML parser context
1028
* Do the SAX2 detection and specific initialization
1031
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1032
if (ctxt == NULL) return;
1033
#ifdef LIBXML_SAX1_ENABLED
1034
if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1035
((ctxt->sax->startElementNs != NULL) ||
1036
(ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1039
#endif /* LIBXML_SAX1_ENABLED */
1041
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1042
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1043
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1044
if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1045
(ctxt->str_xml_ns == NULL)) {
1046
xmlErrMemory(ctxt, NULL);
1050
typedef struct _xmlDefAttrs xmlDefAttrs;
1051
typedef xmlDefAttrs *xmlDefAttrsPtr;
1052
struct _xmlDefAttrs {
1053
int nbAttrs; /* number of defaulted attributes on that element */
1054
int maxAttrs; /* the size of the array */
1055
const xmlChar *values[5]; /* array of localname/prefix/values/external */
1059
* xmlAttrNormalizeSpace:
1060
* @src: the source string
1061
* @dst: the target string
1063
* Normalize the space in non CDATA attribute values:
1064
* If the attribute type is not CDATA, then the XML processor MUST further
1065
* process the normalized attribute value by discarding any leading and
1066
* trailing space (#x20) characters, and by replacing sequences of space
1067
* (#x20) characters by a single space (#x20) character.
1068
* Note that the size of dst needs to be at least that of src, and if one doesn't need
1069
* to preserve dst (and it doesn't come from a dictionary or read-only) then
1070
* passing src as dst is just fine.
1072
* Returns a pointer to the normalized value (dst) or NULL if no conversion
1076
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1078
if ((src == NULL) || (dst == NULL))
1081
while (*src == 0x20) src++;
1084
while (*src == 0x20) src++;
1098
* xmlAttrNormalizeSpace2:
1099
* @src: the source string
1101
* Normalize the space in non CDATA attribute values, a slightly more complex
1102
* front end to avoid allocation problems when running on attribute values
1103
* coming from the input.
1105
* Returns a pointer to the normalized value (dst) or NULL if no conversion
1108
static const xmlChar *
1109
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1112
int remove_head = 0;
1113
int need_realloc = 0;
1116
if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1123
while (*cur == 0x20) {
1130
if ((*cur == 0x20) || (*cur == 0)) {
1140
ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1142
xmlErrMemory(ctxt, NULL);
1145
xmlAttrNormalizeSpace(ret, ret);
1146
*len = (int) strlen((const char *)ret);
1148
} else if (remove_head) {
1149
*len -= remove_head;
1150
memmove(src, src + remove_head, 1 + *len);
1158
* @ctxt: an XML parser context
1159
* @fullname: the element fullname
1160
* @fullattr: the attribute fullname
1161
* @value: the attribute value
1163
* Add a defaulted attribute for an element
1166
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1167
const xmlChar *fullname,
1168
const xmlChar *fullattr,
1169
const xmlChar *value) {
1170
xmlDefAttrsPtr defaults;
1172
const xmlChar *name;
1173
const xmlChar *prefix;
1176
* Allows to detect attribute redefinitions
1178
if (ctxt->attsSpecial != NULL) {
1179
if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1183
if (ctxt->attsDefault == NULL) {
1184
ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1185
if (ctxt->attsDefault == NULL)
1190
* split the element name into prefix:localname , the string found
1191
* are within the DTD and then not associated to namespace names.
1193
name = xmlSplitQName3(fullname, &len);
1195
name = xmlDictLookup(ctxt->dict, fullname, -1);
1198
name = xmlDictLookup(ctxt->dict, name, -1);
1199
prefix = xmlDictLookup(ctxt->dict, fullname, len);
1203
* make sure there is some storage
1205
defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1206
if (defaults == NULL) {
1207
defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1208
(4 * 5) * sizeof(const xmlChar *));
1209
if (defaults == NULL)
1211
defaults->nbAttrs = 0;
1212
defaults->maxAttrs = 4;
1213
if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1214
defaults, NULL) < 0) {
1218
} else if (defaults->nbAttrs >= defaults->maxAttrs) {
1219
xmlDefAttrsPtr temp;
1221
temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1222
(2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1226
defaults->maxAttrs *= 2;
1227
if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1228
defaults, NULL) < 0) {
1235
* Split the attribute name into prefix:localname , the strings found
1236
* are within the DTD and then not associated to namespace names.
1238
name = xmlSplitQName3(fullattr, &len);
1240
name = xmlDictLookup(ctxt->dict, fullattr, -1);
1243
name = xmlDictLookup(ctxt->dict, name, -1);
1244
prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1247
defaults->values[5 * defaults->nbAttrs] = name;
1248
defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1249
/* intern the string and precompute the end */
1250
len = xmlStrlen(value);
1251
value = xmlDictLookup(ctxt->dict, value, len);
1252
defaults->values[5 * defaults->nbAttrs + 2] = value;
1253
defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1255
defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1257
defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1258
defaults->nbAttrs++;
1263
xmlErrMemory(ctxt, NULL);
1268
* xmlAddSpecialAttr:
1269
* @ctxt: an XML parser context
1270
* @fullname: the element fullname
1271
* @fullattr: the attribute fullname
1272
* @type: the attribute type
1274
* Register this attribute type
1277
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1278
const xmlChar *fullname,
1279
const xmlChar *fullattr,
1282
if (ctxt->attsSpecial == NULL) {
1283
ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1284
if (ctxt->attsSpecial == NULL)
1288
if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1291
xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1292
(void *) (long) type);
1296
xmlErrMemory(ctxt, NULL);
1301
* xmlCleanSpecialAttrCallback:
1303
* Removes CDATA attributes from the special attribute table
1306
xmlCleanSpecialAttrCallback(void *payload, void *data,
1307
const xmlChar *fullname, const xmlChar *fullattr,
1308
const xmlChar *unused ATTRIBUTE_UNUSED) {
1309
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1311
if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1312
xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1317
* xmlCleanSpecialAttr:
1318
* @ctxt: an XML parser context
1320
* Trim the list of attributes defined to remove all those of type
1321
* CDATA as they are not special. This call should be done when finishing
1322
* to parse the DTD and before starting to parse the document root.
1325
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1327
if (ctxt->attsSpecial == NULL)
1330
xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1332
if (xmlHashSize(ctxt->attsSpecial) == 0) {
1333
xmlHashFree(ctxt->attsSpecial, NULL);
1334
ctxt->attsSpecial = NULL;
1340
* xmlCheckLanguageID:
1341
* @lang: pointer to the string value
1343
* Checks that the value conforms to the LanguageID production:
1345
* NOTE: this is somewhat deprecated, those productions were removed from
1346
* the XML Second edition.
1348
* [33] LanguageID ::= Langcode ('-' Subcode)*
1349
* [34] Langcode ::= ISO639Code | IanaCode | UserCode
1350
* [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1351
* [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1352
* [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1353
* [38] Subcode ::= ([a-z] | [A-Z])+
1355
* The current REC references the successors of RFC 1766, currently 5646
1357
* http://www.rfc-editor.org/rfc/rfc5646.txt
1358
* langtag = language
1364
* language = 2*3ALPHA ; shortest ISO 639 code
1365
* ["-" extlang] ; sometimes followed by
1366
* ; extended language subtags
1367
* / 4ALPHA ; or reserved for future use
1368
* / 5*8ALPHA ; or registered language subtag
1370
* extlang = 3ALPHA ; selected ISO 639 codes
1371
* *2("-" 3ALPHA) ; permanently reserved
1373
* script = 4ALPHA ; ISO 15924 code
1375
* region = 2ALPHA ; ISO 3166-1 code
1376
* / 3DIGIT ; UN M.49 code
1378
* variant = 5*8alphanum ; registered variants
1379
* / (DIGIT 3alphanum)
1381
* extension = singleton 1*("-" (2*8alphanum))
1383
* ; Single alphanumerics
1384
* ; "x" reserved for private use
1385
* singleton = DIGIT ; 0 - 9
1391
* it sounds right to still allow Irregular i-xxx IANA and user codes too
1392
* The parser below doesn't try to cope with extension or privateuse
1393
* that could be added but that's not interoperable anyway
1395
* Returns 1 if correct 0 otherwise
1398
xmlCheckLanguageID(const xmlChar * lang)
1400
const xmlChar *cur = lang, *nxt;
1404
if (((cur[0] == 'i') && (cur[1] == '-')) ||
1405
((cur[0] == 'I') && (cur[1] == '-')) ||
1406
((cur[0] == 'x') && (cur[1] == '-')) ||
1407
((cur[0] == 'X') && (cur[1] == '-'))) {
1409
* Still allow IANA code and user code which were coming
1410
* from the previous version of the XML-1.0 specification
1411
* it's deprecated but we should not fail
1414
while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1415
((cur[0] >= 'a') && (cur[0] <= 'z')))
1417
return(cur[0] == 0);
1420
while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1421
((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1423
if (nxt - cur >= 4) {
1427
if ((nxt - cur > 8) || (nxt[0] != 0))
1433
/* we got an ISO 639 code */
1441
/* now we can have extlang or script or region or variant */
1442
if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1445
while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1446
((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1452
if ((nxt - cur >= 5) && (nxt - cur <= 8))
1456
/* we parsed an extlang */
1464
/* now we can have script or region or variant */
1465
if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1468
while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1469
((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1473
if ((nxt - cur >= 5) && (nxt - cur <= 8))
1477
/* we parsed a script */
1486
/* now we can have region or variant */
1487
if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1490
while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1491
((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1494
if ((nxt - cur >= 5) && (nxt - cur <= 8))
1498
/* we parsed a region */
1507
/* now we can just have a variant */
1508
while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1509
((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1512
if ((nxt - cur < 5) || (nxt - cur > 8))
1515
/* we parsed a variant */
1521
/* extensions and private use subtags not checked */
1525
if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1526
((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1533
/************************************************************************
1535
* Parser stacks related functions and macros *
1537
************************************************************************/
1539
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1540
const xmlChar ** str);
1545
* @ctxt: an XML parser context
1546
* @prefix: the namespace prefix or NULL
1547
* @URL: the namespace name
1549
* Pushes a new parser namespace on top of the ns stack
1551
* Returns -1 in case of error, -2 if the namespace should be discarded
1552
* and the index in the stack otherwise.
1555
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1557
if (ctxt->options & XML_PARSE_NSCLEAN) {
1559
for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1560
if (ctxt->nsTab[i] == prefix) {
1562
if (ctxt->nsTab[i + 1] == URL)
1564
/* out of scope keep it */
1569
if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1572
ctxt->nsTab = (const xmlChar **)
1573
xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1574
if (ctxt->nsTab == NULL) {
1575
xmlErrMemory(ctxt, NULL);
1579
} else if (ctxt->nsNr >= ctxt->nsMax) {
1580
const xmlChar ** tmp;
1582
tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1583
ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1585
xmlErrMemory(ctxt, NULL);
1591
ctxt->nsTab[ctxt->nsNr++] = prefix;
1592
ctxt->nsTab[ctxt->nsNr++] = URL;
1593
return (ctxt->nsNr);
1597
* @ctxt: an XML parser context
1598
* @nr: the number to pop
1600
* Pops the top @nr parser prefix/namespace from the ns stack
1602
* Returns the number of namespaces removed
1605
nsPop(xmlParserCtxtPtr ctxt, int nr)
1609
if (ctxt->nsTab == NULL) return(0);
1610
if (ctxt->nsNr < nr) {
1611
xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1614
if (ctxt->nsNr <= 0)
1617
for (i = 0;i < nr;i++) {
1619
ctxt->nsTab[ctxt->nsNr] = NULL;
1626
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1627
const xmlChar **atts;
1631
if (ctxt->atts == NULL) {
1632
maxatts = 55; /* allow for 10 attrs by default */
1633
atts = (const xmlChar **)
1634
xmlMalloc(maxatts * sizeof(xmlChar *));
1635
if (atts == NULL) goto mem_error;
1637
attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1638
if (attallocs == NULL) goto mem_error;
1639
ctxt->attallocs = attallocs;
1640
ctxt->maxatts = maxatts;
1641
} else if (nr + 5 > ctxt->maxatts) {
1642
maxatts = (nr + 5) * 2;
1643
atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1644
maxatts * sizeof(const xmlChar *));
1645
if (atts == NULL) goto mem_error;
1647
attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1648
(maxatts / 5) * sizeof(int));
1649
if (attallocs == NULL) goto mem_error;
1650
ctxt->attallocs = attallocs;
1651
ctxt->maxatts = maxatts;
1653
return(ctxt->maxatts);
1655
xmlErrMemory(ctxt, NULL);
1661
* @ctxt: an XML parser context
1662
* @value: the parser input
1664
* Pushes a new parser input on top of the input stack
1666
* Returns -1 in case of error, the index in the stack otherwise
1669
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1671
if ((ctxt == NULL) || (value == NULL))
1673
if (ctxt->inputNr >= ctxt->inputMax) {
1674
ctxt->inputMax *= 2;
1676
(xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1678
sizeof(ctxt->inputTab[0]));
1679
if (ctxt->inputTab == NULL) {
1680
xmlErrMemory(ctxt, NULL);
1681
xmlFreeInputStream(value);
1682
ctxt->inputMax /= 2;
1687
ctxt->inputTab[ctxt->inputNr] = value;
1688
ctxt->input = value;
1689
return (ctxt->inputNr++);
1693
* @ctxt: an XML parser context
1695
* Pops the top parser input from the input stack
1697
* Returns the input just removed
1700
inputPop(xmlParserCtxtPtr ctxt)
1702
xmlParserInputPtr ret;
1706
if (ctxt->inputNr <= 0)
1709
if (ctxt->inputNr > 0)
1710
ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1713
ret = ctxt->inputTab[ctxt->inputNr];
1714
ctxt->inputTab[ctxt->inputNr] = NULL;
1719
* @ctxt: an XML parser context
1720
* @value: the element node
1722
* Pushes a new element node on top of the node stack
1724
* Returns -1 in case of error, the index in the stack otherwise
1727
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1729
if (ctxt == NULL) return(0);
1730
if (ctxt->nodeNr >= ctxt->nodeMax) {
1733
tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1735
sizeof(ctxt->nodeTab[0]));
1737
xmlErrMemory(ctxt, NULL);
1740
ctxt->nodeTab = tmp;
1743
if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1744
((ctxt->options & XML_PARSE_HUGE) == 0)) {
1745
xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1746
"Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1748
ctxt->instate = XML_PARSER_EOF;
1751
ctxt->nodeTab[ctxt->nodeNr] = value;
1753
return (ctxt->nodeNr++);
1758
* @ctxt: an XML parser context
1760
* Pops the top element node from the node stack
1762
* Returns the node just removed
1765
nodePop(xmlParserCtxtPtr ctxt)
1769
if (ctxt == NULL) return(NULL);
1770
if (ctxt->nodeNr <= 0)
1773
if (ctxt->nodeNr > 0)
1774
ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1777
ret = ctxt->nodeTab[ctxt->nodeNr];
1778
ctxt->nodeTab[ctxt->nodeNr] = NULL;
1782
#ifdef LIBXML_PUSH_ENABLED
1785
* @ctxt: an XML parser context
1786
* @value: the element name
1787
* @prefix: the element prefix
1788
* @URI: the element namespace name
1790
* Pushes a new element name/prefix/URL on top of the name stack
1792
* Returns -1 in case of error, the index in the stack otherwise
1795
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1796
const xmlChar *prefix, const xmlChar *URI, int nsNr)
1798
if (ctxt->nameNr >= ctxt->nameMax) {
1799
const xmlChar * *tmp;
1802
tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1804
sizeof(ctxt->nameTab[0]));
1809
ctxt->nameTab = tmp;
1810
tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1812
sizeof(ctxt->pushTab[0]));
1817
ctxt->pushTab = tmp2;
1819
ctxt->nameTab[ctxt->nameNr] = value;
1821
ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1822
ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1823
ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1824
return (ctxt->nameNr++);
1826
xmlErrMemory(ctxt, NULL);
1831
* @ctxt: an XML parser context
1833
* Pops the top element/prefix/URI name from the name stack
1835
* Returns the name just removed
1837
static const xmlChar *
1838
nameNsPop(xmlParserCtxtPtr ctxt)
1842
if (ctxt->nameNr <= 0)
1845
if (ctxt->nameNr > 0)
1846
ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1849
ret = ctxt->nameTab[ctxt->nameNr];
1850
ctxt->nameTab[ctxt->nameNr] = NULL;
1853
#endif /* LIBXML_PUSH_ENABLED */
1857
* @ctxt: an XML parser context
1858
* @value: the element name
1860
* Pushes a new element name on top of the name stack
1862
* Returns -1 in case of error, the index in the stack otherwise
1865
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1867
if (ctxt == NULL) return (-1);
1869
if (ctxt->nameNr >= ctxt->nameMax) {
1870
const xmlChar * *tmp;
1871
tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1873
sizeof(ctxt->nameTab[0]));
1877
ctxt->nameTab = tmp;
1880
ctxt->nameTab[ctxt->nameNr] = value;
1882
return (ctxt->nameNr++);
1884
xmlErrMemory(ctxt, NULL);
1889
* @ctxt: an XML parser context
1891
* Pops the top element name from the name stack
1893
* Returns the name just removed
1896
namePop(xmlParserCtxtPtr ctxt)
1900
if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1903
if (ctxt->nameNr > 0)
1904
ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1907
ret = ctxt->nameTab[ctxt->nameNr];
1908
ctxt->nameTab[ctxt->nameNr] = NULL;
1912
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1913
if (ctxt->spaceNr >= ctxt->spaceMax) {
1916
ctxt->spaceMax *= 2;
1917
tmp = (int *) xmlRealloc(ctxt->spaceTab,
1918
ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1920
xmlErrMemory(ctxt, NULL);
1924
ctxt->spaceTab = tmp;
1926
ctxt->spaceTab[ctxt->spaceNr] = val;
1927
ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1928
return(ctxt->spaceNr++);
1931
static int spacePop(xmlParserCtxtPtr ctxt) {
1933
if (ctxt->spaceNr <= 0) return(0);
1935
if (ctxt->spaceNr > 0)
1936
ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1938
ctxt->space = &ctxt->spaceTab[0];
1939
ret = ctxt->spaceTab[ctxt->spaceNr];
1940
ctxt->spaceTab[ctxt->spaceNr] = -1;
1945
* Macros for accessing the content. Those should be used only by the parser,
1948
* Dirty macros, i.e. one often needs to make assumptions about the context to
1951
* CUR_PTR return the current pointer to the xmlChar to be parsed.
1952
* To be used with extreme caution since operations consuming
1953
* characters may move the input buffer to a different location !
1954
* CUR returns the current xmlChar value, i.e. an 8 bit value if compiled
1955
* This should be used internally by the parser
1956
* only to compare to ASCII values otherwise it would break when
1957
* running with UTF-8 encoding.
1958
* RAW same as CUR but in the input buffer, bypass any token
1959
* extraction that may have been done
1960
* NXT(n) returns the n'th next xmlChar. Same as CUR it should be used only
1961
* to compare on ASCII based substring.
1962
* SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1963
* strings without newlines within the parser.
1964
* NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1965
* defined char within the parser.
1966
* Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1968
* NEXT Skip to the next character, this does the proper decoding
1969
* in UTF-8 mode. It also pop-up unfinished entities on the fly.
1970
* NEXTL(l) Skip the current unicode character of l xmlChars long.
1971
* CUR_CHAR(l) returns the current unicode character (int), set l
1972
* to the number of xmlChars used for the encoding [0-5].
1973
* CUR_SCHAR same but operate on a string instead of the context
1974
* COPY_BUF copy the current unicode char to the target buffer, increment
1976
* GROW, SHRINK handling of input buffers
1979
/* RAW: the xmlChar at the current read position of the active input.
 * Relies on a variable named `ctxt` being in scope at the point of use. */
#define RAW (*ctxt->input->cur)
1980
/* CUR: expands to exactly the same expression as RAW, i.e. the xmlChar at
 * ctxt->input->cur. Relies on `ctxt` being in scope at the point of use. */
#define CUR (*ctxt->input->cur)
1981
/* NXT(val): the xmlChar located `val` positions after the current read
 * position. The macro itself performs no bounds check on `val`. */
#define NXT(val) ctxt->input->cur[(val)]
1982
/* CUR_PTR: the current read pointer itself (ctxt->input->cur), not the
 * xmlChar it points to. Relies on `ctxt` being in scope at the point of use. */
#define CUR_PTR ctxt->input->cur
1984
/*
 * CMPn(s, c1, ..., cn): non-zero when the first n bytes found at s are equal
 * to c1 ... cn, in that order. Bytes are read through an unsigned char
 * pointer, so values above 127 compare as 128..255.
 *
 * The comparisons are chained with &&, so evaluation stops at the first
 * mismatching byte; the macros perform no length check of their own.
 * s is expanded once per compared byte and therefore must not have side
 * effects. Every parameter is parenthesized so that expression arguments
 * (e.g. "p + 1") are converted as a whole rather than operand by operand.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
2002
#define SKIP(val) do { \
2003
ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
2004
if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
2005
if ((*ctxt->input->cur == 0) && \
2006
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
2007
xmlPopInput(ctxt); \
2010
#define SKIPL(val) do { \
2012
for(skipl=0; skipl<val; skipl++) { \
2013
if (*(ctxt->input->cur) == '\n') { \
2014
ctxt->input->line++; ctxt->input->col = 1; \
2015
} else ctxt->input->col++; \
2017
ctxt->input->cur++; \
2019
if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
2020
if ((*ctxt->input->cur == 0) && \
2021
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
2022
xmlPopInput(ctxt); \
2025
#define SHRINK if ((ctxt->progressive == 0) && \
2026
(ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2027
(ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2030
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2031
xmlParserInputShrink(ctxt->input);
2032
if ((*ctxt->input->cur == 0) &&
2033
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2037
#define GROW if ((ctxt->progressive == 0) && \
2038
(ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2041
static void xmlGROW (xmlParserCtxtPtr ctxt) {
2042
if ((((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
2043
((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
2044
((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
2045
((ctxt->options & XML_PARSE_HUGE) == 0)) {
2046
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2047
ctxt->instate = XML_PARSER_EOF;
2049
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2050
if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
2051
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2055
/* SKIP_BLANKS: calls xmlSkipBlankChars() on the `ctxt` in scope; the macro
 * expression evaluates to that call's return value. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
2057
/* NEXT: calls xmlNextChar() on the `ctxt` in scope to advance the input by
 * one character. */
#define NEXT xmlNextChar(ctxt)
2060
ctxt->input->col++; \
2061
ctxt->input->cur++; \
2063
if (*ctxt->input->cur == 0) \
2064
xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
2067
#define NEXTL(l) do { \
2068
if (*(ctxt->input->cur) == '\n') { \
2069
ctxt->input->line++; ctxt->input->col = 1; \
2070
} else ctxt->input->col++; \
2071
ctxt->input->cur += l; \
2072
if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
2075
/* CUR_CHAR(l): calls xmlCurrentChar() on the `ctxt` in scope. The address of
 * `l` is passed to the callee, so `l` must be an lvalue. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
2076
/* CUR_SCHAR(s, l): string variant of CUR_CHAR; calls xmlStringCurrentChar()
 * on string `s` with the `ctxt` in scope. The address of `l` is passed to
 * the callee, so `l` must be an lvalue. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
2078
/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i.
 *
 * When l is 1 the value is stored as a single xmlChar and i is incremented
 * by one; otherwise v is written through xmlCopyCharMultiByte() and i is
 * advanced by that function's return value.
 *
 * Written as a do { } while (0) statement so it acts as a single statement
 * after if/else, and with parenthesized parameters so expression arguments
 * are cast and indexed as a whole. Must be followed by a semicolon.
 * b and i are expanded more than once; pass side-effect free expressions.
 */
#define COPY_BUF(l,b,i,v)						\
    do {								\
	if ((l) == 1) (b)[(i)++] = (xmlChar) (v);			\
	else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v));		\
    } while (0)
2083
* xmlSkipBlankChars:
2084
* @ctxt: the XML parser context
2086
* skip all blank characters found at that point in the input stream.
2087
* It pops up finished entities in the process if allowable at that point.
2089
* Returns the number of space chars skipped
2093
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2097
* It's Okay to use CUR/NEXT here since all the blanks are on
2100
if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2103
* if we are in the document content, go really fast
2105
cur = ctxt->input->cur;
2106
while (IS_BLANK_CH(*cur)) {
2108
ctxt->input->line++; ctxt->input->col = 1;
2113
ctxt->input->cur = cur;
2114
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2115
cur = ctxt->input->cur;
2118
ctxt->input->cur = cur;
2123
while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
2128
while ((cur == 0) && (ctxt->inputNr > 1) &&
2129
(ctxt->instate != XML_PARSER_COMMENT)) {
2134
* Need to handle support of entities branching here
2136
if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2137
} while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2142
/************************************************************************
2144
* Commodity functions to handle entities *
2146
************************************************************************/
2150
* @ctxt: an XML parser context
2152
* xmlPopInput: the current input pointed to by ctxt->input came to an end
2153
* pop it and return the next char.
2155
* Returns the current xmlChar in the parser context
2158
xmlPopInput(xmlParserCtxtPtr ctxt) {
2159
if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2160
if (xmlParserDebugEntities)
2161
xmlGenericError(xmlGenericErrorContext,
2162
"Popping input %d\n", ctxt->inputNr);
2163
xmlFreeInputStream(inputPop(ctxt));
2164
if ((*ctxt->input->cur == 0) &&
2165
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2166
return(xmlPopInput(ctxt));
2172
* @ctxt: an XML parser context
2173
* @input: an XML parser input fragment (entity, XML fragment ...).
2175
* xmlPushInput: switch to a new input stream which is stacked on top
2176
* of the previous one(s).
2177
* Returns -1 in case of error or the index in the input stack
2180
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2182
if (input == NULL) return(-1);
2184
if (xmlParserDebugEntities) {
2185
if ((ctxt->input != NULL) && (ctxt->input->filename))
2186
xmlGenericError(xmlGenericErrorContext,
2187
"%s(%d): ", ctxt->input->filename,
2189
xmlGenericError(xmlGenericErrorContext,
2190
"Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2192
ret = inputPush(ctxt, input);
2193
if (ctxt->instate == XML_PARSER_EOF)
2201
* @ctxt: an XML parser context
2203
* parse Reference declarations
2205
* [66] CharRef ::= '&#' [0-9]+ ';' |
2206
* '&#x' [0-9a-fA-F]+ ';'
2208
* [ WFC: Legal Character ]
2209
* Characters referred to using character references must match the
2210
* production for Char.
2212
* Returns the value parsed (as an int), 0 in case of error
2215
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2216
unsigned int val = 0;
2218
unsigned int outofrange = 0;
2221
* Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2223
if ((RAW == '&') && (NXT(1) == '#') &&
2227
while (RAW != ';') { /* loop blocked by count */
2231
if (ctxt->instate == XML_PARSER_EOF)
2234
if ((RAW >= '0') && (RAW <= '9'))
2235
val = val * 16 + (CUR - '0');
2236
else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2237
val = val * 16 + (CUR - 'a') + 10;
2238
else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2239
val = val * 16 + (CUR - 'A') + 10;
2241
xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2252
/* on purpose to avoid reentrancy problems with NEXT and SKIP */
2257
} else if ((RAW == '&') && (NXT(1) == '#')) {
2260
while (RAW != ';') { /* loop blocked by count */
2264
if (ctxt->instate == XML_PARSER_EOF)
2267
if ((RAW >= '0') && (RAW <= '9'))
2268
val = val * 10 + (CUR - '0');
2270
xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2281
/* on purpose to avoid reentrancy problems with NEXT and SKIP */
2287
xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2291
* [ WFC: Legal Character ]
2292
* Characters referred to using character references must match the
2293
* production for Char.
2295
if ((IS_CHAR(val) && (outofrange == 0))) {
2298
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2299
"xmlParseCharRef: invalid xmlChar value %d\n",
2306
* xmlParseStringCharRef:
2307
* @ctxt: an XML parser context
2308
* @str: a pointer to an index in the string
2310
* parse Reference declarations, variant parsing from a string rather
2311
* than an input flow.
2313
* [66] CharRef ::= '&#' [0-9]+ ';' |
2314
* '&#x' [0-9a-fA-F]+ ';'
2316
* [ WFC: Legal Character ]
2317
* Characters referred to using character references must match the
2318
* production for Char.
2320
* Returns the value parsed (as an int), 0 in case of error, str will be
2321
* updated to the current value of the index
2324
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2327
unsigned int val = 0;
2328
unsigned int outofrange = 0;
2330
if ((str == NULL) || (*str == NULL)) return(0);
2333
if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2336
while (cur != ';') { /* Non input consuming loop */
2337
if ((cur >= '0') && (cur <= '9'))
2338
val = val * 16 + (cur - '0');
2339
else if ((cur >= 'a') && (cur <= 'f'))
2340
val = val * 16 + (cur - 'a') + 10;
2341
else if ((cur >= 'A') && (cur <= 'F'))
2342
val = val * 16 + (cur - 'A') + 10;
2344
xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2356
} else if ((cur == '&') && (ptr[1] == '#')){
2359
while (cur != ';') { /* Non input consuming loops */
2360
if ((cur >= '0') && (cur <= '9'))
2361
val = val * 10 + (cur - '0');
2363
xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2376
xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2382
* [ WFC: Legal Character ]
2383
* Characters referred to using character references must match the
2384
* production for Char.
2386
if ((IS_CHAR(val) && (outofrange == 0))) {
2389
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2390
"xmlParseStringCharRef: invalid xmlChar value %d\n",
2397
* xmlNewBlanksWrapperInputStream:
2398
* @ctxt: an XML parser context
2399
* @entity: an Entity pointer
2401
* Create a new input stream for wrapping
2402
* blanks around a PEReference
2404
* Returns the new input stream or NULL
2407
/*
 * deallocblankswrapper:
 * @str: the buffer to release
 *
 * Release routine that hands @str to xmlFree(). It is installed as the
 * free callback of blanks-wrapper inputs, and its address is also compared
 * against input->free to recognise such inputs.
 */
static void
deallocblankswrapper(xmlChar *str)
{
    xmlFree(str);
}
2409
static xmlParserInputPtr
2410
xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2411
xmlParserInputPtr input;
2414
if (entity == NULL) {
2415
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2416
"xmlNewBlanksWrapperInputStream entity\n");
2419
if (xmlParserDebugEntities)
2420
xmlGenericError(xmlGenericErrorContext,
2421
"new blanks wrapper for entity: %s\n", entity->name);
2422
input = xmlNewInputStream(ctxt);
2423
if (input == NULL) {
2426
length = xmlStrlen(entity->name) + 5;
2427
buffer = xmlMallocAtomic(length);
2428
if (buffer == NULL) {
2429
xmlErrMemory(ctxt, NULL);
2435
buffer [length-3] = ';';
2436
buffer [length-2] = ' ';
2437
buffer [length-1] = 0;
2438
memcpy(buffer + 2, entity->name, length - 5);
2439
input->free = deallocblankswrapper;
2440
input->base = buffer;
2441
input->cur = buffer;
2442
input->length = length;
2443
input->end = &buffer[length];
2448
* xmlParserHandlePEReference:
2449
* @ctxt: the parser context
2451
* [69] PEReference ::= '%' Name ';'
2453
* [ WFC: No Recursion ]
2454
* A parsed entity must not contain a recursive
2455
* reference to itself, either directly or indirectly.
2457
* [ WFC: Entity Declared ]
2458
* In a document without any DTD, a document with only an internal DTD
2459
* subset which contains no parameter entity references, or a document
2460
* with "standalone='yes'", ... ... The declaration of a parameter
2461
* entity must precede any reference to it...
2463
* [ VC: Entity Declared ]
2464
* In a document with an external subset or external parameter entities
2465
* with "standalone='no'", ... ... The declaration of a parameter entity
2466
* must precede any reference to it...
2469
* Parameter-entity references may only appear in the DTD.
2470
* NOTE: misleading but this is handled.
2472
* A PEReference may have been detected in the current input stream
2473
* the handling is done according to
2474
* http://www.w3.org/TR/REC-xml#entproc
2476
* - Included in literal in entity values
2477
* - Included as Parameter Entity reference within DTDs
2480
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2481
const xmlChar *name;
2482
xmlEntityPtr entity = NULL;
2483
xmlParserInputPtr input;
2485
if (RAW != '%') return;
2486
switch(ctxt->instate) {
2487
case XML_PARSER_CDATA_SECTION:
2489
case XML_PARSER_COMMENT:
2491
case XML_PARSER_START_TAG:
2493
case XML_PARSER_END_TAG:
2495
case XML_PARSER_EOF:
2496
xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2498
case XML_PARSER_PROLOG:
2499
case XML_PARSER_START:
2500
case XML_PARSER_MISC:
2501
xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2503
case XML_PARSER_ENTITY_DECL:
2504
case XML_PARSER_CONTENT:
2505
case XML_PARSER_ATTRIBUTE_VALUE:
2507
case XML_PARSER_SYSTEM_LITERAL:
2508
case XML_PARSER_PUBLIC_LITERAL:
2509
/* we just ignore it there */
2511
case XML_PARSER_EPILOG:
2512
xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2514
case XML_PARSER_ENTITY_VALUE:
2516
* NOTE: in the case of entity values, we don't do the
2517
* substitution here since we need the literal
2518
* entity value to be able to save the internal
2519
* subset of the document.
2520
* This will be handled by xmlStringDecodeEntities
2523
case XML_PARSER_DTD:
2525
* [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2526
* In the internal DTD subset, parameter-entity references
2527
* can occur only where markup declarations can occur, not
2528
* within markup declarations.
2529
* In that case this is handled in xmlParseMarkupDecl
2531
if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2533
if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2536
case XML_PARSER_IGNORE:
2541
name = xmlParseName(ctxt);
2542
if (xmlParserDebugEntities)
2543
xmlGenericError(xmlGenericErrorContext,
2544
"PEReference: %s\n", name);
2546
xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2550
if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2551
entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2552
if (ctxt->instate == XML_PARSER_EOF)
2554
if (entity == NULL) {
2557
* [ WFC: Entity Declared ]
2558
* In a document without any DTD, a document with only an
2559
* internal DTD subset which contains no parameter entity
2560
* references, or a document with "standalone='yes'", ...
2561
* ... The declaration of a parameter entity must precede
2562
* any reference to it...
2564
if ((ctxt->standalone == 1) ||
2565
((ctxt->hasExternalSubset == 0) &&
2566
(ctxt->hasPErefs == 0))) {
2567
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2568
"PEReference: %%%s; not found\n", name);
2571
* [ VC: Entity Declared ]
2572
* In a document with an external subset or external
2573
* parameter entities with "standalone='no'", ...
2574
* ... The declaration of a parameter entity must precede
2575
* any reference to it...
2577
if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2578
xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2579
"PEReference: %%%s; not found\n",
2582
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2583
"PEReference: %%%s; not found\n",
2587
} else if (ctxt->input->free != deallocblankswrapper) {
2588
input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2589
if (xmlPushInput(ctxt, input) < 0)
2592
if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2593
(entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2595
xmlCharEncoding enc;
2598
* handle the extra spaces added before and after
2599
* c.f. http://www.w3.org/TR/REC-xml#as-PE
2600
* this is done independently.
2602
input = xmlNewEntityInputStream(ctxt, entity);
2603
if (xmlPushInput(ctxt, input) < 0)
2607
* Get the 4 first bytes and decode the charset
2608
* if enc != XML_CHAR_ENCODING_NONE
2609
* plug some encoding conversion routines.
2610
* Note that, since we may have some non-UTF8
2611
* encoding (like UTF16, bug 135229), the 'length'
2612
* is not known, but we can calculate based upon
2613
* the amount of data in the buffer.
2616
if (ctxt->instate == XML_PARSER_EOF)
2618
if ((ctxt->input->end - ctxt->input->cur)>=4) {
2623
enc = xmlDetectCharEncoding(start, 4);
2624
if (enc != XML_CHAR_ENCODING_NONE) {
2625
xmlSwitchEncoding(ctxt, enc);
2629
if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2630
(CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2631
(IS_BLANK_CH(NXT(5)))) {
2632
xmlParseTextDecl(ctxt);
2635
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2636
"PEReference: %s is not a parameter entity\n",
2641
xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2647
* Macro used to grow the current buffer.
2648
* buffer##_size is expected to be a size_t
2649
* mem_error: is expected to handle memory allocation failures
2651
#define growBuffer(buffer, n) { \
2653
size_t new_size = buffer##_size * 2 + n; \
2654
if (new_size < buffer##_size) goto mem_error; \
2655
tmp = (xmlChar *) xmlRealloc(buffer, new_size); \
2656
if (tmp == NULL) goto mem_error; \
2658
buffer##_size = new_size; \
2662
* xmlStringLenDecodeEntities:
2663
* @ctxt: the parser context
2664
* @str: the input string
2665
* @len: the string length
2666
* @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2667
* @end: an end marker xmlChar, 0 if none
2668
* @end2: an end marker xmlChar, 0 if none
2669
* @end3: an end marker xmlChar, 0 if none
2671
* Takes an entity string content and processes it to do the adequate substitutions.
2673
* [67] Reference ::= EntityRef | CharRef
2675
* [69] PEReference ::= '%' Name ';'
2677
* Returns A newly allocated string with the substitution done. The caller
2678
* must deallocate it !
2681
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2682
int what, xmlChar end, xmlChar end2, xmlChar end3) {
2683
xmlChar *buffer = NULL;
2684
size_t buffer_size = 0;
2687
xmlChar *current = NULL;
2688
xmlChar *rep = NULL;
2689
const xmlChar *last;
2693
if ((ctxt == NULL) || (str == NULL) || (len < 0))
2697
if (((ctxt->depth > 40) &&
2698
((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2699
(ctxt->depth > 1024)) {
2700
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2705
* allocate a translation buffer.
2707
buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2708
buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2709
if (buffer == NULL) goto mem_error;
2712
* OK loop until we reach one of the ending char or a size limit.
2713
* we are operating on already parsed values.
2716
c = CUR_SCHAR(str, l);
2719
while ((c != 0) && (c != end) && /* non input consuming loop */
2720
(c != end2) && (c != end3)) {
2723
if ((c == '&') && (str[1] == '#')) {
2724
int val = xmlParseStringCharRef(ctxt, &str);
2726
COPY_BUF(0,buffer,nbchars,val);
2728
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2729
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2731
} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2732
if (xmlParserDebugEntities)
2733
xmlGenericError(xmlGenericErrorContext,
2734
"String decoding Entity Reference: %.30s\n",
2736
ent = xmlParseStringEntityRef(ctxt, &str);
2737
if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2738
(ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2741
ctxt->nbentities += ent->checked / 2;
2742
if ((ent != NULL) &&
2743
(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2744
if (ent->content != NULL) {
2745
COPY_BUF(0,buffer,nbchars,ent->content[0]);
2746
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2747
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2750
xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2751
"predefined entity has no content\n");
2753
} else if ((ent != NULL) && (ent->content != NULL)) {
2755
rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2761
while (*current != 0) { /* non input consuming loop */
2762
buffer[nbchars++] = *current++;
2763
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2764
if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2766
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2772
} else if (ent != NULL) {
2773
int i = xmlStrlen(ent->name);
2774
const xmlChar *cur = ent->name;
2776
buffer[nbchars++] = '&';
2777
if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2778
growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2781
buffer[nbchars++] = *cur++;
2782
buffer[nbchars++] = ';';
2784
} else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2785
if (xmlParserDebugEntities)
2786
xmlGenericError(xmlGenericErrorContext,
2787
"String decoding PE Reference: %.30s\n", str);
2788
ent = xmlParseStringPEReference(ctxt, &str);
2789
if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2792
ctxt->nbentities += ent->checked / 2;
2794
if (ent->content == NULL) {
2795
xmlLoadEntityContent(ctxt, ent);
2798
rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2803
while (*current != 0) { /* non input consuming loop */
2804
buffer[nbchars++] = *current++;
2805
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2806
if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2808
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2816
COPY_BUF(l,buffer,nbchars,c);
2818
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2819
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2823
c = CUR_SCHAR(str, l);
2827
buffer[nbchars] = 0;
2831
xmlErrMemory(ctxt, NULL);
2841
* xmlStringDecodeEntities:
2842
* @ctxt: the parser context
2843
* @str: the input string
2844
* @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2845
* @end: an end marker xmlChar, 0 if none
2846
* @end2: an end marker xmlChar, 0 if none
2847
* @end3: an end marker xmlChar, 0 if none
2849
* Takes an entity string content and processes it to do the adequate substitutions.
2851
* [67] Reference ::= EntityRef | CharRef
2853
* [69] PEReference ::= '%' Name ';'
2855
* Returns A newly allocated string with the substitution done. The caller
2856
* must deallocate it !
2859
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2860
xmlChar end, xmlChar end2, xmlChar end3) {
2861
if ((ctxt == NULL) || (str == NULL)) return(NULL);
2862
return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2866
/************************************************************************
2868
* Commodity functions, cleanup needed ? *
2870
************************************************************************/
2874
* @ctxt: an XML parser context
2876
* @len: the size of @str
2877
* @blank_chars: we know the chars are blanks
2879
* Is this a sequence of blank chars that one can ignore ?
2881
* Returns 1 if ignorable 0 otherwise.
2884
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2887
xmlNodePtr lastChild;
2890
* Don't spend time trying to differentiate them, the same callback is
2893
if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2897
* Check for xml:space value.
2899
if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2900
(*(ctxt->space) == -2))
2904
* Check that the string is made of blanks
2906
if (blank_chars == 0) {
2907
for (i = 0;i < len;i++)
2908
if (!(IS_BLANK_CH(str[i]))) return(0);
2912
* Look if the element is mixed content in the DTD if available
2914
if (ctxt->node == NULL) return(0);
2915
if (ctxt->myDoc != NULL) {
2916
ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2917
if (ret == 0) return(1);
2918
if (ret == 1) return(0);
2922
* Otherwise, heuristic :-\
2924
if ((RAW != '<') && (RAW != 0xD)) return(0);
2925
if ((ctxt->node->children == NULL) &&
2926
(RAW == '<') && (NXT(1) == '/')) return(0);
2928
lastChild = xmlGetLastChild(ctxt->node);
2929
if (lastChild == NULL) {
2930
if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2931
(ctxt->node->content != NULL)) return(0);
2932
} else if (xmlNodeIsText(lastChild))
2934
else if ((ctxt->node->children != NULL) &&
2935
(xmlNodeIsText(ctxt->node->children)))
2940
/************************************************************************
2942
* Extra stuff for namespace support *
2943
* Relates to http://www.w3.org/TR/WD-xml-names *
2945
************************************************************************/
2949
* @ctxt: an XML parser context
2950
* @name: the UTF8 encoded XML qualified name string to split
2951
* @prefix: a xmlChar **
2953
* parse an UTF8 encoded XML qualified name string
2955
* [NS 5] QName ::= (Prefix ':')? LocalPart
2957
* [NS 6] Prefix ::= NCName
2959
* [NS 7] LocalPart ::= NCName
2961
* Returns the local part, and prefix is updated
2962
* to get the Prefix if any.
2966
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2967
xmlChar buf[XML_MAX_NAMELEN + 5];
2968
xmlChar *buffer = NULL;
2970
int max = XML_MAX_NAMELEN;
2971
xmlChar *ret = NULL;
2972
const xmlChar *cur = name;
2975
if (prefix == NULL) return(NULL);
2978
if (cur == NULL) return(NULL);
2980
#ifndef XML_XML_NAMESPACE
2981
/* xml: prefix is not really a namespace */
2982
if ((cur[0] == 'x') && (cur[1] == 'm') &&
2983
(cur[2] == 'l') && (cur[3] == ':'))
2984
return(xmlStrdup(name));
2987
/* nasty but well-formed */
2989
return(xmlStrdup(name));
2992
while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2998
* Okay someone managed to make a huge name, so he's ready to pay
2999
* for the processing speed.
3003
buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3004
if (buffer == NULL) {
3005
xmlErrMemory(ctxt, NULL);
3008
memcpy(buffer, buf, len);
3009
while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3010
if (len + 10 > max) {
3014
tmp = (xmlChar *) xmlRealloc(buffer,
3015
max * sizeof(xmlChar));
3018
xmlErrMemory(ctxt, NULL);
3029
if ((c == ':') && (*cur == 0)) {
3033
return(xmlStrdup(name));
3037
ret = xmlStrndup(buf, len);
3041
max = XML_MAX_NAMELEN;
3049
return(xmlStrndup(BAD_CAST "", 0));
3054
* Check that the first character is proper to start
3057
if (!(((c >= 0x61) && (c <= 0x7A)) ||
3058
((c >= 0x41) && (c <= 0x5A)) ||
3059
(c == '_') || (c == ':'))) {
3061
int first = CUR_SCHAR(cur, l);
3063
if (!IS_LETTER(first) && (first != '_')) {
3064
xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3065
"Name %s is not XML Namespace compliant\n",
3071
while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3077
* Okay someone managed to make a huge name, so he's ready to pay
3078
* for the processing speed.
3082
buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3083
if (buffer == NULL) {
3084
xmlErrMemory(ctxt, NULL);
3087
memcpy(buffer, buf, len);
3088
while (c != 0) { /* tested bigname2.xml */
3089
if (len + 10 > max) {
3093
tmp = (xmlChar *) xmlRealloc(buffer,
3094
max * sizeof(xmlChar));
3096
xmlErrMemory(ctxt, NULL);
3109
ret = xmlStrndup(buf, len);
3118
/************************************************************************
3120
* The parser itself *
3121
* Relates to http://www.w3.org/TR/REC-xml *
3123
************************************************************************/
3125
/************************************************************************
3127
* Routines to parse Name, NCName and NmToken *
3129
************************************************************************/
3131
/*
 * Call counters for the name parsing routines.
 * nbParseNameComplex, nbParseNCNameComplex and nbParseStringName are
 * incremented by xmlParseNameComplex(), xmlParseNCNameComplex() and
 * xmlParseStringName() respectively.
 * NOTE(review): the increments of nbParseName, nbParseNmToken and
 * nbParseNCName are not visible in this part of the file; presumably the
 * matching xmlParse* routines bump them - confirm.
 */
static unsigned long nbParseName = 0;
3132
static unsigned long nbParseNmToken = 0;
3133
static unsigned long nbParseNCName = 0;
3134
static unsigned long nbParseNCNameComplex = 0;
3135
static unsigned long nbParseNameComplex = 0;
3136
static unsigned long nbParseStringName = 0;
3140
* The two following functions are related to the change of accepted
3141
* characters for Name and NmToken in the Revision 5 of XML-1.0
3142
* They correspond to the modified production [4] and the new production [4a]
3143
* changes in that revision. Also note that the macros used for the
3144
* productions Letter, Digit, CombiningChar and Extender are not needed
3146
* We still keep compatibility to pre-revision5 parsing semantic if the
3147
* new XML_PARSE_OLD10 option is given to the parser.
3150
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3151
if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3153
* Use the new checks of production [4] [4a] and [5] of the
3154
* Update 5 of XML-1.0
3156
if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3157
(((c >= 'a') && (c <= 'z')) ||
3158
((c >= 'A') && (c <= 'Z')) ||
3159
(c == '_') || (c == ':') ||
3160
((c >= 0xC0) && (c <= 0xD6)) ||
3161
((c >= 0xD8) && (c <= 0xF6)) ||
3162
((c >= 0xF8) && (c <= 0x2FF)) ||
3163
((c >= 0x370) && (c <= 0x37D)) ||
3164
((c >= 0x37F) && (c <= 0x1FFF)) ||
3165
((c >= 0x200C) && (c <= 0x200D)) ||
3166
((c >= 0x2070) && (c <= 0x218F)) ||
3167
((c >= 0x2C00) && (c <= 0x2FEF)) ||
3168
((c >= 0x3001) && (c <= 0xD7FF)) ||
3169
((c >= 0xF900) && (c <= 0xFDCF)) ||
3170
((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3171
((c >= 0x10000) && (c <= 0xEFFFF))))
3174
if (IS_LETTER(c) || (c == '_') || (c == ':'))
3181
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3182
if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3184
* Use the new checks of production [4] [4a] and [5] of the
3185
* Update 5 of XML-1.0
3187
if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3188
(((c >= 'a') && (c <= 'z')) ||
3189
((c >= 'A') && (c <= 'Z')) ||
3190
((c >= '0') && (c <= '9')) || /* !start */
3191
(c == '_') || (c == ':') ||
3192
(c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3193
((c >= 0xC0) && (c <= 0xD6)) ||
3194
((c >= 0xD8) && (c <= 0xF6)) ||
3195
((c >= 0xF8) && (c <= 0x2FF)) ||
3196
((c >= 0x300) && (c <= 0x36F)) || /* !start */
3197
((c >= 0x370) && (c <= 0x37D)) ||
3198
((c >= 0x37F) && (c <= 0x1FFF)) ||
3199
((c >= 0x200C) && (c <= 0x200D)) ||
3200
((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3201
((c >= 0x2070) && (c <= 0x218F)) ||
3202
((c >= 0x2C00) && (c <= 0x2FEF)) ||
3203
((c >= 0x3001) && (c <= 0xD7FF)) ||
3204
((c >= 0xF900) && (c <= 0xFDCF)) ||
3205
((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3206
((c >= 0x10000) && (c <= 0xEFFFF))))
3209
if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3210
(c == '.') || (c == '-') ||
3211
(c == '_') || (c == ':') ||
3212
(IS_COMBINING(c)) ||
3219
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3220
int *len, int *alloc, int normalize);
3222
static const xmlChar *
3223
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3229
nbParseNameComplex++;
3233
* Handler for more complex cases
3236
if (ctxt->instate == XML_PARSER_EOF)
3239
if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3241
* Use the new checks of production [4] [4a] and [5] of the
3242
* Update 5 of XML-1.0
3244
if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3245
(!(((c >= 'a') && (c <= 'z')) ||
3246
((c >= 'A') && (c <= 'Z')) ||
3247
(c == '_') || (c == ':') ||
3248
((c >= 0xC0) && (c <= 0xD6)) ||
3249
((c >= 0xD8) && (c <= 0xF6)) ||
3250
((c >= 0xF8) && (c <= 0x2FF)) ||
3251
((c >= 0x370) && (c <= 0x37D)) ||
3252
((c >= 0x37F) && (c <= 0x1FFF)) ||
3253
((c >= 0x200C) && (c <= 0x200D)) ||
3254
((c >= 0x2070) && (c <= 0x218F)) ||
3255
((c >= 0x2C00) && (c <= 0x2FEF)) ||
3256
((c >= 0x3001) && (c <= 0xD7FF)) ||
3257
((c >= 0xF900) && (c <= 0xFDCF)) ||
3258
((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3259
((c >= 0x10000) && (c <= 0xEFFFF))))) {
3265
while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3266
(((c >= 'a') && (c <= 'z')) ||
3267
((c >= 'A') && (c <= 'Z')) ||
3268
((c >= '0') && (c <= '9')) || /* !start */
3269
(c == '_') || (c == ':') ||
3270
(c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3271
((c >= 0xC0) && (c <= 0xD6)) ||
3272
((c >= 0xD8) && (c <= 0xF6)) ||
3273
((c >= 0xF8) && (c <= 0x2FF)) ||
3274
((c >= 0x300) && (c <= 0x36F)) || /* !start */
3275
((c >= 0x370) && (c <= 0x37D)) ||
3276
((c >= 0x37F) && (c <= 0x1FFF)) ||
3277
((c >= 0x200C) && (c <= 0x200D)) ||
3278
((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3279
((c >= 0x2070) && (c <= 0x218F)) ||
3280
((c >= 0x2C00) && (c <= 0x2FEF)) ||
3281
((c >= 0x3001) && (c <= 0xD7FF)) ||
3282
((c >= 0xF900) && (c <= 0xFDCF)) ||
3283
((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3284
((c >= 0x10000) && (c <= 0xEFFFF))
3286
if (count++ > XML_PARSER_CHUNK_SIZE) {
3289
if (ctxt->instate == XML_PARSER_EOF)
3297
if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3298
(!IS_LETTER(c) && (c != '_') &&
3306
while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3307
((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3308
(c == '.') || (c == '-') ||
3309
(c == '_') || (c == ':') ||
3310
(IS_COMBINING(c)) ||
3311
(IS_EXTENDER(c)))) {
3312
if (count++ > XML_PARSER_CHUNK_SIZE) {
3315
if (ctxt->instate == XML_PARSER_EOF)
3324
if (ctxt->instate == XML_PARSER_EOF)
3330
if ((len > XML_MAX_NAME_LENGTH) &&
3331
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3332
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3335
if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3336
return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3337
return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3342
* @ctxt: an XML parser context
3344
* parse an XML name.
3346
* [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3347
* CombiningChar | Extender
3349
* [5] Name ::= (Letter | '_' | ':') (NameChar)*
3351
* [6] Names ::= Name (#x20 Name)*
3353
* Returns the Name parsed or NULL
3357
xmlParseName(xmlParserCtxtPtr ctxt) {
3369
* Accelerator for simple ASCII names
3371
in = ctxt->input->cur;
3372
if (((*in >= 0x61) && (*in <= 0x7A)) ||
3373
((*in >= 0x41) && (*in <= 0x5A)) ||
3374
(*in == '_') || (*in == ':')) {
3376
while (((*in >= 0x61) && (*in <= 0x7A)) ||
3377
((*in >= 0x41) && (*in <= 0x5A)) ||
3378
((*in >= 0x30) && (*in <= 0x39)) ||
3379
(*in == '_') || (*in == '-') ||
3380
(*in == ':') || (*in == '.'))
3382
if ((*in > 0) && (*in < 0x80)) {
3383
count = in - ctxt->input->cur;
3384
if ((count > XML_MAX_NAME_LENGTH) &&
3385
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3386
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3389
ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3390
ctxt->input->cur = in;
3391
ctxt->nbChars += count;
3392
ctxt->input->col += count;
3394
xmlErrMemory(ctxt, NULL);
3398
/* accelerator for special cases */
3399
return(xmlParseNameComplex(ctxt));
3402
static const xmlChar *
3403
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3407
const xmlChar *end; /* needed because CUR_CHAR() can move cur on \r\n */
3410
nbParseNCNameComplex++;
3414
* Handler for more complex cases
3417
end = ctxt->input->cur;
3419
if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3420
(!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3424
while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3425
(xmlIsNameChar(ctxt, c) && (c != ':'))) {
3426
if (count++ > XML_PARSER_CHUNK_SIZE) {
3427
if ((len > XML_MAX_NAME_LENGTH) &&
3428
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3429
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3434
if (ctxt->instate == XML_PARSER_EOF)
3439
end = ctxt->input->cur;
3444
if (ctxt->instate == XML_PARSER_EOF)
3446
end = ctxt->input->cur;
3450
if ((len > XML_MAX_NAME_LENGTH) &&
3451
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3452
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3455
return(xmlDictLookup(ctxt->dict, end - len, len));
3460
* @ctxt: an XML parser context
3461
* @len: length of the string parsed
3463
* parse an XML name.
3465
* [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3466
* CombiningChar | Extender
3468
* [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3470
* Returns the Name parsed or NULL
3473
static const xmlChar *
3474
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3484
* Accelerator for simple ASCII names
3486
in = ctxt->input->cur;
3487
if (((*in >= 0x61) && (*in <= 0x7A)) ||
3488
((*in >= 0x41) && (*in <= 0x5A)) ||
3491
while (((*in >= 0x61) && (*in <= 0x7A)) ||
3492
((*in >= 0x41) && (*in <= 0x5A)) ||
3493
((*in >= 0x30) && (*in <= 0x39)) ||
3494
(*in == '_') || (*in == '-') ||
3497
if ((*in > 0) && (*in < 0x80)) {
3498
count = in - ctxt->input->cur;
3499
if ((count > XML_MAX_NAME_LENGTH) &&
3500
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3501
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3504
ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3505
ctxt->input->cur = in;
3506
ctxt->nbChars += count;
3507
ctxt->input->col += count;
3509
xmlErrMemory(ctxt, NULL);
3514
return(xmlParseNCNameComplex(ctxt));
3518
* xmlParseNameAndCompare:
3519
* @ctxt: an XML parser context
3521
* parse an XML name and compares for match
3522
* (specialized for endtag parsing)
3524
* Returns NULL for an illegal name, (xmlChar*) 1 for success
3525
* and the name for mismatch
3528
static const xmlChar *
3529
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3530
register const xmlChar *cmp = other;
3531
register const xmlChar *in;
3535
if (ctxt->instate == XML_PARSER_EOF)
3538
in = ctxt->input->cur;
3539
while (*in != 0 && *in == *cmp) {
3544
if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3546
ctxt->input->cur = in;
3547
return (const xmlChar*) 1;
3549
/* failure (or end of input buffer), check with full function */
3550
ret = xmlParseName (ctxt);
3551
/* strings coming from the dictionary, direct compare possible */
3553
return (const xmlChar*) 1;
3559
* xmlParseStringName:
3560
* @ctxt: an XML parser context
3561
* @str: a pointer to the string pointer (IN/OUT)
3563
* parse an XML name.
3565
* [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3566
* CombiningChar | Extender
3568
* [5] Name ::= (Letter | '_' | ':') (NameChar)*
3570
* [6] Names ::= Name (#x20 Name)*
3572
* Returns the Name parsed or NULL. The @str pointer
3573
* is updated to the current location in the string.
3577
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3578
xmlChar buf[XML_MAX_NAMELEN + 5];
3579
const xmlChar *cur = *str;
3584
nbParseStringName++;
3587
c = CUR_SCHAR(cur, l);
3588
if (!xmlIsNameStartChar(ctxt, c)) {
3592
COPY_BUF(l,buf,len,c);
3594
c = CUR_SCHAR(cur, l);
3595
while (xmlIsNameChar(ctxt, c)) {
3596
COPY_BUF(l,buf,len,c);
3598
c = CUR_SCHAR(cur, l);
3599
if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3601
* Okay someone managed to make a huge name, so he's ready to pay
3602
* for the processing speed.
3607
buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3608
if (buffer == NULL) {
3609
xmlErrMemory(ctxt, NULL);
3612
memcpy(buffer, buf, len);
3613
while (xmlIsNameChar(ctxt, c)) {
3614
if (len + 10 > max) {
3617
if ((len > XML_MAX_NAME_LENGTH) &&
3618
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3619
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3624
tmp = (xmlChar *) xmlRealloc(buffer,
3625
max * sizeof(xmlChar));
3627
xmlErrMemory(ctxt, NULL);
3633
COPY_BUF(l,buffer,len,c);
3635
c = CUR_SCHAR(cur, l);
3642
if ((len > XML_MAX_NAME_LENGTH) &&
3643
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3644
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3648
return(xmlStrndup(buf, len));
3653
* @ctxt: an XML parser context
3655
* parse an XML Nmtoken.
3657
* [7] Nmtoken ::= (NameChar)+
3659
* [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3661
* Returns the Nmtoken parsed or NULL
3665
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3666
xmlChar buf[XML_MAX_NAMELEN + 5];
3676
if (ctxt->instate == XML_PARSER_EOF)
3680
while (xmlIsNameChar(ctxt, c)) {
3681
if (count++ > XML_PARSER_CHUNK_SIZE) {
3685
COPY_BUF(l,buf,len,c);
3691
if (ctxt->instate == XML_PARSER_EOF)
3695
if (len >= XML_MAX_NAMELEN) {
3697
* Okay someone managed to make a huge token, so he's ready to pay
3698
* for the processing speed.
3703
buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3704
if (buffer == NULL) {
3705
xmlErrMemory(ctxt, NULL);
3708
memcpy(buffer, buf, len);
3709
while (xmlIsNameChar(ctxt, c)) {
3710
if (count++ > XML_PARSER_CHUNK_SIZE) {
3713
if (ctxt->instate == XML_PARSER_EOF) {
3718
if (len + 10 > max) {
3721
if ((max > XML_MAX_NAME_LENGTH) &&
3722
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3723
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3728
tmp = (xmlChar *) xmlRealloc(buffer,
3729
max * sizeof(xmlChar));
3731
xmlErrMemory(ctxt, NULL);
3737
COPY_BUF(l,buffer,len,c);
3747
if ((len > XML_MAX_NAME_LENGTH) &&
3748
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3749
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3752
return(xmlStrndup(buf, len));
3756
* xmlParseEntityValue:
3757
* @ctxt: an XML parser context
3758
* @orig: if non-NULL store a copy of the original entity value
3760
* parse a value for ENTITY declarations
3762
* [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3763
* "'" ([^%&'] | PEReference | Reference)* "'"
3765
* Returns the EntityValue parsed with reference substituted or NULL
3769
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3770
xmlChar *buf = NULL;
3772
int size = XML_PARSER_BUFFER_SIZE;
3775
xmlChar *ret = NULL;
3776
const xmlChar *cur = NULL;
3777
xmlParserInputPtr input;
3779
if (RAW == '"') stop = '"';
3780
else if (RAW == '\'') stop = '\'';
3782
xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3785
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3787
xmlErrMemory(ctxt, NULL);
3792
* The content of the entity definition is copied in a buffer.
3795
ctxt->instate = XML_PARSER_ENTITY_VALUE;
3796
input = ctxt->input;
3798
if (ctxt->instate == XML_PARSER_EOF) {
3805
* NOTE: 4.4.5 Included in Literal
3806
* When a parameter entity reference appears in a literal entity
3807
* value, ... a single or double quote character in the replacement
3808
* text is always treated as a normal data character and will not
3809
* terminate the literal.
3810
* In practice it means we stop the loop only when back at parsing
3811
* the initial entity and the quote is found
3813
while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3814
(ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3815
if (len + 5 >= size) {
3819
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3821
xmlErrMemory(ctxt, NULL);
3827
COPY_BUF(l,buf,len,c);
3830
* Pop-up of finished entities.
3832
while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3843
if (ctxt->instate == XML_PARSER_EOF) {
3849
* Raise problem w.r.t. '&' and '%' being used in non-entities
3850
* reference constructs. Note Charref will be handled in
3851
* xmlStringDecodeEntities()
3854
while (*cur != 0) { /* non input consuming */
3855
if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3860
name = xmlParseStringName(ctxt, &cur);
3861
if ((name == NULL) || (*cur != ';')) {
3862
xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3863
"EntityValue: '%c' forbidden except for entities references\n",
3866
if ((tmp == '%') && (ctxt->inSubset == 1) &&
3867
(ctxt->inputNr == 1)) {
3868
xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3879
* Then PEReference entities are substituted.
3882
xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3887
* NOTE: 4.4.7 Bypassed
3888
* When a general entity reference appears in the EntityValue in
3889
* an entity declaration, it is bypassed and left as is.
3890
* so XML_SUBSTITUTE_REF is not set here.
3892
ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3904
* xmlParseAttValueComplex:
3905
* @ctxt: an XML parser context
3906
* @attlen: the resulting attribute len
3907
* @normalize: whether to apply the inner normalization
3909
* parse a value for an attribute, this is the fallback function
3910
* of xmlParseAttValue() when the attribute parsing requires handling
3911
* of non-ASCII characters, or normalization compaction.
3913
* Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3916
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3918
xmlChar *buf = NULL;
3919
xmlChar *rep = NULL;
3921
size_t buf_size = 0;
3922
int c, l, in_space = 0;
3923
xmlChar *current = NULL;
3926
if (NXT(0) == '"') {
3927
ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3930
} else if (NXT(0) == '\'') {
3932
ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3935
xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3940
* allocate a translation buffer.
3942
buf_size = XML_PARSER_BUFFER_SIZE;
3943
buf = (xmlChar *) xmlMallocAtomic(buf_size);
3944
if (buf == NULL) goto mem_error;
3947
* OK loop until we reach one of the ending char or a size limit.
3950
while (((NXT(0) != limit) && /* checked */
3951
(IS_CHAR(c)) && (c != '<')) &&
3952
(ctxt->instate != XML_PARSER_EOF)) {
3954
* Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3955
* special option is given
3957
if ((len > XML_MAX_TEXT_LENGTH) &&
3958
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3959
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3960
"AttValue length too long\n");
3966
if (NXT(1) == '#') {
3967
int val = xmlParseCharRef(ctxt);
3970
if (ctxt->replaceEntities) {
3971
if (len + 10 > buf_size) {
3972
growBuffer(buf, 10);
3977
* The reparsing will be done in xmlStringGetNodeList()
3978
* called by the attribute() function in SAX.c
3980
if (len + 10 > buf_size) {
3981
growBuffer(buf, 10);
3989
} else if (val != 0) {
3990
if (len + 10 > buf_size) {
3991
growBuffer(buf, 10);
3993
len += xmlCopyChar(0, &buf[len], val);
3996
ent = xmlParseEntityRef(ctxt);
3999
ctxt->nbentities += ent->owner;
4000
if ((ent != NULL) &&
4001
(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4002
if (len + 10 > buf_size) {
4003
growBuffer(buf, 10);
4005
if ((ctxt->replaceEntities == 0) &&
4006
(ent->content[0] == '&')) {
4013
buf[len++] = ent->content[0];
4015
} else if ((ent != NULL) &&
4016
(ctxt->replaceEntities != 0)) {
4017
if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4018
rep = xmlStringDecodeEntities(ctxt, ent->content,
4023
while (*current != 0) { /* non input consuming */
4024
if ((*current == 0xD) || (*current == 0xA) ||
4025
(*current == 0x9)) {
4029
buf[len++] = *current++;
4030
if (len + 10 > buf_size) {
4031
growBuffer(buf, 10);
4038
if (len + 10 > buf_size) {
4039
growBuffer(buf, 10);
4041
if (ent->content != NULL)
4042
buf[len++] = ent->content[0];
4044
} else if (ent != NULL) {
4045
int i = xmlStrlen(ent->name);
4046
const xmlChar *cur = ent->name;
4049
* This may look absurd but is needed to detect
4052
if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4053
(ent->content != NULL) && (ent->checked == 0)) {
4054
unsigned long oldnbent = ctxt->nbentities;
4056
rep = xmlStringDecodeEntities(ctxt, ent->content,
4057
XML_SUBSTITUTE_REF, 0, 0, 0);
4059
ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
4061
if (xmlStrchr(rep, '<'))
4069
* Just output the reference
4072
while (len + i + 10 > buf_size) {
4073
growBuffer(buf, i + 10);
4076
buf[len++] = *cur++;
4081
if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4082
if ((len != 0) || (!normalize)) {
4083
if ((!normalize) || (!in_space)) {
4084
COPY_BUF(l,buf,len,0x20);
4085
while (len + 10 > buf_size) {
4086
growBuffer(buf, 10);
4093
COPY_BUF(l,buf,len,c);
4094
if (len + 10 > buf_size) {
4095
growBuffer(buf, 10);
4103
if (ctxt->instate == XML_PARSER_EOF)
4106
if ((in_space) && (normalize)) {
4107
while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4111
xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4112
} else if (RAW != limit) {
4113
if ((c != 0) && (!IS_CHAR(c))) {
4114
xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4115
"invalid character in attribute value\n");
4117
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4118
"AttValue: ' expected\n");
4124
* There we potentially risk an overflow, don't allow attribute value of
4125
* length more than INT_MAX it is a very reasonable assumption !
4127
if (len >= INT_MAX) {
4128
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4129
"AttValue length too long\n");
4133
if (attlen != NULL) *attlen = (int) len;
4137
xmlErrMemory(ctxt, NULL);
4148
* @ctxt: an XML parser context
4150
* parse a value for an attribute
4151
* Note: the parser won't do substitution of entities here, this
4152
* will be handled later in xmlStringGetNodeList
4154
* [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4155
* "'" ([^<&'] | Reference)* "'"
4157
* 3.3.3 Attribute-Value Normalization:
4158
* Before the value of an attribute is passed to the application or
4159
* checked for validity, the XML processor must normalize it as follows:
4160
* - a character reference is processed by appending the referenced
4161
* character to the attribute value
4162
* - an entity reference is processed by recursively processing the
4163
* replacement text of the entity
4164
* - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4165
* appending #x20 to the normalized value, except that only a single
4166
* #x20 is appended for a "#xD#xA" sequence that is part of an external
4167
* parsed entity or the literal entity value of an internal parsed entity
4168
* - other characters are processed by appending them to the normalized value
4169
* If the declared value is not CDATA, then the XML processor must further
4170
* process the normalized attribute value by discarding any leading and
4171
* trailing space (#x20) characters, and by replacing sequences of space
4172
* (#x20) characters by a single space (#x20) character.
4173
* All attributes for which no declaration has been read should be treated
4174
* by a non-validating parser as if declared CDATA.
4176
* Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4181
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4182
if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4183
return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4187
* xmlParseSystemLiteral:
4188
* @ctxt: an XML parser context
4190
* parse an XML Literal
4192
* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4194
* Returns the SystemLiteral parsed or NULL
4198
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4199
xmlChar *buf = NULL;
4201
int size = XML_PARSER_BUFFER_SIZE;
4204
int state = ctxt->instate;
4211
} else if (RAW == '\'') {
4215
xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4219
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4221
xmlErrMemory(ctxt, NULL);
4224
ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4226
while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4227
if (len + 5 >= size) {
4230
if ((size > XML_MAX_NAME_LENGTH) &&
4231
((ctxt->options & XML_PARSE_HUGE) == 0)) {
4232
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4234
ctxt->instate = (xmlParserInputState) state;
4238
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4241
xmlErrMemory(ctxt, NULL);
4242
ctxt->instate = (xmlParserInputState) state;
4251
if (ctxt->instate == XML_PARSER_EOF) {
4256
COPY_BUF(l,buf,len,cur);
4266
ctxt->instate = (xmlParserInputState) state;
4267
if (!IS_CHAR(cur)) {
4268
xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4276
* xmlParsePubidLiteral:
4277
* @ctxt: an XML parser context
4279
* parse an XML public literal
4281
* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4283
* Returns the PubidLiteral parsed or NULL.
4287
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4288
xmlChar *buf = NULL;
4290
int size = XML_PARSER_BUFFER_SIZE;
4294
xmlParserInputState oldstate = ctxt->instate;
4300
} else if (RAW == '\'') {
4304
xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4307
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4309
xmlErrMemory(ctxt, NULL);
4312
ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4314
while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4315
if (len + 1 >= size) {
4318
if ((size > XML_MAX_NAME_LENGTH) &&
4319
((ctxt->options & XML_PARSE_HUGE) == 0)) {
4320
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4325
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4327
xmlErrMemory(ctxt, NULL);
4338
if (ctxt->instate == XML_PARSER_EOF) {
4353
xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4357
ctxt->instate = oldstate;
4361
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4364
* used for the test in the inner loop of the char data testing
4366
static const unsigned char test_char_data[256] = {
4367
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4368
0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4369
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4370
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4371
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4372
0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4373
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4374
0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4375
0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4376
0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4377
0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4378
0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4379
0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4380
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4381
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4382
0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4383
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4384
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4385
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4386
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4387
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4388
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4389
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4390
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4391
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4392
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4393
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4394
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4395
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4396
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4397
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4398
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4403
* @ctxt: an XML parser context
4404
* @cdata: int indicating whether we are within a CDATA section
4406
* parse a CharData section.
4407
* if we are within a CDATA section ']]>' marks an end of section.
4409
* The right angle bracket (>) may be represented using the string "&gt;",
4410
* and must, for compatibility, be escaped using "&gt;" or a character
4411
* reference when it appears in the string "]]>" in content, when that
4412
* string is not marking the end of a CDATA section.
4414
* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4418
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4421
int line = ctxt->input->line;
4422
int col = ctxt->input->col;
4428
* Accelerated common case where input doesn't need to be
4429
* modified before passing it to the handler.
4432
in = ctxt->input->cur;
4435
while (*in == 0x20) { in++; ctxt->input->col++; }
4438
ctxt->input->line++; ctxt->input->col = 1;
4440
} while (*in == 0xA);
4441
goto get_more_space;
4444
nbchar = in - ctxt->input->cur;
4446
const xmlChar *tmp = ctxt->input->cur;
4447
ctxt->input->cur = in;
4449
if ((ctxt->sax != NULL) &&
4450
(ctxt->sax->ignorableWhitespace !=
4451
ctxt->sax->characters)) {
4452
if (areBlanks(ctxt, tmp, nbchar, 1)) {
4453
if (ctxt->sax->ignorableWhitespace != NULL)
4454
ctxt->sax->ignorableWhitespace(ctxt->userData,
4457
if (ctxt->sax->characters != NULL)
4458
ctxt->sax->characters(ctxt->userData,
4460
if (*ctxt->space == -1)
4463
} else if ((ctxt->sax != NULL) &&
4464
(ctxt->sax->characters != NULL)) {
4465
ctxt->sax->characters(ctxt->userData,
4473
ccol = ctxt->input->col;
4474
while (test_char_data[*in]) {
4478
ctxt->input->col = ccol;
4481
ctxt->input->line++; ctxt->input->col = 1;
4483
} while (*in == 0xA);
4487
if ((in[1] == ']') && (in[2] == '>')) {
4488
xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4489
ctxt->input->cur = in;
4496
nbchar = in - ctxt->input->cur;
4498
if ((ctxt->sax != NULL) &&
4499
(ctxt->sax->ignorableWhitespace !=
4500
ctxt->sax->characters) &&
4501
(IS_BLANK_CH(*ctxt->input->cur))) {
4502
const xmlChar *tmp = ctxt->input->cur;
4503
ctxt->input->cur = in;
4505
if (areBlanks(ctxt, tmp, nbchar, 0)) {
4506
if (ctxt->sax->ignorableWhitespace != NULL)
4507
ctxt->sax->ignorableWhitespace(ctxt->userData,
4510
if (ctxt->sax->characters != NULL)
4511
ctxt->sax->characters(ctxt->userData,
4513
if (*ctxt->space == -1)
4516
line = ctxt->input->line;
4517
col = ctxt->input->col;
4518
} else if (ctxt->sax != NULL) {
4519
if (ctxt->sax->characters != NULL)
4520
ctxt->sax->characters(ctxt->userData,
4521
ctxt->input->cur, nbchar);
4522
line = ctxt->input->line;
4523
col = ctxt->input->col;
4525
/* something really bad happened in the SAX callback */
4526
if (ctxt->instate != XML_PARSER_CONTENT)
4529
ctxt->input->cur = in;
4533
ctxt->input->cur = in;
4535
ctxt->input->line++; ctxt->input->col = 1;
4536
continue; /* while */
4548
if (ctxt->instate == XML_PARSER_EOF)
4550
in = ctxt->input->cur;
4551
} while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4554
ctxt->input->line = line;
4555
ctxt->input->col = col;
4556
xmlParseCharDataComplex(ctxt, cdata);
4560
* xmlParseCharDataComplex:
4561
* @ctxt: an XML parser context
4562
* @cdata: int indicating whether we are within a CDATA section
4564
* Parse a CharData section. This is the fallback function
4565
* of xmlParseCharData() when the parsing requires handling
4566
* of non-ASCII characters.
4569
/*
 * Slow path for character data: copies characters one at a time into a
 * local buffer and flushes it to the SAX characters()/ignorableWhitespace()
 * callbacks.
 *
 * NOTE(review): short lines (declarations, braces, the loop tail) are not
 * visible in this listing; the comments describe only the visible statements.
 */
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4570
/* 5 bytes of slack beyond the flush threshold checked after COPY_BUF. */
xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4578
while ((cur != '<') && /* checked */
4580
(IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4581
/* A "]]" pair is checked here; the rest of the condition is not visible. */
if ((cur == ']') && (NXT(1) == ']') &&
4585
xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4588
COPY_BUF(l,buf,nbchar,cur);
4589
/* Flush once the buffer reaches the threshold. */
if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4593
* OK the segment is to be consumed as chars.
4595
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4596
if (areBlanks(ctxt, buf, nbchar, 0)) {
4597
if (ctxt->sax->ignorableWhitespace != NULL)
4598
ctxt->sax->ignorableWhitespace(ctxt->userData,
4601
if (ctxt->sax->characters != NULL)
4602
ctxt->sax->characters(ctxt->userData, buf, nbchar);
4603
if ((ctxt->sax->characters !=
4604
ctxt->sax->ignorableWhitespace) &&
4605
(*ctxt->space == -1))
4610
/* something really bad happened in the SAX callback */
4611
if (ctxt->instate != XML_PARSER_CONTENT)
4618
if (ctxt->instate == XML_PARSER_EOF)
4627
* OK the segment is to be consumed as chars.
4629
/* Second delivery site, same callback selection as the flush above. */
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4630
if (areBlanks(ctxt, buf, nbchar, 0)) {
4631
if (ctxt->sax->ignorableWhitespace != NULL)
4632
ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4634
if (ctxt->sax->characters != NULL)
4635
ctxt->sax->characters(ctxt->userData, buf, nbchar);
4636
if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4637
(*ctxt->space == -1))
4642
/* A non-zero character that fails IS_CHAR is reported as invalid. */
if ((cur != 0) && (!IS_CHAR(cur))) {
4643
/* Generate the error and skip the offending character */
4644
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4645
"PCDATA invalid Char value %d\n",
4652
* xmlParseExternalID:
4653
* @ctxt: an XML parser context
4654
* @publicID: a xmlChar** receiving PubidLiteral
4655
* @strict: indicate whether we should restrict parsing to only
4656
* production [75], see NOTE below
4658
* Parse an External ID or a Public ID
4660
* NOTE: Productions [75] and [83] interact badly since [75] can generate
4661
* 'PUBLIC' S PubidLiteral S SystemLiteral
4663
* [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4664
* | 'PUBLIC' S PubidLiteral S SystemLiteral
4666
* [83] PublicID ::= 'PUBLIC' S PubidLiteral
4668
* Returns the function returns SystemLiteral and in the second
4669
* case publicID receives PubidLiteral, is strict is off
4670
* it is possible to return NULL and have publicID set.
4674
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4675
xmlChar *URI = NULL;
4680
if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4682
if (!IS_BLANK_CH(CUR)) {
4683
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4684
"Space required after 'SYSTEM'\n");
4687
URI = xmlParseSystemLiteral(ctxt);
4689
xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4691
} else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4693
if (!IS_BLANK_CH(CUR)) {
4694
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4695
"Space required after 'PUBLIC'\n");
4698
*publicID = xmlParsePubidLiteral(ctxt);
4699
if (*publicID == NULL) {
4700
xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4704
* We don't handle [83] so "S SystemLiteral" is required.
4706
if (!IS_BLANK_CH(CUR)) {
4707
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4708
"Space required after the Public Identifier\n");
4712
* We handle [83] so we return immediately, if
4713
* "S SystemLiteral" is not detected. From a purely parsing
4714
* point of view that's a nice mess.
4720
if (!IS_BLANK_CH(*ptr)) return(NULL);
4722
while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
4723
if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4726
URI = xmlParseSystemLiteral(ctxt);
4728
xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4735
* xmlParseCommentComplex:
4736
* @ctxt: an XML parser context
4737
* @buf: the already parsed part of the buffer
4738
* @len: number of bytes filled in the buffer
4739
* @size: allocated size of the buffer
4741
* Skip an XML (SGML) comment <!-- .... -->
4742
* The spec says that "For compatibility, the string "--" (double-hyphen)
4743
* must not occur within comments. "
4744
* This is the slow routine in case the accelerator for ascii didn't work
4746
* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4749
/*
 * Slow path for comments: continues from the (buf, len, size) state passed
 * in by the caller, copying characters into buf, and reports the finished
 * text through the SAX comment() callback.
 *
 * NOTE(review): short lines (declarations, braces, labels, frees) are not
 * visible in this listing; the comments describe only the visible statements.
 */
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4750
size_t len, size_t size) {
4757
/* Remember the input id for the entity-boundary check at the end. */
inputid = ctxt->input->id;
4761
/* NOTE(review): presumably only taken when the caller passed no buffer - confirm. */
size = XML_PARSER_BUFFER_SIZE;
4762
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4764
xmlErrMemory(ctxt, NULL);
4768
GROW; /* Assure there's enough input data */
4771
goto not_terminated;
4773
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4774
"xmlParseComment: invalid xmlChar value %d\n",
4782
goto not_terminated;
4784
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4785
"xmlParseComment: invalid xmlChar value %d\n",
4793
goto not_terminated;
4794
while (IS_CHAR(cur) && /* checked */
4796
(r != '-') || (q != '-'))) {
4797
/* Two consecutive hyphens inside the comment body are an error. */
if ((r == '-') && (q == '-')) {
4798
xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4800
/* Length cap applies unless XML_PARSE_HUGE is set. */
if ((len > XML_MAX_TEXT_LENGTH) &&
4801
((ctxt->options & XML_PARSE_HUGE) == 0)) {
4802
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4803
"Comment too big found", NULL);
4807
/* Double the buffer when fewer than 5 bytes of headroom remain. */
if (len + 5 >= size) {
4811
new_size = size * 2;
4812
new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4813
if (new_buf == NULL) {
4815
xmlErrMemory(ctxt, NULL);
4821
COPY_BUF(ql,buf,len,q);
4831
if (ctxt->instate == XML_PARSER_EOF) {
4846
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4847
"Comment not terminated \n<!--%.50s\n", buf);
4848
} else if (!IS_CHAR(cur)) {
4849
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4850
"xmlParseComment: invalid xmlChar value %d\n",
4853
/* The comment must end in the same input it started in. */
if (inputid != ctxt->input->id) {
4854
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4855
"Comment doesn't start and stop in the same entity\n");
4858
if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4859
(!ctxt->disableSAX))
4860
ctxt->sax->comment(ctxt->userData, buf);
4865
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4866
"Comment not terminated\n", NULL);
4873
* @ctxt: an XML parser context
4875
* Skip an XML (SGML) comment <!-- .... -->
4876
* The spec says that "For compatibility, the string "--" (double-hyphen)
4877
* must not occur within comments. "
4879
* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4882
/*
 * Parse a comment. A fast in-place scan accumulates the comment text in
 * buf and reports it through the SAX comment() callback; when the scan
 * stops on something it does not handle, the partially filled buffer is
 * handed to xmlParseCommentComplex().
 *
 * NOTE(review): short lines (declarations, braces, labels, frees) are not
 * visible in this listing; the comments describe only the visible statements.
 */
xmlParseComment(xmlParserCtxtPtr ctxt) {
4883
xmlChar *buf = NULL;
4884
size_t size = XML_PARSER_BUFFER_SIZE;
4886
xmlParserInputState state;
4893
* Check that there is a comment right here.
4895
if ((RAW != '<') || (NXT(1) != '!') ||
4896
(NXT(2) != '-') || (NXT(3) != '-')) return;
4897
/* Save the caller's state; it is put back on the exit paths below. */
state = ctxt->instate;
4898
ctxt->instate = XML_PARSER_COMMENT;
4899
inputid = ctxt->input->id;
4905
* Accelerated common case where input don't need to be
4906
* modified before passing it to the handler.
4908
in = ctxt->input->cur;
4912
ctxt->input->line++; ctxt->input->col = 1;
4914
} while (*in == 0xA);
4917
ccol = ctxt->input->col;
4918
/* Accepts ASCII bytes other than '-'; the last alternative is not visible here. */
while (((*in > '-') && (*in <= 0x7F)) ||
4919
((*in >= 0x20) && (*in < '-')) ||
4924
ctxt->input->col = ccol;
4927
ctxt->input->line++; ctxt->input->col = 1;
4929
} while (*in == 0xA);
4932
nbchar = in - ctxt->input->cur;
4934
* save current set of data
4937
/* Text is only buffered when a comment handler is installed. */
if ((ctxt->sax != NULL) &&
4938
(ctxt->sax->comment != NULL)) {
4940
if ((*in == '-') && (in[1] == '-'))
4943
size = XML_PARSER_BUFFER_SIZE + nbchar;
4944
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4946
xmlErrMemory(ctxt, NULL);
4947
ctxt->instate = state;
4951
} else if (len + nbchar + 1 >= size) {
4953
/* Grow by the pending chunk plus one more buffer unit of slack. */
size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4954
new_buf = (xmlChar *) xmlRealloc(buf,
4955
size * sizeof(xmlChar));
4956
if (new_buf == NULL) {
4958
xmlErrMemory(ctxt, NULL);
4959
ctxt->instate = state;
4964
memcpy(&buf[len], ctxt->input->cur, nbchar);
4969
/* Length cap applies unless XML_PARSE_HUGE is set. */
if ((len > XML_MAX_TEXT_LENGTH) &&
4970
((ctxt->options & XML_PARSE_HUGE) == 0)) {
4971
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4972
"Comment too big found", NULL);
4976
ctxt->input->cur = in;
4979
ctxt->input->line++; ctxt->input->col = 1;
4984
ctxt->input->cur = in;
4986
ctxt->input->line++; ctxt->input->col = 1;
4987
continue; /* while */
4993
if (ctxt->instate == XML_PARSER_EOF) {
4997
in = ctxt->input->cur;
5001
/* The comment must end in the same input it started in. */
if (ctxt->input->id != inputid) {
5002
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5003
"comment doesn't start and stop in the same entity\n");
5006
if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5007
(!ctxt->disableSAX)) {
5009
ctxt->sax->comment(ctxt->userData, buf);
5011
/* NOTE(review): presumably the branch for an empty comment (no buffer) - confirm. */
ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5015
if (ctxt->instate != XML_PARSER_EOF)
5016
ctxt->instate = state;
5020
xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5021
"Double hyphen within comment: "
5025
xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5026
"Double hyphen within comment\n", NULL);
5034
} while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
5035
/* Hand the partially filled buffer to the slow path. */
xmlParseCommentComplex(ctxt, buf, len, size);
5036
ctxt->instate = state;
5043
* @ctxt: an XML parser context
5045
* parse the name of a PI
5047
* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5049
* Returns the PITarget name or NULL
5053
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5054
const xmlChar *name;
5056
name = xmlParseName(ctxt);
5057
if ((name != NULL) &&
5058
((name[0] == 'x') || (name[0] == 'X')) &&
5059
((name[1] == 'm') || (name[1] == 'M')) &&
5060
((name[2] == 'l') || (name[2] == 'L'))) {
5062
if ((name[0] == 'x') && (name[1] == 'm') &&
5063
(name[2] == 'l') && (name[3] == 0)) {
5064
xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5065
"XML declaration allowed only at the start of the document\n");
5067
} else if (name[3] == 0) {
5068
xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5072
if (xmlW3CPIs[i] == NULL) break;
5073
if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5076
xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5077
"xmlParsePITarget: invalid name prefix 'xml'\n",
5080
if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5081
xmlNsErr(ctxt, XML_NS_ERR_COLON,
5082
"colon are forbidden from PI names '%s'\n", name, NULL, NULL);
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * A malformed value produces an XML_WAR_CATALOG_PI warning, except for a
 * missing '=' after the "catalog" keyword, which is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *tmp, *base;
    xmlChar marker;

    tmp = catalog;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
        goto error;
    tmp += 7;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (*tmp != '=') {
        return;
    }
    tmp++;
    while (IS_BLANK_CH(*tmp)) tmp++;
    /* The value is delimited by a matching pair of ' or " quotes. */
    marker = *tmp;
    if ((marker != '\'') && (marker != '"'))
        goto error;
    tmp++;
    base = tmp;
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
    if (*tmp == 0)
        goto error;
    URL = xmlStrndup(base, tmp - base);
    tmp++;
    /* Only blanks may follow the closing quote. */
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (*tmp != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif /* LIBXML_CATALOG_ENABLED */
5151
* @ctxt: an XML parser context
5153
* parse an XML Processing Instruction.
5155
* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5157
* The processing is transfered to SAX once parsed.
5161
xmlParsePI(xmlParserCtxtPtr ctxt) {
5162
xmlChar *buf = NULL;
5164
size_t size = XML_PARSER_BUFFER_SIZE;
5166
const xmlChar *target;
5167
xmlParserInputState state;
5170
if ((RAW == '<') && (NXT(1) == '?')) {
5171
xmlParserInputPtr input = ctxt->input;
5172
state = ctxt->instate;
5173
ctxt->instate = XML_PARSER_PI;
5175
* this is a Processing Instruction.
5181
* Parse the target name and check for special support like
5184
target = xmlParsePITarget(ctxt);
5185
if (target != NULL) {
5186
if ((RAW == '?') && (NXT(1) == '>')) {
5187
if (input != ctxt->input) {
5188
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5189
"PI declaration doesn't start and stop in the same entity\n");
5196
if ((ctxt->sax) && (!ctxt->disableSAX) &&
5197
(ctxt->sax->processingInstruction != NULL))
5198
ctxt->sax->processingInstruction(ctxt->userData,
5200
if (ctxt->instate != XML_PARSER_EOF)
5201
ctxt->instate = state;
5204
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5206
xmlErrMemory(ctxt, NULL);
5207
ctxt->instate = state;
5211
if (!IS_BLANK(cur)) {
5212
xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5213
"ParsePI: PI %s space expected\n", target);
5217
while (IS_CHAR(cur) && /* checked */
5218
((cur != '?') || (NXT(1) != '>'))) {
5219
if (len + 5 >= size) {
5221
size_t new_size = size * 2;
5222
tmp = (xmlChar *) xmlRealloc(buf, new_size);
5224
xmlErrMemory(ctxt, NULL);
5226
ctxt->instate = state;
5235
if (ctxt->instate == XML_PARSER_EOF) {
5240
if ((len > XML_MAX_TEXT_LENGTH) &&
5241
((ctxt->options & XML_PARSE_HUGE) == 0)) {
5242
xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5243
"PI %s too big found", target);
5245
ctxt->instate = state;
5249
COPY_BUF(l,buf,len,cur);
5258
if ((len > XML_MAX_TEXT_LENGTH) &&
5259
((ctxt->options & XML_PARSE_HUGE) == 0)) {
5260
xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5261
"PI %s too big found", target);
5263
ctxt->instate = state;
5268
xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5269
"ParsePI: PI %s never end ...\n", target);
5271
if (input != ctxt->input) {
5272
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5273
"PI declaration doesn't start and stop in the same entity\n");
5277
#ifdef LIBXML_CATALOG_ENABLED
5278
if (((state == XML_PARSER_MISC) ||
5279
(state == XML_PARSER_START)) &&
5280
(xmlStrEqual(target, XML_CATALOG_PI))) {
5281
xmlCatalogAllow allow = xmlCatalogGetDefaults();
5282
if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5283
(allow == XML_CATA_ALLOW_ALL))
5284
xmlParseCatalogPI(ctxt, buf);
5292
if ((ctxt->sax) && (!ctxt->disableSAX) &&
5293
(ctxt->sax->processingInstruction != NULL))
5294
ctxt->sax->processingInstruction(ctxt->userData,
5299
xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5301
if (ctxt->instate != XML_PARSER_EOF)
5302
ctxt->instate = state;
5307
* xmlParseNotationDecl:
5308
* @ctxt: an XML parser context
5310
* parse a notation declaration
5312
* [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5314
* Hence there is actually 3 choices:
5315
* 'PUBLIC' S PubidLiteral
5316
* 'PUBLIC' S PubidLiteral S SystemLiteral
5317
* and 'SYSTEM' S SystemLiteral
5319
* See the NOTE on xmlParseExternalID().
5323
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5324
const xmlChar *name;
5328
if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5329
xmlParserInputPtr input = ctxt->input;
5332
if (!IS_BLANK_CH(CUR)) {
5333
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5334
"Space required after '<!NOTATION'\n");
5339
name = xmlParseName(ctxt);
5341
xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5344
if (!IS_BLANK_CH(CUR)) {
5345
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5346
"Space required after the NOTATION name'\n");
5349
if (xmlStrchr(name, ':') != NULL) {
5350
xmlNsErr(ctxt, XML_NS_ERR_COLON,
5351
"colon are forbidden from notation names '%s'\n",
5359
Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5363
if (input != ctxt->input) {
5364
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5365
"Notation declaration doesn't start and stop in the same entity\n");
5368
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5369
(ctxt->sax->notationDecl != NULL))
5370
ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5372
xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5374
if (Systemid != NULL) xmlFree(Systemid);
5375
if (Pubid != NULL) xmlFree(Pubid);
5380
* xmlParseEntityDecl:
5381
* @ctxt: an XML parser context
5383
* parse <!ENTITY declarations
5385
* [70] EntityDecl ::= GEDecl | PEDecl
5387
* [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5389
* [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5391
* [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5393
* [74] PEDef ::= EntityValue | ExternalID
5395
* [76] NDataDecl ::= S 'NDATA' S Name
5397
* [ VC: Notation Declared ]
5398
* The Name must match the declared name of a notation.
5402
/*
 * Parse an <!ENTITY ...> declaration (general or parameter entity) and
 * report it through the SAX entityDecl()/unparsedEntityDecl() callbacks.
 *
 * NOTE(review): short lines (declarations, braces, else branches, frees)
 * are not visible in this listing; the comments describe only the visible
 * statements.
 */
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5403
const xmlChar *name = NULL;
5404
xmlChar *value = NULL;
5405
xmlChar *URI = NULL, *literal = NULL;
5406
const xmlChar *ndata = NULL;
5407
int isParameter = 0;
5408
xmlChar *orig = NULL;
5411
/* GROW; done in the caller */
5412
if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5413
/* Remember the starting input for the entity-boundary check below. */
xmlParserInputPtr input = ctxt->input;
5416
/* NOTE(review): SKIP_BLANKS presumably yields the number of blanks skipped - confirm. */
skipped = SKIP_BLANKS;
5418
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5419
"Space required after '<!ENTITY'\n");
5424
skipped = SKIP_BLANKS;
5426
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5427
"Space required after '%'\n");
5432
name = xmlParseName(ctxt);
5434
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5435
"xmlParseEntityDecl: no name\n");
5438
/* A colon in the entity name is reported as a namespace error. */
if (xmlStrchr(name, ':') != NULL) {
5439
xmlNsErr(ctxt, XML_NS_ERR_COLON,
5440
"colon are forbidden from entities names '%s'\n",
5443
skipped = SKIP_BLANKS;
5445
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5446
"Space required after the entity name\n");
5449
ctxt->instate = XML_PARSER_ENTITY_DECL;
5451
* handle the various case of definitions...
5454
/* A quoted literal is an internal entity value. */
if ((RAW == '"') || (RAW == '\'')) {
5455
value = xmlParseEntityValue(ctxt, &orig);
5457
if ((ctxt->sax != NULL) &&
5458
(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5459
ctxt->sax->entityDecl(ctxt->userData, name,
5460
XML_INTERNAL_PARAMETER_ENTITY,
5464
/* strict=1: only production [75] (ExternalID) is accepted here. */
URI = xmlParseExternalID(ctxt, &literal, 1);
5465
if ((URI == NULL) && (literal == NULL)) {
5466
xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5471
uri = xmlParseURI((const char *) URI);
5473
xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5474
"Invalid URI: %s\n", URI);
5476
* This really ought to be a well formedness error
5477
* but the XML Core WG decided otherwise c.f. issue
5478
* E26 of the XML erratas.
5481
/* A system identifier carrying a fragment is a fatal error. */
if (uri->fragment != NULL) {
5483
* Okay this is foolish to block those but not
5486
xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5488
if ((ctxt->sax != NULL) &&
5489
(!ctxt->disableSAX) &&
5490
(ctxt->sax->entityDecl != NULL))
5491
ctxt->sax->entityDecl(ctxt->userData, name,
5492
XML_EXTERNAL_PARAMETER_ENTITY,
5493
literal, URI, NULL);
5500
if ((RAW == '"') || (RAW == '\'')) {
5501
value = xmlParseEntityValue(ctxt, &orig);
5502
if ((ctxt->sax != NULL) &&
5503
(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5504
ctxt->sax->entityDecl(ctxt->userData, name,
5505
XML_INTERNAL_GENERAL_ENTITY,
5508
* For expat compatibility in SAX mode.
5510
/* With no document (or a SAX_COMPAT_MODE one), record the entity in a placeholder doc. */
if ((ctxt->myDoc == NULL) ||
5511
(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5512
if (ctxt->myDoc == NULL) {
5513
ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5514
if (ctxt->myDoc == NULL) {
5515
xmlErrMemory(ctxt, "New Doc failed");
5518
ctxt->myDoc->properties = XML_DOC_INTERNAL;
5520
if (ctxt->myDoc->intSubset == NULL)
5521
ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5522
BAD_CAST "fake", NULL, NULL);
5524
xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5528
URI = xmlParseExternalID(ctxt, &literal, 1);
5529
if ((URI == NULL) && (literal == NULL)) {
5530
xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5535
uri = xmlParseURI((const char *)URI);
5537
xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5538
"Invalid URI: %s\n", URI);
5540
* This really ought to be a well formedness error
5541
* but the XML Core WG decided otherwise c.f. issue
5542
* E26 of the XML erratas.
5545
if (uri->fragment != NULL) {
5547
* Okay this is foolish to block those but not
5550
xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5555
if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
5556
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5557
"Space required before 'NDATA'\n");
5560
/* An NDATA clause makes this an unparsed entity ([76] NDataDecl). */
if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5562
if (!IS_BLANK_CH(CUR)) {
5563
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5564
"Space required after 'NDATA'\n");
5567
ndata = xmlParseName(ctxt);
5568
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5569
(ctxt->sax->unparsedEntityDecl != NULL))
5570
ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5571
literal, URI, ndata);
5573
if ((ctxt->sax != NULL) &&
5574
(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5575
ctxt->sax->entityDecl(ctxt->userData, name,
5576
XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5577
literal, URI, NULL);
5579
* For expat compatibility in SAX mode.
5580
* assuming the entity repalcement was asked for
5582
if ((ctxt->replaceEntities != 0) &&
5583
((ctxt->myDoc == NULL) ||
5584
(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5585
if (ctxt->myDoc == NULL) {
5586
ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5587
if (ctxt->myDoc == NULL) {
5588
xmlErrMemory(ctxt, "New Doc failed");
5591
ctxt->myDoc->properties = XML_DOC_INTERNAL;
5594
if (ctxt->myDoc->intSubset == NULL)
5595
ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5596
BAD_CAST "fake", NULL, NULL);
5597
xmlSAX2EntityDecl(ctxt, name,
5598
XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5599
literal, URI, NULL);
5604
if (ctxt->instate == XML_PARSER_EOF)
5608
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5609
"xmlParseEntityDecl: entity %s not terminated\n", name);
5611
/* The declaration must end in the input it started in. */
if (input != ctxt->input) {
5612
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5613
"Entity declaration doesn't start and stop in the same entity\n");
5619
* Ugly mechanism to save the raw entity value.
5621
xmlEntityPtr cur = NULL;
5624
/* Look the entity up again through whichever SAX getter applies. */
if ((ctxt->sax != NULL) &&
5625
(ctxt->sax->getParameterEntity != NULL))
5626
cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5628
if ((ctxt->sax != NULL) &&
5629
(ctxt->sax->getEntity != NULL))
5630
cur = ctxt->sax->getEntity(ctxt->userData, name);
5631
if ((cur == NULL) && (ctxt->userData==ctxt)) {
5632
cur = xmlSAX2GetEntity(ctxt, name);
5636
if (cur->orig != NULL)
5643
/* Release the strings obtained from the value / ExternalID parsers. */
if (value != NULL) xmlFree(value);
5644
if (URI != NULL) xmlFree(URI);
5645
if (literal != NULL) xmlFree(literal);
5650
* xmlParseDefaultDecl:
5651
* @ctxt: an XML parser context
5652
* @value: Receive a possible fixed default value for the attribute
5654
* Parse an attribute default declaration
5656
* [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5658
* [ VC: Required Attribute ]
5659
* if the default declaration is the keyword #REQUIRED, then the
5660
* attribute must be specified for all elements of the type in the
5661
* attribute-list declaration.
5663
* [ VC: Attribute Default Legal ]
5664
* The declared default value must meet the lexical constraints of
5665
* the declared attribute type c.f. xmlValidateAttributeDecl()
5667
* [ VC: Fixed Attribute Default ]
5668
* if an attribute has a default value declared with the #FIXED
5669
* keyword, instances of that attribute must match the default value.
5671
* [ WFC: No < in Attribute Values ]
5672
* handled in xmlParseAttValue()
5674
* returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5675
* or XML_ATTRIBUTE_FIXED.
5679
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5684
if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5686
return(XML_ATTRIBUTE_REQUIRED);
5688
if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5690
return(XML_ATTRIBUTE_IMPLIED);
5692
val = XML_ATTRIBUTE_NONE;
5693
if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5695
val = XML_ATTRIBUTE_FIXED;
5696
if (!IS_BLANK_CH(CUR)) {
5697
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5698
"Space required after '#FIXED'\n");
5702
ret = xmlParseAttValue(ctxt);
5703
ctxt->instate = XML_PARSER_DTD;
5705
xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5706
"Attribute default value declaration error\n");
5713
* xmlParseNotationType:
5714
* @ctxt: an XML parser context
5716
* parse an Notation attribute type.
5718
* Note: the leading 'NOTATION' S part has already being parsed...
5720
* [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5722
* [ VC: Notation Attributes ]
5723
* Values of this type must match one of the notation names included
5724
* in the declaration; all notation names in the declaration must be declared.
5726
* Returns: the notation attribute tree built while parsing
5730
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5731
const xmlChar *name;
5732
xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5735
xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5742
name = xmlParseName(ctxt);
5744
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5745
"Name expected in NOTATION declaration\n");
5746
xmlFreeEnumeration(ret);
5750
while (tmp != NULL) {
5751
if (xmlStrEqual(name, tmp->name)) {
5752
xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5753
"standalone: attribute notation value token %s duplicated\n",
5755
if (!xmlDictOwns(ctxt->dict, name))
5756
xmlFree((xmlChar *) name);
5762
cur = xmlCreateEnumeration(name);
5764
xmlFreeEnumeration(ret);
5767
if (last == NULL) ret = last = cur;
5774
} while (RAW == '|');
5776
xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5777
xmlFreeEnumeration(ret);
5785
* xmlParseEnumerationType:
5786
* @ctxt: an XML parser context
5788
* parse an Enumeration attribute type.
5790
* [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5792
* [ VC: Enumeration ]
5793
* Values of this type must match one of the Nmtoken tokens in the declaration.
5796
* Returns: the enumeration attribute tree built while parsing
5800
/*
 * Parse an Enumeration attribute type, '(' Nmtoken ('|' Nmtoken)* ')',
 * building a linked list of the tokens.
 *
 * NOTE(review): short lines (declarations, braces, returns, frees) are not
 * visible in this listing, so the values returned on the error paths cannot
 * be confirmed from here.
 */
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5802
xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5805
xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5812
name = xmlParseNmtoken(ctxt);
5814
xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5818
/* Linear scan of the list for a duplicate token. */
while (tmp != NULL) {
5819
if (xmlStrEqual(name, tmp->name)) {
5820
xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5821
"standalone: attribute enumeration value token %s duplicated\n",
5823
if (!xmlDictOwns(ctxt->dict, name))
5830
cur = xmlCreateEnumeration(name);
5831
/* NOTE(review): presumably frees name when the dictionary does not own it - confirm. */
if (!xmlDictOwns(ctxt->dict, name))
5834
xmlFreeEnumeration(ret);
5837
/* First node becomes both head and tail of the list. */
if (last == NULL) ret = last = cur;
5844
} while (RAW == '|');
5846
xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5854
* xmlParseEnumeratedType:
5855
* @ctxt: an XML parser context
5856
* @tree: the enumeration tree built while parsing
5858
* parse an Enumerated attribute type.
5860
* [57] EnumeratedType ::= NotationType | Enumeration
5862
* [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5865
* Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5869
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5870
if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5872
if (!IS_BLANK_CH(CUR)) {
5873
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5874
"Space required after 'NOTATION'\n");
5878
*tree = xmlParseNotationType(ctxt);
5879
if (*tree == NULL) return(0);
5880
return(XML_ATTRIBUTE_NOTATION);
5882
*tree = xmlParseEnumerationType(ctxt);
5883
if (*tree == NULL) return(0);
5884
return(XML_ATTRIBUTE_ENUMERATION);
5888
* xmlParseAttributeType:
5889
* @ctxt: an XML parser context
5890
* @tree: the enumeration tree built while parsing
5892
* parse the Attribute list def for an element
5894
* [54] AttType ::= StringType | TokenizedType | EnumeratedType
5896
* [55] StringType ::= 'CDATA'
5898
* [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5899
* 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5901
* Validity constraints for attribute values syntax are checked in
5902
* xmlValidateAttributeValue()
5905
* Values of type ID must match the Name production. A name must not
5906
* appear more than once in an XML document as a value of this type;
5907
* i.e., ID values must uniquely identify the elements which bear them.
5909
* [ VC: One ID per Element Type ]
5910
* No element type may have more than one ID attribute specified.
5912
* [ VC: ID Attribute Default ]
5913
* An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5916
* Values of type IDREF must match the Name production, and values
5917
* of type IDREFS must match Names; each IDREF Name must match the value
5918
* of an ID attribute on some element in the XML document; i.e. IDREF
5919
* values must match the value of some ID attribute.
5921
* [ VC: Entity Name ]
5922
* Values of type ENTITY must match the Name production, values
5923
* of type ENTITIES must match Names; each Entity Name must match the
5924
* name of an unparsed entity declared in the DTD.
5926
* [ VC: Name Token ]
5927
* Values of type NMTOKEN must match the Nmtoken production; values
5928
* of type NMTOKENS must match Nmtokens.
5930
* Returns the attribute type
5933
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5935
if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5937
return(XML_ATTRIBUTE_CDATA);
5938
} else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5940
return(XML_ATTRIBUTE_IDREFS);
5941
} else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5943
return(XML_ATTRIBUTE_IDREF);
5944
} else if ((RAW == 'I') && (NXT(1) == 'D')) {
5946
return(XML_ATTRIBUTE_ID);
5947
} else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5949
return(XML_ATTRIBUTE_ENTITY);
5950
} else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5952
return(XML_ATTRIBUTE_ENTITIES);
5953
} else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5955
return(XML_ATTRIBUTE_NMTOKENS);
5956
} else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5958
return(XML_ATTRIBUTE_NMTOKEN);
5960
return(xmlParseEnumeratedType(ctxt, tree));
5964
* xmlParseAttributeListDecl:
5965
* @ctxt: an XML parser context
5967
* Parse the attribute list declaration for an element
5969
* [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5971
* [53] AttDef ::= S Name S AttType S DefaultDecl
5975
/*
 * Parse one '<!ATTLIST' declaration: the element name followed by a list
 * of attribute definitions (name, type, default). Each successfully parsed
 * definition is reported through the SAX attributeDecl callback when one
 * is available and SAX is not disabled.
 */
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5976
const xmlChar *elemName;
5977
const xmlChar *attrName;
5978
xmlEnumerationPtr tree;
5980
if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5981
/*
 * Remember which input the declaration started in; it is compared with
 * ctxt->input at the end to detect a declaration spanning entities.
 */
xmlParserInputPtr input = ctxt->input;
5984
if (!IS_BLANK_CH(CUR)) {
5985
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5986
"Space required after '<!ATTLIST'\n");
5989
elemName = xmlParseName(ctxt);
5990
if (elemName == NULL) {
5991
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5992
"ATTLIST: no name for Element\n");
5997
/*
 * Loop until the closing '>' or until the parser reaches the EOF state.
 * check records the position at the start of the iteration so that an
 * iteration consuming no input can be detected further down.
 */
while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
5998
const xmlChar *check = CUR_PTR;
6001
xmlChar *defaultValue = NULL;
6005
attrName = xmlParseName(ctxt);
6006
if (attrName == NULL) {
6007
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6008
"ATTLIST: no name for Attribute\n");
6012
if (!IS_BLANK_CH(CUR)) {
6013
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6014
"Space required after the attribute name\n");
6019
type = xmlParseAttributeType(ctxt, &tree);
6025
if (!IS_BLANK_CH(CUR)) {
6026
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6027
"Space required after the attribute type\n");
6029
xmlFreeEnumeration(tree);
6034
def = xmlParseDefaultDecl(ctxt, &defaultValue);
6036
if (defaultValue != NULL)
6037
xmlFree(defaultValue);
6039
xmlFreeEnumeration(tree);
6042
/* Default values of every type except CDATA are normalized in place. */
if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6043
xmlAttrNormalizeSpace(defaultValue, defaultValue);
6047
if (!IS_BLANK_CH(CUR)) {
6048
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6049
"Space required after the attribute default value\n");
6050
if (defaultValue != NULL)
6051
xmlFree(defaultValue);
6053
xmlFreeEnumeration(tree);
6058
/*
 * The input position did not move during this iteration: report an
 * internal error and release what was allocated for this definition.
 */
if (check == CUR_PTR) {
6059
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6060
"in xmlParseAttributeListDecl\n");
6061
if (defaultValue != NULL)
6062
xmlFree(defaultValue);
6064
xmlFreeEnumeration(tree);
6067
/*
 * Hand the definition, including the enumeration tree, to the SAX
 * callback. The tree is freed here only when no callback is invoked.
 */
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6068
(ctxt->sax->attributeDecl != NULL))
6069
ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6070
type, def, defaultValue, tree);
6071
else if (tree != NULL)
6072
xmlFreeEnumeration(tree);
6074
/*
 * In SAX2 mode a default value is recorded with xmlAddDefAttrs() unless
 * the declaration is #IMPLIED or #REQUIRED.
 */
if ((ctxt->sax2) && (defaultValue != NULL) &&
6075
(def != XML_ATTRIBUTE_IMPLIED) &&
6076
(def != XML_ATTRIBUTE_REQUIRED)) {
6077
xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6080
xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6082
if (defaultValue != NULL)
6083
xmlFree(defaultValue);
6087
/* The declaration ended in a different input than it started in. */
if (input != ctxt->input) {
6088
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6089
"Attribute list declaration doesn't start and stop in the same entity\n",
6098
* xmlParseElementMixedContentDecl:
6099
* @ctxt: an XML parser context
6100
* @inputchk: the input used for the current entity, needed for boundary checks
6102
* parse the declaration for a Mixed Element content
6103
* The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6105
* [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6106
* '(' S? '#PCDATA' S? ')'
6108
* [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6110
* [ VC: No Duplicate Types ]
6111
* The same name must not appear more than once in a single
6112
* mixed-content declaration.
6114
* returns: the list of the xmlElementContentPtr describing the element choices
6116
/*
 * Parse a Mixed content declaration, which must begin with '#PCDATA', and
 * build the corresponding xmlElementContent tree: a PCDATA node, combined
 * through XML_ELEMENT_CONTENT_OR nodes with one element node per name
 * listed after a '|'. inputchk is the id of the input in which the
 * declaration started; it is compared with the current input id when
 * validating.
 */
xmlElementContentPtr
6117
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6118
xmlElementContentPtr ret = NULL, cur = NULL, n;
6119
const xmlChar *elem = NULL;
6122
if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6127
/*
 * When validating, a declaration closed in another input than the one
 * it was opened in is reported as an entity boundary validity error.
 */
if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6128
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6129
"Element content declaration doesn't start and stop in the same entity\n",
6133
ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6137
ret->ocur = XML_ELEMENT_CONTENT_MULT;
6142
if ((RAW == '(') || (RAW == '|')) {
6143
ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6144
if (ret == NULL) return(NULL);
6146
/* Each '|' introduces one more alternative name; stop at parser EOF. */
while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6149
ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6150
if (ret == NULL) return(NULL);
6156
n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6157
if (n == NULL) return(NULL);
6158
n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6167
elem = xmlParseName(ctxt);
6169
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6170
"xmlParseElementMixedContentDecl : Name expected\n");
6171
xmlFreeDocElementContent(ctxt->myDoc, cur);
6177
/*
 * A list with alternatives must close with ')*': the last parsed name
 * is attached as cur->c2 and the whole group is marked as repeatable.
 */
if ((RAW == ')') && (NXT(1) == '*')) {
6179
cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6180
XML_ELEMENT_CONTENT_ELEMENT);
6181
if (cur->c2 != NULL)
6182
cur->c2->parent = cur;
6185
ret->ocur = XML_ELEMENT_CONTENT_MULT;
6186
if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6187
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6188
"Element content declaration doesn't start and stop in the same entity\n",
6193
xmlFreeDocElementContent(ctxt->myDoc, ret);
6194
xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6199
xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6205
* xmlParseElementChildrenContentDeclPriv:
6206
* @ctxt: an XML parser context
6207
* @inputchk: the input used for the current entity, needed for boundary checks
6208
* @depth: the level of recursion
6210
* parse the declaration for an Element children content
6211
* The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6214
* [47] children ::= (choice | seq) ('?' | '*' | '+')?
6216
* [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6218
* [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6220
* [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6222
* [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6223
* TODO Parameter-entity replacement text must be properly nested
6224
* with parenthesized groups. That is to say, if either of the
6225
* opening or closing parentheses in a choice, seq, or Mixed
6226
* construct is contained in the replacement text for a parameter
6227
* entity, both must be contained in the same replacement text. For
6228
* interoperability, if a parameter-entity reference appears in a
6229
* choice, seq, or Mixed construct, its replacement text should not
6230
* be empty, and neither the first nor last non-blank character of
6231
* the replacement text should be a connector (| or ,).
6233
* Returns the tree of xmlElementContentPtr describing the element
6236
/*
 * Recursive worker that parses a children content model (a parenthesized
 * choice or sequence) and builds the matching xmlElementContent tree.
 * Nested groups are handled by calling this function again with the id of
 * the input current at that point. inputchk is the id of the input in
 * which the group being parsed was opened.
 */
static xmlElementContentPtr
6237
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6239
xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6240
const xmlChar *elem;
6243
/*
 * Recursion guard: a depth above 128 is rejected when XML_PARSE_HUGE is
 * not set (the condition continues with a further alternative).
 */
if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6245
xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6246
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6253
int inputid = ctxt->input->id;
6255
/* Recurse on first child */
6258
cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6263
elem = xmlParseName(ctxt);
6265
xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6268
cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6270
xmlErrMemory(ctxt, NULL);
6275
cur->ocur = XML_ELEMENT_CONTENT_OPT;
6277
} else if (RAW == '*') {
6278
cur->ocur = XML_ELEMENT_CONTENT_MULT;
6280
} else if (RAW == '+') {
6281
cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6284
cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6290
/*
 * Consume separator/item pairs until the closing ')' or parser EOF.
 * type remembers the first separator seen in this group so that a mix
 * of ',' and '|' can be reported as an error.
 */
while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6292
* Each loop we parse one separator and one element.
6295
if (type == 0) type = CUR;
6298
* Detect "Name | Name , Name" error
6300
else if (type != CUR) {
6301
xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6302
"xmlParseElementChildrenContentDecl : '%c' expected\n",
6304
/* last is released separately only when it is a node distinct from ret. */
if ((last != NULL) && (last != ret))
6305
xmlFreeDocElementContent(ctxt->myDoc, last);
6307
xmlFreeDocElementContent(ctxt->myDoc, ret);
6312
op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6314
if ((last != NULL) && (last != ret))
6315
xmlFreeDocElementContent(ctxt->myDoc, last);
6316
xmlFreeDocElementContent(ctxt->myDoc, ret);
6334
} else if (RAW == '|') {
6335
if (type == 0) type = CUR;
6338
* Detect "Name , Name | Name" error
6340
else if (type != CUR) {
6341
xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6342
"xmlParseElementChildrenContentDecl : '%c' expected\n",
6344
if ((last != NULL) && (last != ret))
6345
xmlFreeDocElementContent(ctxt->myDoc, last);
6347
xmlFreeDocElementContent(ctxt->myDoc, ret);
6352
op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6354
if ((last != NULL) && (last != ret))
6355
xmlFreeDocElementContent(ctxt->myDoc, last);
6357
xmlFreeDocElementContent(ctxt->myDoc, ret);
6376
xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6377
if ((last != NULL) && (last != ret))
6378
xmlFreeDocElementContent(ctxt->myDoc, last);
6380
xmlFreeDocElementContent(ctxt->myDoc, ret);
6387
int inputid = ctxt->input->id;
6388
/* Recurse on second child */
6391
last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6395
elem = xmlParseName(ctxt);
6397
xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6399
xmlFreeDocElementContent(ctxt->myDoc, ret);
6402
last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6405
xmlFreeDocElementContent(ctxt->myDoc, ret);
6409
last->ocur = XML_ELEMENT_CONTENT_OPT;
6411
} else if (RAW == '*') {
6412
last->ocur = XML_ELEMENT_CONTENT_MULT;
6414
} else if (RAW == '+') {
6415
last->ocur = XML_ELEMENT_CONTENT_PLUS;
6418
last->ocur = XML_ELEMENT_CONTENT_ONCE;
6424
if ((cur != NULL) && (last != NULL)) {
6429
/*
 * When validating, the group must close in the input it was opened in.
 */
if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6430
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6431
"Element content declaration doesn't start and stop in the same entity\n",
6437
/*
 * First arm of the chain handling the indicator that trails the group
 * (the '*' and '+' arms follow): an existing PLUS or MULT on ret
 * collapses to MULT, anything else becomes OPT.
 */
if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6438
(ret->ocur == XML_ELEMENT_CONTENT_MULT))
6439
ret->ocur = XML_ELEMENT_CONTENT_MULT;
6441
ret->ocur = XML_ELEMENT_CONTENT_OPT;
6444
} else if (RAW == '*') {
6446
ret->ocur = XML_ELEMENT_CONTENT_MULT;
6449
* Some normalization:
6450
* (a | b* | c?)* == (a | b | c)*
6452
while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6453
if ((cur->c1 != NULL) &&
6454
((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6455
(cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6456
cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6457
if ((cur->c2 != NULL) &&
6458
((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6459
(cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6460
cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6465
} else if (RAW == '+') {
6469
if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6470
(ret->ocur == XML_ELEMENT_CONTENT_MULT))
6471
ret->ocur = XML_ELEMENT_CONTENT_MULT;
6473
ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6475
* Some normalization:
6476
* (a | b*)+ == (a | b)*
6477
* (a | b?)+ == (a | b)*
6479
while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6480
if ((cur->c1 != NULL) &&
6481
((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6482
(cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6483
cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6486
if ((cur->c2 != NULL) &&
6487
((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6488
(cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6489
cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6495
ret->ocur = XML_ELEMENT_CONTENT_MULT;
6503
* xmlParseElementChildrenContentDecl:
6504
* @ctxt: an XML parser context
6505
* @inputchk: the input used for the current entity, needed for boundary checks
6507
* parse the declaration for an Element children content
6508
* The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6510
* [47] children ::= (choice | seq) ('?' | '*' | '+')?
6512
* [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6514
* [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6516
* [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6518
* [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6519
* TODO Parameter-entity replacement text must be properly nested
6520
* with parenthesized groups. That is to say, if either of the
6521
* opening or closing parentheses in a choice, seq, or Mixed
6522
* construct is contained in the replacement text for a parameter
6523
* entity, both must be contained in the same replacement text. For
6524
* interoperability, if a parameter-entity reference appears in a
6525
* choice, seq, or Mixed construct, its replacement text should not
6526
* be empty, and neither the first nor last non-blank character of
6527
* the replacement text should be a connector (| or ,).
6529
* Returns the tree of xmlElementContentPtr describing the element
6532
/*
 * Public entry point kept for compatibility: forwards to
 * xmlParseElementChildrenContentDeclPriv() with a starting depth of 1.
 */
xmlElementContentPtr
6533
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6534
/* stub left for API/ABI compat */
6535
return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6539
* xmlParseElementContentDecl:
6540
* @ctxt: an XML parser context
6541
* @name: the name of the element being defined.
6542
* @result: the Element Content pointer will be stored here if any
6544
* parse the declaration for an Element content either Mixed or Children,
6545
* the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6547
* [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6549
* returns: the type of element content XML_ELEMENT_TYPE_xxx
6553
/*
 * Parse the parenthesized content specification of an element
 * declaration. A specification starting with '#PCDATA' is parsed as Mixed
 * content (XML_ELEMENT_TYPE_MIXED); anything else is parsed as children
 * content (XML_ELEMENT_TYPE_ELEMENT). name is used only in the error
 * message emitted when the opening '(' is missing.
 */
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6554
xmlElementContentPtr *result) {
6556
xmlElementContentPtr tree = NULL;
6557
/*
 * The id of the input current at entry is passed to the sub-parsers,
 * which compare it with the input id at the end of the group.
 */
int inputid = ctxt->input->id;
6563
xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6564
"xmlParseElementContentDecl : %s '(' expected\n", name);
6569
if (ctxt->instate == XML_PARSER_EOF)
6572
if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6573
tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6574
res = XML_ELEMENT_TYPE_MIXED;
6576
tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6577
res = XML_ELEMENT_TYPE_ELEMENT;
6585
* xmlParseElementDecl:
6586
* @ctxt: an XML parser context
6588
* parse an Element declaration.
6590
* [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6592
* [ VC: Unique Element Type Declaration ]
6593
* No element type may be declared more than once
6595
* Returns the type of the element, or -1 in case of error
6598
/*
 * Parse one '<!ELEMENT' declaration: the element name followed by a
 * content specification ('EMPTY', 'ANY' or a parenthesized model), then
 * report it through the SAX elementDecl callback when one is available and
 * SAX is not disabled.
 */
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6599
const xmlChar *name;
6601
xmlElementContentPtr content = NULL;
6603
/* GROW; done in the caller */
6604
if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6605
/*
 * Remember the starting input so a declaration that ends in another
 * entity can be reported below.
 */
xmlParserInputPtr input = ctxt->input;
6608
if (!IS_BLANK_CH(CUR)) {
6609
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6610
"Space required after 'ELEMENT'\n");
6613
name = xmlParseName(ctxt);
6615
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6616
"xmlParseElementDecl: no name for Element\n");
6619
while ((RAW == 0) && (ctxt->inputNr > 1))
6621
if (!IS_BLANK_CH(CUR)) {
6622
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6623
"Space required after the element name\n");
6626
if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6629
* Element must always be empty.
6631
ret = XML_ELEMENT_TYPE_EMPTY;
6632
} else if ((RAW == 'A') && (NXT(1) == 'N') &&
6636
* Element is a generic container.
6638
ret = XML_ELEMENT_TYPE_ANY;
6639
} else if (RAW == '(') {
6640
ret = xmlParseElementContentDecl(ctxt, name, &content);
6643
* [ WFC: PEs in Internal Subset ] error handling.
6645
if ((RAW == '%') && (ctxt->external == 0) &&
6646
(ctxt->inputNr == 1)) {
6647
xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6648
"PEReference: forbidden within markup decl in internal subset\n");
6650
xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6651
"xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6658
* Pop-up of finished entities.
6660
while ((RAW == 0) && (ctxt->inputNr > 1))
6665
xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6666
if (content != NULL) {
6667
xmlFreeDocElementContent(ctxt->myDoc, content);
6670
if (input != ctxt->input) {
6671
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6672
"Element declaration doesn't start and stop in the same entity\n");
6676
/*
 * content->parent is cleared before the callback. If it is still NULL
 * afterwards, the content tree is freed here; when no callback is
 * invoked at all, the tree is freed in the final branch.
 */
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6677
(ctxt->sax->elementDecl != NULL)) {
6678
if (content != NULL)
6679
content->parent = NULL;
6680
ctxt->sax->elementDecl(ctxt->userData, name, ret,
6682
if ((content != NULL) && (content->parent == NULL)) {
6684
* this is a trick: if xmlAddElementDecl is called,
6685
* instead of copying the full tree it is plugged directly
6686
* if called from the parser. Avoid duplicating the
6687
* interfaces or change the API/ABI
6689
xmlFreeDocElementContent(ctxt->myDoc, content);
6691
} else if (content != NULL) {
6692
xmlFreeDocElementContent(ctxt->myDoc, content);
6700
* xmlParseConditionalSections
6701
* @ctxt: an XML parser context
6703
* [61] conditionalSect ::= includeSect | ignoreSect
6704
* [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6705
* [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6706
* [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6707
* [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6711
/*
 * Parse a conditional section. An INCLUDE section has its content parsed
 * like external subset declarations (nested conditional sections, blanks,
 * PE references, markup declarations). An IGNORE section is scanned up to
 * its end with the parser state set to XML_PARSER_IGNORE. Any other
 * keyword is reported as XML_ERR_CONDSEC_INVALID_KEYWORD.
 */
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6712
/* Input id at entry, compared later to detect sections spanning entities. */
int id = ctxt->input->id;
6716
if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6720
xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6722
if (ctxt->input->id != id) {
6723
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6724
"All markup of the conditional section is not in the same entity\n",
6729
if (xmlParserDebugEntities) {
6730
if ((ctxt->input != NULL) && (ctxt->input->filename))
6731
xmlGenericError(xmlGenericErrorContext,
6732
"%s(%d): ", ctxt->input->filename,
6734
xmlGenericError(xmlGenericErrorContext,
6735
"Entering INCLUDE Conditional Section\n");
6738
/*
 * Process the INCLUDE content until ']]>', end of input, or parser EOF.
 * check and cons snapshot the position so an iteration that consumes
 * nothing can be detected.
 */
while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6739
(NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
6740
const xmlChar *check = CUR_PTR;
6741
unsigned int cons = ctxt->input->consumed;
6743
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6744
xmlParseConditionalSections(ctxt);
6745
} else if (IS_BLANK_CH(CUR)) {
6747
} else if (RAW == '%') {
6748
xmlParsePEReference(ctxt);
6750
xmlParseMarkupDecl(ctxt);
6753
* Pop-up of finished entities.
6755
while ((RAW == 0) && (ctxt->inputNr > 1))
6758
/* Neither the pointer nor the consumed count moved: no progress was made. */
if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6759
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6763
if (xmlParserDebugEntities) {
6764
if ((ctxt->input != NULL) && (ctxt->input->filename))
6765
xmlGenericError(xmlGenericErrorContext,
6766
"%s(%d): ", ctxt->input->filename,
6768
xmlGenericError(xmlGenericErrorContext,
6769
"Leaving INCLUDE Conditional Section\n");
6772
} else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6774
xmlParserInputState instate;
6780
xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6782
if (ctxt->input->id != id) {
6783
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6784
"All markup of the conditional section is not in the same entity\n",
6789
if (xmlParserDebugEntities) {
6790
if ((ctxt->input != NULL) && (ctxt->input->filename))
6791
xmlGenericError(xmlGenericErrorContext,
6792
"%s(%d): ", ctxt->input->filename,
6794
xmlGenericError(xmlGenericErrorContext,
6795
"Entering IGNORE Conditional Section\n");
6799
* Parse up to the end of the conditional section
6800
* But disable SAX event generating DTD building in the meantime
6802
/*
 * Save disableSAX and instate so both can be restored after the scan.
 * SAX is disabled only when the parser is not in recovery mode.
 */
state = ctxt->disableSAX;
6803
instate = ctxt->instate;
6804
if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6805
ctxt->instate = XML_PARSER_IGNORE;
6807
/*
 * Scan the ignored content: every ']]>' decrements depth, and the loop
 * ends once depth drops below zero, at end of input, or at parser EOF.
 */
while (((depth >= 0) && (RAW != 0)) &&
6808
(ctxt->instate != XML_PARSER_EOF)) {
6809
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6814
if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6815
if (--depth >= 0) SKIP(3);
6822
/* Restore the parser state saved before the scan. */
ctxt->disableSAX = state;
6823
ctxt->instate = instate;
6825
if (xmlParserDebugEntities) {
6826
if ((ctxt->input != NULL) && (ctxt->input->filename))
6827
xmlGenericError(xmlGenericErrorContext,
6828
"%s(%d): ", ctxt->input->filename,
6830
xmlGenericError(xmlGenericErrorContext,
6831
"Leaving IGNORE Conditional Section\n");
6835
xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6842
xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6844
if (ctxt->input->id != id) {
6845
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6846
"All markup of the conditional section is not in the same entity\n",
6854
* xmlParseMarkupDecl:
6855
* @ctxt: an XML parser context
6857
* parse Markup declarations
6859
* [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6860
* NotationDecl | PI | Comment
6862
* [ VC: Proper Declaration/PE Nesting ]
6863
* Parameter-entity replacement text must be properly nested with
6864
* markup declarations. That is to say, if either the first character
6865
* or the last character of a markup declaration (markupdecl above) is
6866
* contained in the replacement text for a parameter-entity reference,
6867
* both must be contained in the same replacement text.
6869
* [ WFC: PEs in Internal Subset ]
6870
* In the internal DTD subset, parameter-entity references can occur
6871
* only where markup declarations can occur, not within markup declarations.
6872
* (This does not apply to references that occur in external parameter
6873
* entities or to the external subset.)
6876
/*
 * Parse one markup declaration by dispatching to the dedicated parser
 * (element, entity, attribute list, notation or comment), then handle PE
 * references and conditional sections where they are allowed, and leave
 * the parser in the XML_PARSER_DTD state.
 */
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6879
/*
 * '<!' introduces a declaration; the parser to call is selected from the
 * characters that follow (for instance NXT(3) == 'N' selects
 * xmlParseEntityDecl).
 */
if (NXT(1) == '!') {
6883
xmlParseElementDecl(ctxt);
6884
else if (NXT(3) == 'N')
6885
xmlParseEntityDecl(ctxt);
6888
xmlParseAttributeListDecl(ctxt);
6891
xmlParseNotationDecl(ctxt);
6894
xmlParseComment(ctxt);
6897
/* there is an error but it will be detected later */
6900
} else if (NXT(1) == '?') {
6905
* This is only for internal subset. On external entities,
6906
* the replacement is done before parsing stage
6908
if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6909
xmlParsePEReference(ctxt);
6912
* Conditional sections are allowed from entities included
6913
* by PE References in the internal subset.
6915
if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6916
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6917
xmlParseConditionalSections(ctxt);
6921
ctxt->instate = XML_PARSER_DTD;
6926
* @ctxt: an XML parser context
6928
* parse an XML declaration header for external entities
6930
* [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6934
/*
 * Parse the text declaration ('<?xml ... ?>') of an external entity: an
 * optional version, a mandatory encoding declaration, and the closing
 * '?>'. The version string is stored on the current input.
 */
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6936
const xmlChar *encoding;
6939
* We know that '<?xml' is here.
6941
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6944
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6948
if (!IS_BLANK_CH(CUR)) {
6949
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6950
"Space needed after '<?xml'\n");
6955
* We may have the VersionInfo here.
6957
/* A missing VersionInfo falls back to a copy of XML_DEFAULT_VERSION. */
version = xmlParseVersionInfo(ctxt);
6958
if (version == NULL)
6959
version = xmlCharStrdup(XML_DEFAULT_VERSION);
6961
if (!IS_BLANK_CH(CUR)) {
6962
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6963
"Space needed here\n");
6966
ctxt->input->version = version;
6969
* We must have the encoding declaration
6971
encoding = xmlParseEncodingDecl(ctxt);
6972
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6974
* The XML REC instructs us to stop parsing right here
6978
/* A missing encoding is reported only when no other error is pending. */
if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6979
xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6980
"Missing encoding in text declaration\n");
6984
/*
 * Expect the '?>' terminator. A bare '>' and any other character are
 * both reported as XML_ERR_XMLDECL_NOT_FINISHED.
 */
if ((RAW == '?') && (NXT(1) == '>')) {
6986
} else if (RAW == '>') {
6987
/* Deprecated old WD ... */
6988
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6991
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6992
MOVETO_ENDTAG(CUR_PTR);
6998
* xmlParseExternalSubset:
6999
* @ctxt: an XML parser context
7000
* @ExternalID: the external identifier
7001
* @SystemID: the system identifier (or URL)
7003
* parse Markup declarations from an external subset
7005
* [30] extSubset ::= textDecl? extSubsetDecl
7007
* [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7010
/*
 * Parse the markup declarations of an external DTD subset. The function
 * detects the encoding when none is set, parses an optional text
 * declaration, makes sure a document and an internal subset node exist,
 * and then loops over declarations, conditional sections, PE references
 * and blanks.
 */
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7011
const xmlChar *SystemID) {
7012
xmlDetectSAX2(ctxt);
7015
/*
 * With no encoding known yet and at least 4 bytes available, guess the
 * encoding from a 4-byte sample and switch to it when one is detected.
 */
if ((ctxt->encoding == NULL) &&
7016
(ctxt->input->end - ctxt->input->cur >= 4)) {
7018
xmlCharEncoding enc;
7024
enc = xmlDetectCharEncoding(start, 4);
7025
if (enc != XML_CHAR_ENCODING_NONE)
7026
xmlSwitchEncoding(ctxt, enc);
7029
if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7030
xmlParseTextDecl(ctxt);
7031
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7033
* The XML REC instructs us to stop parsing right here
7035
ctxt->instate = XML_PARSER_EOF;
7039
/*
 * Create a "1.0" document when the context has none, and an internal
 * subset carrying ExternalID and SystemID when the document has none.
 */
if (ctxt->myDoc == NULL) {
7040
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7041
if (ctxt->myDoc == NULL) {
7042
xmlErrMemory(ctxt, "New Doc failed");
7045
ctxt->myDoc->properties = XML_DOC_INTERNAL;
7047
if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7048
xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7050
ctxt->instate = XML_PARSER_DTD;
7052
/*
 * Main loop: continue while the input shows '<?', '<!', '%' or a blank.
 * check and cons snapshot the position so an iteration that consumes
 * nothing can be reported instead of looping forever.
 */
while (((RAW == '<') && (NXT(1) == '?')) ||
7053
((RAW == '<') && (NXT(1) == '!')) ||
7054
(RAW == '%') || IS_BLANK_CH(CUR)) {
7055
const xmlChar *check = CUR_PTR;
7056
unsigned int cons = ctxt->input->consumed;
7059
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7060
xmlParseConditionalSections(ctxt);
7061
} else if (IS_BLANK_CH(CUR)) {
7063
} else if (RAW == '%') {
7064
xmlParsePEReference(ctxt);
7066
xmlParseMarkupDecl(ctxt);
7069
* Pop-up of finished entities.
7071
while ((RAW == 0) && (ctxt->inputNr > 1))
7074
if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7075
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7081
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7087
* xmlParseReference:
7088
* @ctxt: an XML parser context
7090
* Parse and handle entity references in content. Depending on the SAX
7091
* interface, this may end up in a call to characters() if this is a
7092
* CharRef or a predefined entity, if there is no reference() callback,
7093
* or if the parser was asked to switch to that mode.
7095
* [67] Reference ::= EntityRef | CharRef
7098
xmlParseReference(xmlParserCtxtPtr ctxt) {
7102
xmlNodePtr list = NULL;
7103
xmlParserErrors ret = XML_ERR_OK;
7110
* Simple case of a CharRef
7112
if (NXT(1) == '#') {
7116
int value = xmlParseCharRef(ctxt);
7120
if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7122
* So we are using non-UTF-8 buffers
7123
* Check that the char fit on 8bits, if not
7124
* generate a CharRef.
7126
if (value <= 0xFF) {
7129
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7130
(!ctxt->disableSAX))
7131
ctxt->sax->characters(ctxt->userData, out, 1);
7133
if ((hex == 'x') || (hex == 'X'))
7134
snprintf((char *)out, sizeof(out), "#x%X", value);
7136
snprintf((char *)out, sizeof(out), "#%d", value);
7137
if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7138
(!ctxt->disableSAX))
7139
ctxt->sax->reference(ctxt->userData, out);
7143
* Just encode the value in UTF-8
7145
COPY_BUF(0 ,out, i, value);
7147
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7148
(!ctxt->disableSAX))
7149
ctxt->sax->characters(ctxt->userData, out, i);
7155
* We are seeing an entity reference
7157
ent = xmlParseEntityRef(ctxt);
7158
if (ent == NULL) return;
7159
if (!ctxt->wellFormed)
7161
was_checked = ent->checked;
7163
/* special case of predefined entities */
7164
if ((ent->name == NULL) ||
7165
(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7167
if (val == NULL) return;
7169
* inline the entity.
7171
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7172
(!ctxt->disableSAX))
7173
ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7178
* The first reference to the entity triggers a parsing phase
7179
* where the ent->children is filled with the result from
7181
* Note: external parsed entities will not be loaded, it is not
7182
* required for a non-validating parser, unless the parsing option
7183
* of validating, or substituting entities were given. Doing so is
7184
* far more secure as the parser will only process data coming from
7185
* the document entity by default.
7187
if ((ent->checked == 0) &&
7188
((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7189
(ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7190
unsigned long oldnbent = ctxt->nbentities;
7193
* This is a bit hackish but this seems the best
7194
* way to make sure both SAX and DOM entity support
7198
if (ctxt->userData == ctxt)
7201
user_data = ctxt->userData;
7204
* Check that this entity is well formed
7205
* 4.3.2: An internal general parsed entity is well-formed
7206
* if its replacement text matches the production labeled
7209
if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7211
ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7215
} else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7217
ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7218
user_data, ctxt->depth, ent->URI,
7219
ent->ExternalID, &list);
7222
ret = XML_ERR_ENTITY_PE_INTERNAL;
7223
xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7224
"invalid entity type found\n", NULL);
7228
* Store the number of entities needing parsing for this entity
7229
* content and do checkings
7231
ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7232
if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7234
if (ret == XML_ERR_ENTITY_LOOP) {
7235
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7236
xmlFreeNodeList(list);
7239
if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7240
xmlFreeNodeList(list);
7244
if ((ret == XML_ERR_OK) && (list != NULL)) {
7245
if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7246
(ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7247
(ent->children == NULL)) {
7248
ent->children = list;
7249
if (ctxt->replaceEntities) {
7251
* Prune it directly in the generated document
7252
* except for single text nodes.
7254
if (((list->type == XML_TEXT_NODE) &&
7255
(list->next == NULL)) ||
7256
(ctxt->parseMode == XML_PARSE_READER)) {
7257
list->parent = (xmlNodePtr) ent;
7262
while (list != NULL) {
7263
list->parent = (xmlNodePtr) ctxt->node;
7264
list->doc = ctxt->myDoc;
7265
if (list->next == NULL)
7269
list = ent->children;
7270
#ifdef LIBXML_LEGACY_ENABLED
7271
if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7272
xmlAddEntityReference(ent, list, NULL);
7273
#endif /* LIBXML_LEGACY_ENABLED */
7277
while (list != NULL) {
7278
list->parent = (xmlNodePtr) ent;
7279
xmlSetTreeDoc(list, ent->doc);
7280
if (list->next == NULL)
7286
xmlFreeNodeList(list);
7289
} else if ((ret != XML_ERR_OK) &&
7290
(ret != XML_WAR_UNDECLARED_ENTITY)) {
7291
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7292
"Entity '%s' failed to parse\n", ent->name);
7293
} else if (list != NULL) {
7294
xmlFreeNodeList(list);
7297
if (ent->checked == 0)
7299
} else if (ent->checked != 1) {
7300
ctxt->nbentities += ent->checked / 2;
7304
* Now that the entity content has been gathered
7305
* provide it to the application, this can take different forms based
7306
* on the parsing modes.
7308
if (ent->children == NULL) {
7310
* Probably running in SAX mode and the callbacks don't
7311
* build the entity content. So unless we already went
7312
* through parsing for first checking go through the entity
7313
* content to generate callbacks associated to the entity
7315
if (was_checked != 0) {
7318
* This is a bit hackish but this seems the best
7319
* way to make sure both SAX and DOM entity support
7322
if (ctxt->userData == ctxt)
7325
user_data = ctxt->userData;
7327
if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7329
ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7330
ent->content, user_data, NULL);
7332
} else if (ent->etype ==
7333
XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7335
ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7336
ctxt->sax, user_data, ctxt->depth,
7337
ent->URI, ent->ExternalID, NULL);
7340
ret = XML_ERR_ENTITY_PE_INTERNAL;
7341
xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7342
"invalid entity type found\n", NULL);
7344
if (ret == XML_ERR_ENTITY_LOOP) {
7345
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7349
if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7350
(ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7352
* Entity reference callback comes second, it's somewhat
7353
* superfluous but a compatibility to historical behaviour
7355
ctxt->sax->reference(ctxt->userData, ent->name);
7361
* If we didn't get any children for the entity being built
7363
if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7364
(ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7368
ctxt->sax->reference(ctxt->userData, ent->name);
7372
if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7374
* There is a problem on the handling of _private for entities
7375
* (bug 155816): Should we copy the content of the field from
7376
* the entity (possibly overwriting some value set by the user
7377
* when a copy is created), should we leave it alone, or should
7378
* we try to take care of different situations? The problem
7379
* is exacerbated by the usage of this field by the xmlReader.
7380
* To fix this bug, we look at _private on the created node
7381
* and, if it's NULL, we copy in whatever was in the entity.
7382
* If it's not NULL we leave it alone. This is somewhat of a
7383
* hack - maybe we should have further tests to determine
7386
if ((ctxt->node != NULL) && (ent->children != NULL)) {
7388
* Seems we are generating the DOM content, do
7389
* a simple tree copy for all references except the first
7390
* In the first occurrence list contains the replacement.
7392
if (((list == NULL) && (ent->owner == 0)) ||
7393
(ctxt->parseMode == XML_PARSE_READER)) {
7394
xmlNodePtr nw = NULL, cur, firstChild = NULL;
7397
* We are copying here, make sure there is no abuse
7399
ctxt->sizeentcopy += ent->length;
7400
if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7404
* when operating on a reader, the entities definitions
7405
* are always owning the entities subtree.
7406
if (ctxt->parseMode == XML_PARSE_READER)
7410
cur = ent->children;
7411
while (cur != NULL) {
7412
nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7414
if (nw->_private == NULL)
7415
nw->_private = cur->_private;
7416
if (firstChild == NULL){
7419
nw = xmlAddChild(ctxt->node, nw);
7421
if (cur == ent->last) {
7423
* needed to detect some strange empty
7424
* node cases in the reader tests
7426
if ((ctxt->parseMode == XML_PARSE_READER) &&
7428
(nw->type == XML_ELEMENT_NODE) &&
7429
(nw->children == NULL))
7436
#ifdef LIBXML_LEGACY_ENABLED
7437
if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7438
xmlAddEntityReference(ent, firstChild, nw);
7439
#endif /* LIBXML_LEGACY_ENABLED */
7440
} else if ((list == NULL) || (ctxt->inputNr > 0)) {
7441
xmlNodePtr nw = NULL, cur, next, last,
7445
* We are copying here, make sure there is no abuse
7447
ctxt->sizeentcopy += ent->length;
7448
if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7452
* Copy the entity child list and make it the new
7453
* entity child list. The goal is to make sure any
7454
* ID or REF referenced will be the one from the
7455
* document content and not the entity copy.
7457
cur = ent->children;
7458
ent->children = NULL;
7461
while (cur != NULL) {
7465
nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7467
if (nw->_private == NULL)
7468
nw->_private = cur->_private;
7469
if (firstChild == NULL){
7472
xmlAddChild((xmlNodePtr) ent, nw);
7473
xmlAddChild(ctxt->node, cur);
7479
if (ent->owner == 0)
7481
#ifdef LIBXML_LEGACY_ENABLED
7482
if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7483
xmlAddEntityReference(ent, firstChild, nw);
7484
#endif /* LIBXML_LEGACY_ENABLED */
7486
const xmlChar *nbktext;
7489
* the name change is to avoid coalescing of the
7490
* node with a possible previous text one which
7491
* would make ent->children a dangling pointer
7493
nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7495
if (ent->children->type == XML_TEXT_NODE)
7496
ent->children->name = nbktext;
7497
if ((ent->last != ent->children) &&
7498
(ent->last->type == XML_TEXT_NODE))
7499
ent->last->name = nbktext;
7500
xmlAddChildList(ctxt->node, ent->children);
7504
* This is to avoid a nasty side effect, see
7505
* characters() in SAX.c
7515
* xmlParseEntityRef:
7516
* @ctxt: an XML parser context
7518
* parse ENTITY references declarations
7520
* [68] EntityRef ::= '&' Name ';'
7522
* [ WFC: Entity Declared ]
7523
* In a document without any DTD, a document with only an internal DTD
7524
* subset which contains no parameter entity references, or a document
7525
* with "standalone='yes'", the Name given in the entity reference
7526
* must match that in an entity declaration, except that well-formed
7527
* documents need not declare any of the following entities: amp, lt,
7528
* gt, apos, quot. The declaration of a parameter entity must precede
7529
* any reference to it. Similarly, the declaration of a general entity
7530
* must precede any reference to it which appears in a default value in an
7531
* attribute-list declaration. Note that if entities are declared in the
7532
* external subset or in external parameter entities, a non-validating
7533
* processor is not obligated to read and process their declarations;
7534
* for such documents, the rule that an entity must be declared is a
7535
* well-formedness constraint only if standalone='yes'.
7537
* [ WFC: Parsed Entity ]
7538
* An entity reference must not contain the name of an unparsed entity
7540
* Returns the xmlEntityPtr if found, or NULL otherwise.
7543
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7544
const xmlChar *name;
7545
xmlEntityPtr ent = NULL;
7548
if (ctxt->instate == XML_PARSER_EOF)
7554
name = xmlParseName(ctxt);
7556
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7557
"xmlParseEntityRef: no name\n");
7561
xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7567
* Predefined entities override any extra definition
7569
if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7570
ent = xmlGetPredefinedEntity(name);
7576
* Increase the number of entity references parsed
7581
* Ask first SAX for entity resolution, otherwise try the
7582
* entities which may have been stored in the parser context.
7584
if (ctxt->sax != NULL) {
7585
if (ctxt->sax->getEntity != NULL)
7586
ent = ctxt->sax->getEntity(ctxt->userData, name);
7587
if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7588
(ctxt->options & XML_PARSE_OLDSAX))
7589
ent = xmlGetPredefinedEntity(name);
7590
if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7591
(ctxt->userData==ctxt)) {
7592
ent = xmlSAX2GetEntity(ctxt, name);
7595
if (ctxt->instate == XML_PARSER_EOF)
7598
* [ WFC: Entity Declared ]
7599
* In a document without any DTD, a document with only an
7600
* internal DTD subset which contains no parameter entity
7601
* references, or a document with "standalone='yes'", the
7602
* Name given in the entity reference must match that in an
7603
* entity declaration, except that well-formed documents
7604
* need not declare any of the following entities: amp, lt,
7606
* The declaration of a parameter entity must precede any
7608
* Similarly, the declaration of a general entity must
7609
* precede any reference to it which appears in a default
7610
* value in an attribute-list declaration. Note that if
7611
* entities are declared in the external subset or in
7612
* external parameter entities, a non-validating processor
7613
* is not obligated to read and process their declarations;
7614
* for such documents, the rule that an entity must be
7615
* declared is a well-formedness constraint only if
7619
if ((ctxt->standalone == 1) ||
7620
((ctxt->hasExternalSubset == 0) &&
7621
(ctxt->hasPErefs == 0))) {
7622
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7623
"Entity '%s' not defined\n", name);
7625
xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7626
"Entity '%s' not defined\n", name);
7627
if ((ctxt->inSubset == 0) &&
7628
(ctxt->sax != NULL) &&
7629
(ctxt->sax->reference != NULL)) {
7630
ctxt->sax->reference(ctxt->userData, name);
7637
* [ WFC: Parsed Entity ]
7638
* An entity reference must not contain the name of an
7641
else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7642
xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7643
"Entity reference to unparsed entity %s\n", name);
7647
* [ WFC: No External Entity References ]
7648
* Attribute values cannot contain direct or indirect
7649
* entity references to external entities.
7651
else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7652
(ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7653
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7654
"Attribute references external entity '%s'\n", name);
7657
* [ WFC: No < in Attribute Values ]
7658
* The replacement text of any entity referred to directly or
7659
* indirectly in an attribute value (other than "<") must
7662
else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7664
(ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7665
if ((ent->checked & 1) || ((ent->checked == 0) &&
7666
(ent->content != NULL) &&(xmlStrchr(ent->content, '<')))) {
7667
xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7668
"'<' in entity '%s' is not allowed in attributes values\n", name);
7673
* Internal check, no parameter entities here ...
7676
switch (ent->etype) {
7677
case XML_INTERNAL_PARAMETER_ENTITY:
7678
case XML_EXTERNAL_PARAMETER_ENTITY:
7679
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7680
"Attempt to reference the parameter entity '%s'\n",
7689
* [ WFC: No Recursion ]
7690
* A parsed entity must not contain a recursive reference
7691
* to itself, either directly or indirectly.
7692
* Done somewhere else
7698
* xmlParseStringEntityRef:
7699
* @ctxt: an XML parser context
7700
* @str: a pointer to an index in the string
7702
* parse ENTITY references declarations, but this version parses it from
7705
* [68] EntityRef ::= '&' Name ';'
7707
* [ WFC: Entity Declared ]
7708
* In a document without any DTD, a document with only an internal DTD
7709
* subset which contains no parameter entity references, or a document
7710
* with "standalone='yes'", the Name given in the entity reference
7711
* must match that in an entity declaration, except that well-formed
7712
* documents need not declare any of the following entities: amp, lt,
7713
* gt, apos, quot. The declaration of a parameter entity must precede
7714
* any reference to it. Similarly, the declaration of a general entity
7715
* must precede any reference to it which appears in a default value in an
7716
* attribute-list declaration. Note that if entities are declared in the
7717
* external subset or in external parameter entities, a non-validating
7718
* processor is not obligated to read and process their declarations;
7719
* for such documents, the rule that an entity must be declared is a
7720
* well-formedness constraint only if standalone='yes'.
7722
* [ WFC: Parsed Entity ]
7723
* An entity reference must not contain the name of an unparsed entity
7725
* Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7726
* is updated to the current location in the string.
7729
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7733
xmlEntityPtr ent = NULL;
7735
if ((str == NULL) || (*str == NULL))
7743
name = xmlParseStringName(ctxt, &ptr);
7745
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7746
"xmlParseStringEntityRef: no name\n");
7751
xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7760
* Predefined entities override any extra definition
7762
if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7763
ent = xmlGetPredefinedEntity(name);
7772
* Increase the number of entity references parsed
7777
* Ask first SAX for entity resolution, otherwise try the
7778
* entities which may have been stored in the parser context.
7780
if (ctxt->sax != NULL) {
7781
if (ctxt->sax->getEntity != NULL)
7782
ent = ctxt->sax->getEntity(ctxt->userData, name);
7783
if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7784
ent = xmlGetPredefinedEntity(name);
7785
if ((ent == NULL) && (ctxt->userData==ctxt)) {
7786
ent = xmlSAX2GetEntity(ctxt, name);
7789
if (ctxt->instate == XML_PARSER_EOF) {
7795
* [ WFC: Entity Declared ]
7796
* In a document without any DTD, a document with only an
7797
* internal DTD subset which contains no parameter entity
7798
* references, or a document with "standalone='yes'", the
7799
* Name given in the entity reference must match that in an
7800
* entity declaration, except that well-formed documents
7801
* need not declare any of the following entities: amp, lt,
7803
* The declaration of a parameter entity must precede any
7805
* Similarly, the declaration of a general entity must
7806
* precede any reference to it which appears in a default
7807
* value in an attribute-list declaration. Note that if
7808
* entities are declared in the external subset or in
7809
* external parameter entities, a non-validating processor
7810
* is not obligated to read and process their declarations;
7811
* for such documents, the rule that an entity must be
7812
* declared is a well-formedness constraint only if
7816
if ((ctxt->standalone == 1) ||
7817
((ctxt->hasExternalSubset == 0) &&
7818
(ctxt->hasPErefs == 0))) {
7819
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7820
"Entity '%s' not defined\n", name);
7822
xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7823
"Entity '%s' not defined\n",
7826
/* TODO ? check regressions ctxt->valid = 0; */
7830
* [ WFC: Parsed Entity ]
7831
* An entity reference must not contain the name of an
7834
else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7835
xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7836
"Entity reference to unparsed entity %s\n", name);
7840
* [ WFC: No External Entity References ]
7841
* Attribute values cannot contain direct or indirect
7842
* entity references to external entities.
7844
else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7845
(ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7846
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7847
"Attribute references external entity '%s'\n", name);
7850
* [ WFC: No < in Attribute Values ]
7851
* The replacement text of any entity referred to directly or
7852
* indirectly in an attribute value (other than "<") must
7855
else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7856
(ent != NULL) && (ent->content != NULL) &&
7857
(ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7858
(xmlStrchr(ent->content, '<'))) {
7859
xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7860
"'<' in entity '%s' is not allowed in attributes values\n",
7865
* Internal check, no parameter entities here ...
7868
switch (ent->etype) {
7869
case XML_INTERNAL_PARAMETER_ENTITY:
7870
case XML_EXTERNAL_PARAMETER_ENTITY:
7871
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7872
"Attempt to reference the parameter entity '%s'\n",
7881
* [ WFC: No Recursion ]
7882
* A parsed entity must not contain a recursive reference
7883
* to itself, either directly or indirectly.
7884
* Done somewhere else
7893
* xmlParsePEReference:
7894
* @ctxt: an XML parser context
7896
* parse PEReference declarations
7897
* The entity content is handled directly by pushing its content as
7898
* a new input stream.
7900
* [69] PEReference ::= '%' Name ';'
7902
* [ WFC: No Recursion ]
7903
* A parsed entity must not contain a recursive
7904
* reference to itself, either directly or indirectly.
7906
* [ WFC: Entity Declared ]
7907
* In a document without any DTD, a document with only an internal DTD
7908
* subset which contains no parameter entity references, or a document
7909
* with "standalone='yes'", ... ... The declaration of a parameter
7910
* entity must precede any reference to it...
7912
* [ VC: Entity Declared ]
7913
* In a document with an external subset or external parameter entities
7914
* with "standalone='no'", ... ... The declaration of a parameter entity
7915
* must precede any reference to it...
7918
* Parameter-entity references may only appear in the DTD.
7919
* NOTE: misleading but this is handled.
7922
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7924
const xmlChar *name;
7925
xmlEntityPtr entity = NULL;
7926
xmlParserInputPtr input;
7931
name = xmlParseName(ctxt);
7933
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7934
"xmlParsePEReference: no name\n");
7938
xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7945
* Increase the number of entity references parsed
7950
* Request the entity from SAX
7952
if ((ctxt->sax != NULL) &&
7953
(ctxt->sax->getParameterEntity != NULL))
7954
entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7955
if (ctxt->instate == XML_PARSER_EOF)
7957
if (entity == NULL) {
7959
* [ WFC: Entity Declared ]
7960
* In a document without any DTD, a document with only an
7961
* internal DTD subset which contains no parameter entity
7962
* references, or a document with "standalone='yes'", ...
7963
* ... The declaration of a parameter entity must precede
7964
* any reference to it...
7966
if ((ctxt->standalone == 1) ||
7967
((ctxt->hasExternalSubset == 0) &&
7968
(ctxt->hasPErefs == 0))) {
7969
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7970
"PEReference: %%%s; not found\n",
7974
* [ VC: Entity Declared ]
7975
* In a document with an external subset or external
7976
* parameter entities with "standalone='no'", ...
7977
* ... The declaration of a parameter entity must
7978
* precede any reference to it...
7980
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7981
"PEReference: %%%s; not found\n",
7987
* Internal checking in case the entity quest barfed
7989
if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7990
(entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7991
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7992
"Internal: %%%s; is not a parameter entity\n",
7994
} else if (ctxt->input->free != deallocblankswrapper) {
7995
input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7996
if (xmlPushInput(ctxt, input) < 0)
8001
* handle the extra spaces added before and after
8002
* c.f. http://www.w3.org/TR/REC-xml#as-PE
8004
input = xmlNewEntityInputStream(ctxt, entity);
8005
if (xmlPushInput(ctxt, input) < 0)
8007
if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8008
(CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8009
(IS_BLANK_CH(NXT(5)))) {
8010
xmlParseTextDecl(ctxt);
8012
XML_ERR_UNSUPPORTED_ENCODING) {
8014
* The XML REC instructs us to stop parsing
8017
ctxt->instate = XML_PARSER_EOF;
8023
ctxt->hasPErefs = 1;
8027
* xmlLoadEntityContent:
8028
* @ctxt: an XML parser context
8029
* @entity: an unloaded system entity
8031
* Load the original content of the given system entity from the
8032
* ExternalID/SystemID given. This is to be used for Included in Literal
8033
* http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8035
* Returns 0 in case of success and -1 in case of failure
8038
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8039
xmlParserInputPtr input;
8044
if ((ctxt == NULL) || (entity == NULL) ||
8045
((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8046
(entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8047
(entity->content != NULL)) {
8048
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8049
"xmlLoadEntityContent parameter error");
8053
if (xmlParserDebugEntities)
8054
xmlGenericError(xmlGenericErrorContext,
8055
"Reading %s entity content input\n", entity->name);
8057
buf = xmlBufferCreate();
8059
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8060
"xmlLoadEntityContent parameter error");
8064
input = xmlNewEntityInputStream(ctxt, entity);
8065
if (input == NULL) {
8066
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8067
"xmlLoadEntityContent input error");
8073
* Push the entity as the current input, read char by char
8074
* saving to the buffer until the end of the entity or an error
8076
if (xmlPushInput(ctxt, input) < 0) {
8083
while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8085
xmlBufferAdd(buf, ctxt->input->cur, l);
8086
if (count++ > XML_PARSER_CHUNK_SIZE) {
8089
if (ctxt->instate == XML_PARSER_EOF) {
8099
if (ctxt->instate == XML_PARSER_EOF) {
8107
if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8109
} else if (!IS_CHAR(c)) {
8110
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8111
"xmlLoadEntityContent: invalid char value %d\n",
8116
entity->content = buf->content;
8117
buf->content = NULL;
8124
* xmlParseStringPEReference:
8125
* @ctxt: an XML parser context
8126
* @str: a pointer to an index in the string
8128
* parse PEReference declarations
8130
* [69] PEReference ::= '%' Name ';'
8132
* [ WFC: No Recursion ]
8133
* A parsed entity must not contain a recursive
8134
* reference to itself, either directly or indirectly.
8136
* [ WFC: Entity Declared ]
8137
* In a document without any DTD, a document with only an internal DTD
8138
* subset which contains no parameter entity references, or a document
8139
* with "standalone='yes'", ... ... The declaration of a parameter
8140
* entity must precede any reference to it...
8142
* [ VC: Entity Declared ]
8143
* In a document with an external subset or external parameter entities
8144
* with "standalone='no'", ... ... The declaration of a parameter entity
8145
* must precede any reference to it...
8148
* Parameter-entity references may only appear in the DTD.
8149
* NOTE: misleading but this is handled.
8151
* Returns the string of the entity content.
8152
* str is updated to the current value of the index
8155
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8159
xmlEntityPtr entity = NULL;
8161
if ((str == NULL) || (*str == NULL)) return(NULL);
8167
name = xmlParseStringName(ctxt, &ptr);
8169
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8170
"xmlParseStringPEReference: no name\n");
8176
xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8184
* Increase the number of entity references parsed
8189
* Request the entity from SAX
8191
if ((ctxt->sax != NULL) &&
8192
(ctxt->sax->getParameterEntity != NULL))
8193
entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8194
if (ctxt->instate == XML_PARSER_EOF) {
8198
if (entity == NULL) {
8200
* [ WFC: Entity Declared ]
8201
* In a document without any DTD, a document with only an
8202
* internal DTD subset which contains no parameter entity
8203
* references, or a document with "standalone='yes'", ...
8204
* ... The declaration of a parameter entity must precede
8205
* any reference to it...
8207
if ((ctxt->standalone == 1) ||
8208
((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8209
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8210
"PEReference: %%%s; not found\n", name);
8213
* [ VC: Entity Declared ]
8214
* In a document with an external subset or external
8215
* parameter entities with "standalone='no'", ...
8216
* ... The declaration of a parameter entity must
8217
* precede any reference to it...
8219
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8220
"PEReference: %%%s; not found\n",
8226
* Internal checking in case the entity quest barfed
8228
if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8229
(entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8230
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8231
"%%%s; is not a parameter entity\n",
8235
ctxt->hasPErefs = 1;
8242
* xmlParseDocTypeDecl:
8243
* @ctxt: an XML parser context
8245
* parse a DOCTYPE declaration
8247
* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8248
* ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8250
* [ VC: Root Element Type ]
8251
* The Name in the document type declaration must match the element
8252
* type of the root element.
8256
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8257
const xmlChar *name = NULL;
8258
xmlChar *ExternalID = NULL;
8259
xmlChar *URI = NULL;
8262
* We know that '<!DOCTYPE' has been detected.
8269
* Parse the DOCTYPE name.
8271
name = xmlParseName(ctxt);
8273
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8274
"xmlParseDocTypeDecl : no DOCTYPE name !\n");
8276
ctxt->intSubName = name;
8281
* Check for SystemID and ExternalID
8283
URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8285
if ((URI != NULL) || (ExternalID != NULL)) {
8286
ctxt->hasExternalSubset = 1;
8288
ctxt->extSubURI = URI;
8289
ctxt->extSubSystem = ExternalID;
8294
* Create and update the internal subset.
8296
if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8297
(!ctxt->disableSAX))
8298
ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8299
if (ctxt->instate == XML_PARSER_EOF)
8303
* Is there any internal subset declarations ?
8304
* they are handled separately in xmlParseInternalSubset()
8310
* We should be at the end of the DOCTYPE declaration.
8313
xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8319
* xmlParseInternalSubset:
8320
* @ctxt: an XML parser context
8322
* parse the internal subset declaration
8324
* [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8328
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8330
* Is there any DTD definition ?
8333
ctxt->instate = XML_PARSER_DTD;
8336
* Parse the succession of Markup declarations and
8338
* Subsequence (markupdecl | PEReference | S)*
8340
while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
8341
const xmlChar *check = CUR_PTR;
8342
unsigned int cons = ctxt->input->consumed;
8345
xmlParseMarkupDecl(ctxt);
8346
xmlParsePEReference(ctxt);
8349
* Pop-up of finished entities.
8351
while ((RAW == 0) && (ctxt->inputNr > 1))
8354
if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8355
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8356
"xmlParseInternalSubset: error detected in Markup declaration\n");
8367
* We should be at the end of the DOCTYPE declaration.
8370
xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8375
#ifdef LIBXML_SAX1_ENABLED
8377
* xmlParseAttribute:
8378
* @ctxt: an XML parser context
8379
* @value: a xmlChar ** used to store the value of the attribute
8381
* parse an attribute
8383
* [41] Attribute ::= Name Eq AttValue
8385
* [ WFC: No External Entity References ]
8386
* Attribute values cannot contain direct or indirect entity references
8387
* to external entities.
8389
* [ WFC: No < in Attribute Values ]
8390
* The replacement text of any entity referred to directly or indirectly in
8391
* an attribute value (other than "<") must not contain a <.
8393
* [ VC: Attribute Value Type ]
8394
* The attribute must have been declared; the value must be of the type
8397
* [25] Eq ::= S? '=' S?
8401
* [NS 11] Attribute ::= QName Eq AttValue
8403
* Also the case QName == xmlns:??? is handled independently as a namespace
8406
* Returns the attribute name, and the value in *value.
8410
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8411
const xmlChar *name;
8416
name = xmlParseName(ctxt);
8418
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8419
"error parsing attribute name\n");
8430
val = xmlParseAttValue(ctxt);
8431
ctxt->instate = XML_PARSER_CONTENT;
8433
xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8434
"Specification mandate value for attribute %s\n", name);
8439
* Check that xml:lang conforms to the specification
8440
* No longer registered as an error, just generate a warning now
8441
* since this was deprecated in XML second edition
8443
if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8444
if (!xmlCheckLanguageID(val)) {
8445
xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8446
"Malformed value for xml:lang : %s\n",
8452
* Check that xml:space conforms to the specification
8454
if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8455
if (xmlStrEqual(val, BAD_CAST "default"))
8457
else if (xmlStrEqual(val, BAD_CAST "preserve"))
8460
xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8461
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8472
* @ctxt: an XML parser context
8474
* parse a start of tag either for rule element or
8475
* EmptyElement. In both cases we don't parse the tag closing chars.
8477
* [40] STag ::= '<' Name (S Attribute)* S? '>'
8479
* [ WFC: Unique Att Spec ]
8480
* No attribute name may appear more than once in the same start-tag or
8481
* empty-element tag.
8483
* [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8485
* [ WFC: Unique Att Spec ]
8486
* No attribute name may appear more than once in the same start-tag or
8487
* empty-element tag.
8491
* [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8493
* [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8495
* Returns the element name parsed
8499
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8500
const xmlChar *name;
8501
const xmlChar *attname;
8503
const xmlChar **atts = ctxt->atts;
8505
int maxatts = ctxt->maxatts;
8508
if (RAW != '<') return(NULL);
8511
name = xmlParseName(ctxt);
8513
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8514
"xmlParseStartTag: invalid element name\n");
8519
* Now parse the attributes, it ends up with the ending
8526
while (((RAW != '>') &&
8527
((RAW != '/') || (NXT(1) != '>')) &&
8528
(IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8529
const xmlChar *q = CUR_PTR;
8530
unsigned int cons = ctxt->input->consumed;
8532
attname = xmlParseAttribute(ctxt, &attvalue);
8533
if ((attname != NULL) && (attvalue != NULL)) {
8535
* [ WFC: Unique Att Spec ]
8536
* No attribute name may appear more than once in the same
8537
* start-tag or empty-element tag.
8539
for (i = 0; i < nbatts;i += 2) {
8540
if (xmlStrEqual(atts[i], attname)) {
8541
xmlErrAttributeDup(ctxt, NULL, attname);
8547
* Add the pair to atts
8550
maxatts = 22; /* allow for 10 attrs by default */
8551
atts = (const xmlChar **)
8552
xmlMalloc(maxatts * sizeof(xmlChar *));
8554
xmlErrMemory(ctxt, NULL);
8555
if (attvalue != NULL)
8560
ctxt->maxatts = maxatts;
8561
} else if (nbatts + 4 > maxatts) {
8565
n = (const xmlChar **) xmlRealloc((void *) atts,
8566
maxatts * sizeof(const xmlChar *));
8568
xmlErrMemory(ctxt, NULL);
8569
if (attvalue != NULL)
8575
ctxt->maxatts = maxatts;
8577
atts[nbatts++] = attname;
8578
atts[nbatts++] = attvalue;
8579
atts[nbatts] = NULL;
8580
atts[nbatts + 1] = NULL;
8582
if (attvalue != NULL)
8589
if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8591
if (!IS_BLANK_CH(RAW)) {
8592
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8593
"attributes construct error\n");
8596
if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8597
(attname == NULL) && (attvalue == NULL)) {
8598
xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8599
"xmlParseStartTag: problem parsing attributes\n");
8607
* SAX: Start of Element !
8609
if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8610
(!ctxt->disableSAX)) {
8612
ctxt->sax->startElement(ctxt->userData, name, atts);
8614
ctxt->sax->startElement(ctxt->userData, name, NULL);
8618
/* Free only the content strings */
8619
for (i = 1;i < nbatts;i+=2)
8620
if (atts[i] != NULL)
8621
xmlFree((xmlChar *) atts[i]);
8628
* @ctxt: an XML parser context
8629
* @line: line of the start tag
8630
* @nsNr: number of namespaces on the start tag
8632
* parse an end of tag
8634
* [42] ETag ::= '</' Name S? '>'
8638
* [NS 9] ETag ::= '</' QName S? '>'
8642
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8643
const xmlChar *name;
8646
if ((RAW != '<') || (NXT(1) != '/')) {
8647
xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8648
"xmlParseEndTag: '</' not found\n");
8653
name = xmlParseNameAndCompare(ctxt,ctxt->name);
8656
* We should definitely be at the ending "S? '>'" part
8660
if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8661
xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8666
* [ WFC: Element Type Match ]
8667
* The Name in an element's end-tag must match the element type in the
8671
if (name != (xmlChar*)1) {
8672
if (name == NULL) name = BAD_CAST "unparseable";
8673
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8674
"Opening and ending tag mismatch: %s line %d and %s\n",
8675
ctxt->name, line, name);
8681
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8682
(!ctxt->disableSAX))
8683
ctxt->sax->endElement(ctxt->userData, ctxt->name);
8692
* @ctxt: an XML parser context
8694
* parse an end of tag
8696
* [42] ETag ::= '</' Name S? '>'
8700
* [NS 9] ETag ::= '</' QName S? '>'
8704
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8705
xmlParseEndTag1(ctxt, 0);
8707
#endif /* LIBXML_SAX1_ENABLED */
8709
/************************************************************************
8711
* SAX 2 specific operations *
8713
************************************************************************/
8717
* @ctxt: an XML parser context
8718
* @prefix: the prefix to lookup
8720
* Lookup the namespace name for the @prefix (which can be NULL)
8721
* The prefix must come from the @ctxt->dict dictionary
8723
* Returns the namespace name or NULL if not bound
8725
static const xmlChar *
8726
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8729
if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8730
for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8731
if (ctxt->nsTab[i] == prefix) {
8732
if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8734
return(ctxt->nsTab[i + 1]);
8741
* @ctxt: an XML parser context
8742
* @prefix: pointer to store the prefix part
8744
* parse an XML Namespace QName
8746
* [6] QName ::= (Prefix ':')? LocalPart
8747
* [7] Prefix ::= NCName
8748
* [8] LocalPart ::= NCName
8750
* Returns the Name parsed or NULL
8753
static const xmlChar *
8754
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8755
const xmlChar *l, *p;
8759
l = xmlParseNCName(ctxt);
8762
l = xmlParseName(ctxt);
8764
xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8765
"Failed to parse QName '%s'\n", l, NULL, NULL);
8775
l = xmlParseNCName(ctxt);
8779
xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8780
"Failed to parse QName '%s:'\n", p, NULL, NULL);
8781
l = xmlParseNmtoken(ctxt);
8783
tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8785
tmp = xmlBuildQName(l, p, NULL, 0);
8788
p = xmlDictLookup(ctxt->dict, tmp, -1);
8789
if (tmp != NULL) xmlFree(tmp);
8796
xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8797
"Failed to parse QName '%s:%s:'\n", p, l, NULL);
8799
tmp = (xmlChar *) xmlParseName(ctxt);
8801
tmp = xmlBuildQName(tmp, l, NULL, 0);
8802
l = xmlDictLookup(ctxt->dict, tmp, -1);
8803
if (tmp != NULL) xmlFree(tmp);
8807
tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8808
l = xmlDictLookup(ctxt->dict, tmp, -1);
8809
if (tmp != NULL) xmlFree(tmp);
8820
* xmlParseQNameAndCompare:
8821
* @ctxt: an XML parser context
8822
* @name: the localname
8823
* @prefix: the prefix, if any.
8825
* parse an XML name and compares for match
8826
* (specialized for endtag parsing)
8828
* Returns NULL for an illegal name, (xmlChar*) 1 for success
8829
* and the name for mismatch
8832
static const xmlChar *
8833
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8834
xmlChar const *prefix) {
8838
const xmlChar *prefix2;
8840
if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8843
in = ctxt->input->cur;
8846
while (*in != 0 && *in == *cmp) {
8850
if ((*cmp == 0) && (*in == ':')) {
8853
while (*in != 0 && *in == *cmp) {
8857
if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8859
ctxt->input->cur = in;
8860
return((const xmlChar*) 1);
8864
* all strings come from the dictionary, equality can be done directly
8866
ret = xmlParseQName (ctxt, &prefix2);
8867
if ((ret == name) && (prefix == prefix2))
8868
return((const xmlChar*) 1);
8873
* xmlParseAttValueInternal:
8874
* @ctxt: an XML parser context
8875
* @len: attribute len result
8876
* @alloc: whether the attribute was reallocated as a new string
8877
* @normalize: if 1 then further non-CDATA normalization must be done
8879
* parse a value for an attribute.
8880
* NOTE: if no normalization is needed, the routine will return pointers
8881
* directly from the data buffer.
8883
* 3.3.3 Attribute-Value Normalization:
8884
* Before the value of an attribute is passed to the application or
8885
* checked for validity, the XML processor must normalize it as follows:
8886
* - a character reference is processed by appending the referenced
8887
* character to the attribute value
8888
* - an entity reference is processed by recursively processing the
8889
* replacement text of the entity
8890
* - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8891
* appending #x20 to the normalized value, except that only a single
8892
* #x20 is appended for a "#xD#xA" sequence that is part of an external
8893
* parsed entity or the literal entity value of an internal parsed entity
8894
* - other characters are processed by appending them to the normalized value
8895
* If the declared value is not CDATA, then the XML processor must further
8896
* process the normalized attribute value by discarding any leading and
8897
* trailing space (#x20) characters, and by replacing sequences of space
8898
* (#x20) characters by a single space (#x20) character.
8899
* All attributes for which no declaration has been read should be treated
8900
* by a non-validating parser as if declared CDATA.
8902
* Returns the AttValue parsed or NULL. The value has to be freed by the
8903
* caller if it was copied, this can be detected by val[*len] == 0.
8907
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8911
const xmlChar *in = NULL, *start, *end, *last;
8912
xmlChar *ret = NULL;
8915
in = (xmlChar *) CUR_PTR;
8916
if (*in != '"' && *in != '\'') {
8917
xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8920
ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8923
* try to handle in this routine the most common case where no
8924
* allocation of a new string is required and where content is
8928
end = ctxt->input->end;
8931
const xmlChar *oldbase = ctxt->input->base;
8933
if (oldbase != ctxt->input->base) {
8934
long delta = ctxt->input->base - oldbase;
8935
start = start + delta;
8938
end = ctxt->input->end;
8942
* Skip any leading spaces
8944
while ((in < end) && (*in != limit) &&
8945
((*in == 0x20) || (*in == 0x9) ||
8946
(*in == 0xA) || (*in == 0xD))) {
8950
const xmlChar *oldbase = ctxt->input->base;
8952
if (ctxt->instate == XML_PARSER_EOF)
8954
if (oldbase != ctxt->input->base) {
8955
long delta = ctxt->input->base - oldbase;
8956
start = start + delta;
8959
end = ctxt->input->end;
8960
if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8961
((ctxt->options & XML_PARSE_HUGE) == 0)) {
8962
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8963
"AttValue length too long\n");
8968
while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8969
(*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8970
if ((*in++ == 0x20) && (*in == 0x20)) break;
8972
const xmlChar *oldbase = ctxt->input->base;
8974
if (ctxt->instate == XML_PARSER_EOF)
8976
if (oldbase != ctxt->input->base) {
8977
long delta = ctxt->input->base - oldbase;
8978
start = start + delta;
8981
end = ctxt->input->end;
8982
if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8983
((ctxt->options & XML_PARSE_HUGE) == 0)) {
8984
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8985
"AttValue length too long\n");
8992
* skip the trailing blanks
8994
while ((last[-1] == 0x20) && (last > start)) last--;
8995
while ((in < end) && (*in != limit) &&
8996
((*in == 0x20) || (*in == 0x9) ||
8997
(*in == 0xA) || (*in == 0xD))) {
9000
const xmlChar *oldbase = ctxt->input->base;
9002
if (ctxt->instate == XML_PARSER_EOF)
9004
if (oldbase != ctxt->input->base) {
9005
long delta = ctxt->input->base - oldbase;
9006
start = start + delta;
9008
last = last + delta;
9010
end = ctxt->input->end;
9011
if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9012
((ctxt->options & XML_PARSE_HUGE) == 0)) {
9013
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9014
"AttValue length too long\n");
9019
if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9020
((ctxt->options & XML_PARSE_HUGE) == 0)) {
9021
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9022
"AttValue length too long\n");
9025
if (*in != limit) goto need_complex;
9027
while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9028
(*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9031
const xmlChar *oldbase = ctxt->input->base;
9033
if (ctxt->instate == XML_PARSER_EOF)
9035
if (oldbase != ctxt->input->base) {
9036
long delta = ctxt->input->base - oldbase;
9037
start = start + delta;
9040
end = ctxt->input->end;
9041
if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9042
((ctxt->options & XML_PARSE_HUGE) == 0)) {
9043
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9044
"AttValue length too long\n");
9050
if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9051
((ctxt->options & XML_PARSE_HUGE) == 0)) {
9052
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9053
"AttValue length too long\n");
9056
if (*in != limit) goto need_complex;
9060
*len = last - start;
9061
ret = (xmlChar *) start;
9063
if (alloc) *alloc = 1;
9064
ret = xmlStrndup(start, last - start);
9067
if (alloc) *alloc = 0;
9070
if (alloc) *alloc = 1;
9071
return xmlParseAttValueComplex(ctxt, len, normalize);
9075
* xmlParseAttribute2:
9076
* @ctxt: an XML parser context
9077
* @pref: the element prefix
9078
* @elem: the element name
9079
* @prefix: a xmlChar ** used to store the value of the attribute prefix
9080
* @value: a xmlChar ** used to store the value of the attribute
9081
* @len: an int * to save the length of the attribute
9082
* @alloc: an int * to indicate if the attribute was allocated
9084
* parse an attribute in the new SAX2 framework.
9086
* Returns the attribute name, and the value in *value.
9089
static const xmlChar *
9090
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9091
const xmlChar * pref, const xmlChar * elem,
9092
const xmlChar ** prefix, xmlChar ** value,
9093
int *len, int *alloc)
9095
const xmlChar *name;
9096
xmlChar *val, *internal_val = NULL;
9101
name = xmlParseQName(ctxt, prefix);
9103
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9104
"error parsing attribute name\n");
9109
* get the type if needed
9111
if (ctxt->attsSpecial != NULL) {
9114
type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
9115
pref, elem, *prefix, name);
9127
val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9130
* Sometimes a second normalisation pass for spaces is needed
9131
* but that only happens if charrefs or entity references
9132
* have been used in the attribute value, i.e. the attribute
9133
* value has been extracted in an allocated string already.
9136
const xmlChar *val2;
9138
val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9139
if ((val2 != NULL) && (val2 != val)) {
9141
val = (xmlChar *) val2;
9145
ctxt->instate = XML_PARSER_CONTENT;
9147
xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9148
"Specification mandate value for attribute %s\n",
9153
if (*prefix == ctxt->str_xml) {
9155
* Check that xml:lang conforms to the specification
9156
* No more registered as an error, just generate a warning now
9157
* since this was deprecated in XML second edition
9159
if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9160
internal_val = xmlStrndup(val, *len);
9161
if (!xmlCheckLanguageID(internal_val)) {
9162
xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9163
"Malformed value for xml:lang : %s\n",
9164
internal_val, NULL);
9169
* Check that xml:space conforms to the specification
9171
if (xmlStrEqual(name, BAD_CAST "space")) {
9172
internal_val = xmlStrndup(val, *len);
9173
if (xmlStrEqual(internal_val, BAD_CAST "default"))
9175
else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9178
xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9179
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9180
internal_val, NULL);
9184
xmlFree(internal_val);
9192
* xmlParseStartTag2:
9193
* @ctxt: an XML parser context
9195
* parse a start of tag either for rule element or
9196
* EmptyElement. In both cases we don't parse the tag closing chars.
9197
* This routine is called when running SAX2 parsing
9199
* [40] STag ::= '<' Name (S Attribute)* S? '>'
9201
* [ WFC: Unique Att Spec ]
9202
* No attribute name may appear more than once in the same start-tag or
9203
* empty-element tag.
9205
* [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9207
* [ WFC: Unique Att Spec ]
9208
* No attribute name may appear more than once in the same start-tag or
9209
* empty-element tag.
9213
* [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9215
* [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9217
* Returns the element name parsed
9220
static const xmlChar *
9221
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9222
const xmlChar **URI, int *tlen) {
9223
const xmlChar *localname;
9224
const xmlChar *prefix;
9225
const xmlChar *attname;
9226
const xmlChar *aprefix;
9227
const xmlChar *nsname;
9229
const xmlChar **atts = ctxt->atts;
9230
int maxatts = ctxt->maxatts;
9231
int nratts, nbatts, nbdef;
9232
int i, j, nbNs, attval, oldline, oldcol;
9233
const xmlChar *base;
9235
int nsNr = ctxt->nsNr;
9237
if (RAW != '<') return(NULL);
9241
* NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9242
* point since the attribute values may be stored as pointers to
9243
* the buffer and calling SHRINK would destroy them !
9244
* The Shrinking is only possible once the full set of attribute
9245
* callbacks have been done.
9249
base = ctxt->input->base;
9250
cur = ctxt->input->cur - ctxt->input->base;
9251
oldline = ctxt->input->line;
9252
oldcol = ctxt->input->col;
9258
/* Forget any namespaces added during an earlier parse of this element. */
9261
localname = xmlParseQName(ctxt, &prefix);
9262
if (localname == NULL) {
9263
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9264
"StartTag: invalid element name\n");
9267
*tlen = ctxt->input->cur - ctxt->input->base - cur;
9270
* Now parse the attributes, it ends up with the ending
9276
if (ctxt->input->base != base) goto base_changed;
9278
while (((RAW != '>') &&
9279
((RAW != '/') || (NXT(1) != '>')) &&
9280
(IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9281
const xmlChar *q = CUR_PTR;
9282
unsigned int cons = ctxt->input->consumed;
9283
int len = -1, alloc = 0;
9285
attname = xmlParseAttribute2(ctxt, prefix, localname,
9286
&aprefix, &attvalue, &len, &alloc);
9287
if (ctxt->input->base != base) {
9288
if ((attvalue != NULL) && (alloc != 0))
9293
if ((attname != NULL) && (attvalue != NULL)) {
9294
if (len < 0) len = xmlStrlen(attvalue);
9295
if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9296
const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9300
uri = xmlParseURI((const char *) URL);
9302
xmlNsErr(ctxt, XML_WAR_NS_URI,
9303
"xmlns: '%s' is not a valid URI\n",
9306
if (uri->scheme == NULL) {
9307
xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9308
"xmlns: URI %s is not absolute\n",
9313
if (URL == ctxt->str_xml_ns) {
9314
if (attname != ctxt->str_xml) {
9315
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9316
"xml namespace URI cannot be the default namespace\n",
9319
goto skip_default_ns;
9323
BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9324
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9325
"reuse of the xmlns namespace name is forbidden\n",
9327
goto skip_default_ns;
9331
* check that it's not a defined namespace
9333
for (j = 1;j <= nbNs;j++)
9334
if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9337
xmlErrAttributeDup(ctxt, NULL, attname);
9339
if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9341
if (alloc != 0) xmlFree(attvalue);
9345
if (aprefix == ctxt->str_xmlns) {
9346
const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9349
if (attname == ctxt->str_xml) {
9350
if (URL != ctxt->str_xml_ns) {
9351
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9352
"xml namespace prefix mapped to wrong URI\n",
9356
* Do not keep a namespace definition node
9360
if (URL == ctxt->str_xml_ns) {
9361
if (attname != ctxt->str_xml) {
9362
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9363
"xml namespace URI mapped to wrong prefix\n",
9368
if (attname == ctxt->str_xmlns) {
9369
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9370
"redefinition of the xmlns prefix is forbidden\n",
9376
BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9377
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9378
"reuse of the xmlns namespace name is forbidden\n",
9382
if ((URL == NULL) || (URL[0] == 0)) {
9383
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9384
"xmlns:%s: Empty XML namespace is not allowed\n",
9385
attname, NULL, NULL);
9388
uri = xmlParseURI((const char *) URL);
9390
xmlNsErr(ctxt, XML_WAR_NS_URI,
9391
"xmlns:%s: '%s' is not a valid URI\n",
9392
attname, URL, NULL);
9394
if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9395
xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9396
"xmlns:%s: URI %s is not absolute\n",
9397
attname, URL, NULL);
9404
* check that it's not a defined namespace
9406
for (j = 1;j <= nbNs;j++)
9407
if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9410
xmlErrAttributeDup(ctxt, aprefix, attname);
9412
if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9414
if (alloc != 0) xmlFree(attvalue);
9416
if (ctxt->input->base != base) goto base_changed;
9421
* Add the pair to atts
9423
if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9424
if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9425
if (attvalue[len] == 0)
9429
maxatts = ctxt->maxatts;
9432
ctxt->attallocs[nratts++] = alloc;
9433
atts[nbatts++] = attname;
9434
atts[nbatts++] = aprefix;
9435
atts[nbatts++] = NULL; /* the URI will be fetched later */
9436
atts[nbatts++] = attvalue;
9438
atts[nbatts++] = attvalue;
9440
* tag if some deallocation is needed
9442
if (alloc != 0) attval = 1;
9444
if ((attvalue != NULL) && (attvalue[len] == 0))
9451
if (ctxt->instate == XML_PARSER_EOF)
9453
if (ctxt->input->base != base) goto base_changed;
9454
if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9456
if (!IS_BLANK_CH(RAW)) {
9457
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9458
"attributes construct error\n");
9462
if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9463
(attname == NULL) && (attvalue == NULL)) {
9464
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9465
"xmlParseStartTag: problem parsing attributes\n");
9469
if (ctxt->input->base != base) goto base_changed;
9473
* The attributes defaulting
9475
if (ctxt->attsDefault != NULL) {
9476
xmlDefAttrsPtr defaults;
9478
defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9479
if (defaults != NULL) {
9480
for (i = 0;i < defaults->nbAttrs;i++) {
9481
attname = defaults->values[5 * i];
9482
aprefix = defaults->values[5 * i + 1];
9485
* special work for namespaces defaulted defs
9487
if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9489
* check that it's not a defined namespace
9491
for (j = 1;j <= nbNs;j++)
9492
if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9494
if (j <= nbNs) continue;
9496
nsname = xmlGetNamespace(ctxt, NULL);
9497
if (nsname != defaults->values[5 * i + 2]) {
9498
if (nsPush(ctxt, NULL,
9499
defaults->values[5 * i + 2]) > 0)
9502
} else if (aprefix == ctxt->str_xmlns) {
9504
* check that it's not a defined namespace
9506
for (j = 1;j <= nbNs;j++)
9507
if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9509
if (j <= nbNs) continue;
9511
nsname = xmlGetNamespace(ctxt, attname);
9512
if (nsname != defaults->values[2]) {
9513
if (nsPush(ctxt, attname,
9514
defaults->values[5 * i + 2]) > 0)
9519
* check that it's not a defined attribute
9521
for (j = 0;j < nbatts;j+=5) {
9522
if ((attname == atts[j]) && (aprefix == atts[j+1]))
9525
if (j < nbatts) continue;
9527
if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9528
if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9531
maxatts = ctxt->maxatts;
9534
atts[nbatts++] = attname;
9535
atts[nbatts++] = aprefix;
9536
if (aprefix == NULL)
9537
atts[nbatts++] = NULL;
9539
atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9540
atts[nbatts++] = defaults->values[5 * i + 2];
9541
atts[nbatts++] = defaults->values[5 * i + 3];
9542
if ((ctxt->standalone == 1) &&
9543
(defaults->values[5 * i + 4] != NULL)) {
9544
xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9545
"standalone: attribute %s on %s defaulted from external subset\n",
9546
attname, localname);
9555
* The attributes checkings
9557
for (i = 0; i < nbatts;i += 5) {
9559
* The default namespace does not apply to attribute names.
9561
if (atts[i + 1] != NULL) {
9562
nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9563
if (nsname == NULL) {
9564
xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9565
"Namespace prefix %s for %s on %s is not defined\n",
9566
atts[i + 1], atts[i], localname);
9568
atts[i + 2] = nsname;
9572
* [ WFC: Unique Att Spec ]
9573
* No attribute name may appear more than once in the same
9574
* start-tag or empty-element tag.
9575
* As extended by the Namespace in XML REC.
9577
for (j = 0; j < i;j += 5) {
9578
if (atts[i] == atts[j]) {
9579
if (atts[i+1] == atts[j+1]) {
9580
xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9583
if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9584
xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9585
"Namespaced Attribute %s in '%s' redefined\n",
9586
atts[i], nsname, NULL);
9593
nsname = xmlGetNamespace(ctxt, prefix);
9594
if ((prefix != NULL) && (nsname == NULL)) {
9595
xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9596
"Namespace prefix %s on %s is not defined\n",
9597
prefix, localname, NULL);
9603
* SAX: Start of Element !
9605
if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9606
(!ctxt->disableSAX)) {
9608
ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9609
nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9610
nbatts / 5, nbdef, atts);
9612
ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9613
nsname, 0, NULL, nbatts / 5, nbdef, atts);
9617
* Free up attribute allocated strings if needed
9620
for (i = 3,j = 0; j < nratts;i += 5,j++)
9621
if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9622
xmlFree((xmlChar *) atts[i]);
9629
* the attribute strings are valid iff the base didn't change
9632
for (i = 3,j = 0; j < nratts;i += 5,j++)
9633
if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9634
xmlFree((xmlChar *) atts[i]);
9636
ctxt->input->cur = ctxt->input->base + cur;
9637
ctxt->input->line = oldline;
9638
ctxt->input->col = oldcol;
9639
if (ctxt->wellFormed == 1) {
9647
* @ctxt: an XML parser context
9648
* @line: line of the start tag
9649
* @nsNr: number of namespaces on the start tag
9651
* parse an end of tag
9653
* [42] ETag ::= '</' Name S? '>'
9657
* [NS 9] ETag ::= '</' QName S? '>'
9661
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9662
const xmlChar *URI, int line, int nsNr, int tlen) {
9663
const xmlChar *name;
9666
if ((RAW != '<') || (NXT(1) != '/')) {
9667
xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9672
if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9673
if (ctxt->input->cur[tlen] == '>') {
9674
ctxt->input->cur += tlen + 1;
9677
ctxt->input->cur += tlen;
9681
name = xmlParseNameAndCompare(ctxt, ctxt->name);
9683
name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9687
* We should definitely be at the ending "S? '>'" part
9690
if (ctxt->instate == XML_PARSER_EOF)
9693
if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9694
xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9699
* [ WFC: Element Type Match ]
9700
* The Name in an element's end-tag must match the element type in the
9704
if (name != (xmlChar*)1) {
9705
if (name == NULL) name = BAD_CAST "unparseable";
9706
if ((line == 0) && (ctxt->node != NULL))
9707
line = ctxt->node->line;
9708
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9709
"Opening and ending tag mismatch: %s line %d and %s\n",
9710
ctxt->name, line, name);
9717
if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9718
(!ctxt->disableSAX))
9719
ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9729
* @ctxt: an XML parser context
9731
* Parse escaped pure raw content.
9733
* [18] CDSect ::= CDStart CData CDEnd
9735
* [19] CDStart ::= '<![CDATA['
9737
* [20] CData ::= (Char* - (Char* ']]>' Char*))
9739
* [21] CDEnd ::= ']]>'
9742
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9743
xmlChar *buf = NULL;
9745
int size = XML_PARSER_BUFFER_SIZE;
9751
/* Check 2.6.0 was NXT(0) not RAW */
9752
if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9757
ctxt->instate = XML_PARSER_CDATA_SECTION;
9760
xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9761
ctxt->instate = XML_PARSER_CONTENT;
9767
xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9768
ctxt->instate = XML_PARSER_CONTENT;
9773
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9775
xmlErrMemory(ctxt, NULL);
9778
while (IS_CHAR(cur) &&
9779
((r != ']') || (s != ']') || (cur != '>'))) {
9780
if (len + 5 >= size) {
9783
if ((size > XML_MAX_TEXT_LENGTH) &&
9784
((ctxt->options & XML_PARSE_HUGE) == 0)) {
9785
xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9786
"CData section too big found", NULL);
9790
tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9793
xmlErrMemory(ctxt, NULL);
9799
COPY_BUF(rl,buf,len,r);
9807
if (ctxt->instate == XML_PARSER_EOF) {
9817
ctxt->instate = XML_PARSER_CONTENT;
9819
xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9820
"CData section not finished\n%.50s\n", buf);
9827
* OK the buffer is to be consumed as cdata.
9829
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9830
if (ctxt->sax->cdataBlock != NULL)
9831
ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9832
else if (ctxt->sax->characters != NULL)
9833
ctxt->sax->characters(ctxt->userData, buf, len);
9840
* @ctxt: an XML parser context
9844
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9848
xmlParseContent(xmlParserCtxtPtr ctxt) {
9850
while ((RAW != 0) &&
9851
((RAW != '<') || (NXT(1) != '/')) &&
9852
(ctxt->instate != XML_PARSER_EOF)) {
9853
const xmlChar *test = CUR_PTR;
9854
unsigned int cons = ctxt->input->consumed;
9855
const xmlChar *cur = ctxt->input->cur;
9858
* First case : a Processing Instruction.
9860
if ((*cur == '<') && (cur[1] == '?')) {
9865
* Second case : a CDSection
9867
/* 2.6.0 test was *cur not RAW */
9868
else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9869
xmlParseCDSect(ctxt);
9873
* Third case : a comment
9875
else if ((*cur == '<') && (NXT(1) == '!') &&
9876
(NXT(2) == '-') && (NXT(3) == '-')) {
9877
xmlParseComment(ctxt);
9878
ctxt->instate = XML_PARSER_CONTENT;
9882
* Fourth case : a sub-element.
9884
else if (*cur == '<') {
9885
xmlParseElement(ctxt);
9889
* Fifth case : a reference. If it has not been resolved,
9890
* parsing returns its Name, create the node
9893
else if (*cur == '&') {
9894
xmlParseReference(ctxt);
9898
* Last case, text. Note that References are handled directly.
9901
xmlParseCharData(ctxt, 0);
9906
* Pop-up of finished entities.
9908
while ((RAW == 0) && (ctxt->inputNr > 1))
9912
if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9913
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9914
"detected an error in element content\n");
9915
ctxt->instate = XML_PARSER_EOF;
9923
* @ctxt: an XML parser context
9925
* parse an XML element, this is highly recursive
9927
* [39] element ::= EmptyElemTag | STag content ETag
9929
* [ WFC: Element Type Match ]
9930
* The Name in an element's end-tag must match the element type in the
9936
xmlParseElement(xmlParserCtxtPtr ctxt) {
9937
const xmlChar *name;
9938
const xmlChar *prefix = NULL;
9939
const xmlChar *URI = NULL;
9940
xmlParserNodeInfo node_info;
9943
int nsNr = ctxt->nsNr;
9945
if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9946
((ctxt->options & XML_PARSE_HUGE) == 0)) {
9947
xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9948
"Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9950
ctxt->instate = XML_PARSER_EOF;
9954
/* Capture start position */
9955
if (ctxt->record_info) {
9956
node_info.begin_pos = ctxt->input->consumed +
9957
(CUR_PTR - ctxt->input->base);
9958
node_info.begin_line = ctxt->input->line;
9961
if (ctxt->spaceNr == 0)
9962
spacePush(ctxt, -1);
9963
else if (*ctxt->space == -2)
9964
spacePush(ctxt, -1);
9966
spacePush(ctxt, *ctxt->space);
9968
line = ctxt->input->line;
9969
#ifdef LIBXML_SAX1_ENABLED
9971
#endif /* LIBXML_SAX1_ENABLED */
9972
name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9973
#ifdef LIBXML_SAX1_ENABLED
9975
name = xmlParseStartTag(ctxt);
9976
#endif /* LIBXML_SAX1_ENABLED */
9977
if (ctxt->instate == XML_PARSER_EOF)
9983
namePush(ctxt, name);
9986
#ifdef LIBXML_VALID_ENABLED
9988
* [ VC: Root Element Type ]
9989
* The Name in the document type declaration must match the element
9990
* type of the root element.
9992
if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9993
ctxt->node && (ctxt->node == ctxt->myDoc->children))
9994
ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9995
#endif /* LIBXML_VALID_ENABLED */
9998
* Check for an Empty Element.
10000
if ((RAW == '/') && (NXT(1) == '>')) {
10003
if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10004
(!ctxt->disableSAX))
10005
ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10006
#ifdef LIBXML_SAX1_ENABLED
10008
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10009
(!ctxt->disableSAX))
10010
ctxt->sax->endElement(ctxt->userData, name);
10011
#endif /* LIBXML_SAX1_ENABLED */
10015
if (nsNr != ctxt->nsNr)
10016
nsPop(ctxt, ctxt->nsNr - nsNr);
10017
if ( ret != NULL && ctxt->record_info ) {
10018
node_info.end_pos = ctxt->input->consumed +
10019
(CUR_PTR - ctxt->input->base);
10020
node_info.end_line = ctxt->input->line;
10021
node_info.node = ret;
10022
xmlParserAddNodeInfo(ctxt, &node_info);
10029
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10030
"Couldn't find end of Start Tag %s line %d\n",
10034
* end of parsing of this node.
10039
if (nsNr != ctxt->nsNr)
10040
nsPop(ctxt, ctxt->nsNr - nsNr);
10043
* Capture end position and add node
10045
if ( ret != NULL && ctxt->record_info ) {
10046
node_info.end_pos = ctxt->input->consumed +
10047
(CUR_PTR - ctxt->input->base);
10048
node_info.end_line = ctxt->input->line;
10049
node_info.node = ret;
10050
xmlParserAddNodeInfo(ctxt, &node_info);
10056
* Parse the content of the element:
10058
xmlParseContent(ctxt);
10059
if (ctxt->instate == XML_PARSER_EOF)
10061
if (!IS_BYTE_CHAR(RAW)) {
10062
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10063
"Premature end of data in tag %s line %d\n",
10067
* end of parsing of this node.
10072
if (nsNr != ctxt->nsNr)
10073
nsPop(ctxt, ctxt->nsNr - nsNr);
10078
* parse the end of tag: '</' should be here.
10081
xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
10084
#ifdef LIBXML_SAX1_ENABLED
10086
xmlParseEndTag1(ctxt, line);
10087
#endif /* LIBXML_SAX1_ENABLED */
10090
* Capture end position and add node
10092
if ( ret != NULL && ctxt->record_info ) {
10093
node_info.end_pos = ctxt->input->consumed +
10094
(CUR_PTR - ctxt->input->base);
10095
node_info.end_line = ctxt->input->line;
10096
node_info.node = ret;
10097
xmlParserAddNodeInfo(ctxt, &node_info);
10102
* xmlParseVersionNum:
10103
* @ctxt: an XML parser context
10105
* parse the XML version value.
10107
* [26] VersionNum ::= '1.' [0-9]+
10109
* In practice allow [0-9].[0-9]+ at that level
10111
* Returns the string giving the XML version number, or NULL
10114
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10115
xmlChar *buf = NULL;
10120
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10122
xmlErrMemory(ctxt, NULL);
10126
if (!((cur >= '0') && (cur <= '9'))) {
10140
while ((cur >= '0') && (cur <= '9')) {
10141
if (len + 1 >= size) {
10145
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10148
xmlErrMemory(ctxt, NULL);
10162
* xmlParseVersionInfo:
10163
* @ctxt: an XML parser context
10165
* parse the XML version.
10167
* [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10169
* [25] Eq ::= S? '=' S?
10171
* Returns the version string, e.g. "1.0"
10175
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10176
xmlChar *version = NULL;
10178
if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10182
xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10189
version = xmlParseVersionNum(ctxt);
10191
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10194
} else if (RAW == '\''){
10196
version = xmlParseVersionNum(ctxt);
10198
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10202
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10210
* @ctxt: an XML parser context
10212
* parse the XML encoding name
10214
* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10216
* Returns the encoding name value or NULL
10219
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10220
xmlChar *buf = NULL;
10226
if (((cur >= 'a') && (cur <= 'z')) ||
10227
((cur >= 'A') && (cur <= 'Z'))) {
10228
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10230
xmlErrMemory(ctxt, NULL);
10237
while (((cur >= 'a') && (cur <= 'z')) ||
10238
((cur >= 'A') && (cur <= 'Z')) ||
10239
((cur >= '0') && (cur <= '9')) ||
10240
(cur == '.') || (cur == '_') ||
10242
if (len + 1 >= size) {
10246
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10248
xmlErrMemory(ctxt, NULL);
10265
xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10271
* xmlParseEncodingDecl:
10272
* @ctxt: an XML parser context
10274
* parse the XML encoding declaration
10276
* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10278
* this sets up the conversion filters.
10280
* Returns the encoding value or NULL
10284
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10285
xmlChar *encoding = NULL;
10288
if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10292
xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10299
encoding = xmlParseEncName(ctxt);
10301
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10304
} else if (RAW == '\''){
10306
encoding = xmlParseEncName(ctxt);
10308
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10312
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10316
* Non standard parsing, allowing the user to ignore encoding
10318
if (ctxt->options & XML_PARSE_IGNORE_ENC)
10322
* UTF-16 encoding switch has already taken place at this stage,
10323
* moreover the little-endian/big-endian selection is already done
10325
if ((encoding != NULL) &&
10326
((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10327
(!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10329
* If no encoding was passed to the parser, that we are
10330
* using UTF-16 and no decoder is present i.e. the
10331
* document is apparently UTF-8 compatible, then raise an
10332
* encoding mismatch fatal error
10334
if ((ctxt->encoding == NULL) &&
10335
(ctxt->input->buf != NULL) &&
10336
(ctxt->input->buf->encoder == NULL)) {
10337
xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10338
"Document labelled UTF-16 but has UTF-8 content\n");
10340
if (ctxt->encoding != NULL)
10341
xmlFree((xmlChar *) ctxt->encoding);
10342
ctxt->encoding = encoding;
10345
* UTF-8 encoding is handled natively
10347
else if ((encoding != NULL) &&
10348
((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10349
(!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10350
if (ctxt->encoding != NULL)
10351
xmlFree((xmlChar *) ctxt->encoding);
10352
ctxt->encoding = encoding;
10354
else if (encoding != NULL) {
10355
xmlCharEncodingHandlerPtr handler;
10357
if (ctxt->input->encoding != NULL)
10358
xmlFree((xmlChar *) ctxt->input->encoding);
10359
ctxt->input->encoding = encoding;
10361
handler = xmlFindCharEncodingHandler((const char *) encoding);
10362
if (handler != NULL) {
10363
xmlSwitchToEncoding(ctxt, handler);
10365
xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10366
"Unsupported encoding %s\n", encoding);
10376
* @ctxt: an XML parser context
10378
* parse the XML standalone declaration
10380
* [32] SDDecl ::= S 'standalone' Eq
10381
* (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10383
* [ VC: Standalone Document Declaration ]
10384
* TODO The standalone document declaration must have the value "no"
10385
* if any external markup declarations contain declarations of:
10386
* - attributes with default values, if elements to which these
10387
* attributes apply appear in the document without specifications
10388
* of values for these attributes, or
10389
* - entities (other than amp, lt, gt, apos, quot), if references
10390
* to those entities appear in the document, or
10391
* - attributes with values subject to normalization, where the
10392
* attribute appears in the document with a value which will change
10393
* as a result of normalization, or
10394
* - element types with element content, if white space occurs directly
10395
* within any instance of those types.
10398
* 1 if standalone="yes"
10399
* 0 if standalone="no"
10400
* -2 if standalone attribute is missing or invalid
10401
* (A standalone value of -2 means that the XML declaration was found,
10402
* but no value was specified for the standalone attribute).
10406
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10407
int standalone = -2;
10410
if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10414
xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10415
return(standalone);
10421
if ((RAW == 'n') && (NXT(1) == 'o')) {
10424
} else if ((RAW == 'y') && (NXT(1) == 'e') &&
10429
xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10432
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10435
} else if (RAW == '"'){
10437
if ((RAW == 'n') && (NXT(1) == 'o')) {
10440
} else if ((RAW == 'y') && (NXT(1) == 'e') &&
10445
xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10448
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10452
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10455
return(standalone);
10460
* @ctxt: an XML parser context
10462
* parse an XML declaration header
10464
* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10468
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10472
* This value for standalone indicates that the document has an
10473
* XML declaration but it does not have a standalone attribute.
10474
* It will be overwritten later if a standalone attribute is found.
10476
ctxt->input->standalone = -2;
10479
* We know that '<?xml' is here.
10483
if (!IS_BLANK_CH(RAW)) {
10484
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10485
"Blank needed after '<?xml'\n");
10490
* We must have the VersionInfo here.
10492
version = xmlParseVersionInfo(ctxt);
10493
if (version == NULL) {
10494
xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10496
if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10498
* Changed here for XML-1.0 5th edition
10500
if (ctxt->options & XML_PARSE_OLD10) {
10501
xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10502
"Unsupported version '%s'\n",
10505
if ((version[0] == '1') && ((version[1] == '.'))) {
10506
xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10507
"Unsupported version '%s'\n",
10510
xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10511
"Unsupported version '%s'\n",
10516
if (ctxt->version != NULL)
10517
xmlFree((void *) ctxt->version);
10518
ctxt->version = version;
10522
* We may have the encoding declaration
10524
if (!IS_BLANK_CH(RAW)) {
10525
if ((RAW == '?') && (NXT(1) == '>')) {
10529
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10531
xmlParseEncodingDecl(ctxt);
10532
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10534
* The XML REC instructs us to stop parsing right here
10540
* We may have the standalone status.
10542
if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10543
if ((RAW == '?') && (NXT(1) == '>')) {
10547
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10551
* We can grow the input buffer freely at that point
10556
ctxt->input->standalone = xmlParseSDDecl(ctxt);
10559
if ((RAW == '?') && (NXT(1) == '>')) {
10561
} else if (RAW == '>') {
10562
/* Deprecated old WD ... */
10563
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10566
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10567
MOVETO_ENDTAG(CUR_PTR);
10574
* @ctxt: an XML parser context
10576
* parse an XML Misc* optional field.
10578
* [27] Misc ::= Comment | PI | S
10582
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10583
while ((ctxt->instate != XML_PARSER_EOF) &&
10584
(((RAW == '<') && (NXT(1) == '?')) ||
10585
(CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10586
IS_BLANK_CH(CUR))) {
10587
if ((RAW == '<') && (NXT(1) == '?')) {
10589
} else if (IS_BLANK_CH(CUR)) {
10592
xmlParseComment(ctxt);
10597
* xmlParseDocument:
10598
* @ctxt: an XML parser context
10600
* parse an XML document (and build a tree if using the standard SAX
10603
* [1] document ::= prolog element Misc*
10605
* [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10607
* Returns 0, or -1 in case of error. The parser context is augmented
10608
* as a result of the parsing.
10612
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10614
xmlCharEncoding enc;
10618
if ((ctxt == NULL) || (ctxt->input == NULL))
10624
* SAX: detecting the level.
10626
xmlDetectSAX2(ctxt);
10629
* SAX: beginning of the document processing.
10631
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10632
ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10633
if (ctxt->instate == XML_PARSER_EOF)
10636
if ((ctxt->encoding == NULL) &&
10637
((ctxt->input->end - ctxt->input->cur) >= 4)) {
10639
* Get the 4 first bytes and decode the charset
10640
* if enc != XML_CHAR_ENCODING_NONE
10641
* plug some encoding conversion routines.
10647
enc = xmlDetectCharEncoding(&start[0], 4);
10648
if (enc != XML_CHAR_ENCODING_NONE) {
10649
xmlSwitchEncoding(ctxt, enc);
10655
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10659
* Check for the XMLDecl in the Prolog.
10660
* do not GROW here, to avoid the detected encoder decoding more
10661
* than just the first line, unless the amount of data is really
10662
* too small to hold "<?xml version="1.0" encoding="foo"
10664
if ((ctxt->input->end - ctxt->input->cur) < 35) {
10667
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10670
* Note that we will switch encoding on the fly.
10672
xmlParseXMLDecl(ctxt);
10673
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10675
* The XML REC instructs us to stop parsing right here
10679
ctxt->standalone = ctxt->input->standalone;
10682
ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10684
if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10685
ctxt->sax->startDocument(ctxt->userData);
10686
if (ctxt->instate == XML_PARSER_EOF)
10690
* The Misc part of the Prolog
10693
xmlParseMisc(ctxt);
10696
* Then possibly doc type declaration(s) and more Misc
10697
* (doctypedecl Misc*)?
10700
if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10702
ctxt->inSubset = 1;
10703
xmlParseDocTypeDecl(ctxt);
10705
ctxt->instate = XML_PARSER_DTD;
10706
xmlParseInternalSubset(ctxt);
10707
if (ctxt->instate == XML_PARSER_EOF)
10712
* Create and update the external subset.
10714
ctxt->inSubset = 2;
10715
if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10716
(!ctxt->disableSAX))
10717
ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10718
ctxt->extSubSystem, ctxt->extSubURI);
10719
if (ctxt->instate == XML_PARSER_EOF)
10721
ctxt->inSubset = 0;
10723
xmlCleanSpecialAttr(ctxt);
10725
ctxt->instate = XML_PARSER_PROLOG;
10726
xmlParseMisc(ctxt);
10730
* Time to start parsing the tree itself
10734
xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10735
"Start tag expected, '<' not found\n");
10737
ctxt->instate = XML_PARSER_CONTENT;
10738
xmlParseElement(ctxt);
10739
ctxt->instate = XML_PARSER_EPILOG;
10743
* The Misc part at the end
10745
xmlParseMisc(ctxt);
10748
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10750
ctxt->instate = XML_PARSER_EOF;
10754
* SAX: end of the document processing.
10756
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10757
ctxt->sax->endDocument(ctxt->userData);
10760
* Remove locally kept entity definitions if the tree was not built
10762
if ((ctxt->myDoc != NULL) &&
10763
(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10764
xmlFreeDoc(ctxt->myDoc);
10765
ctxt->myDoc = NULL;
10768
if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10769
ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10771
ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10772
if (ctxt->nsWellFormed)
10773
ctxt->myDoc->properties |= XML_DOC_NSVALID;
10774
if (ctxt->options & XML_PARSE_OLD10)
10775
ctxt->myDoc->properties |= XML_DOC_OLD10;
10777
if (! ctxt->wellFormed) {
10785
* xmlParseExtParsedEnt:
10786
* @ctxt: an XML parser context
10788
* parse a general parsed entity
10789
* An external general parsed entity is well-formed if it matches the
10790
* production labeled extParsedEnt.
10792
* [78] extParsedEnt ::= TextDecl? content
10794
* Returns 0, or -1 in case of error. The parser context is augmented
10795
* as a result of the parsing.
10799
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10801
xmlCharEncoding enc;
10803
if ((ctxt == NULL) || (ctxt->input == NULL))
10806
xmlDefaultSAXHandlerInit();
10808
xmlDetectSAX2(ctxt);
10813
* SAX: beginning of the document processing.
10815
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10816
ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10819
* Get the 4 first bytes and decode the charset
10820
* if enc != XML_CHAR_ENCODING_NONE
10821
* plug some encoding conversion routines.
10823
if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10828
enc = xmlDetectCharEncoding(start, 4);
10829
if (enc != XML_CHAR_ENCODING_NONE) {
10830
xmlSwitchEncoding(ctxt, enc);
10836
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10840
* Check for the XMLDecl in the Prolog.
10843
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10846
* Note that we will switch encoding on the fly.
10848
xmlParseXMLDecl(ctxt);
10849
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10851
* The XML REC instructs us to stop parsing right here
10857
ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10859
if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10860
ctxt->sax->startDocument(ctxt->userData);
10861
if (ctxt->instate == XML_PARSER_EOF)
10865
* Doing validity checking on chunk doesn't make sense
10867
ctxt->instate = XML_PARSER_CONTENT;
10868
ctxt->validate = 0;
10869
ctxt->loadsubset = 0;
10872
xmlParseContent(ctxt);
10873
if (ctxt->instate == XML_PARSER_EOF)
10876
if ((RAW == '<') && (NXT(1) == '/')) {
10877
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10878
} else if (RAW != 0) {
10879
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10883
* SAX: end of the document processing.
10885
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10886
ctxt->sax->endDocument(ctxt->userData);
10888
if (! ctxt->wellFormed) return(-1);
10892
#ifdef LIBXML_PUSH_ENABLED
10893
/************************************************************************
10895
* Progressive parsing interfaces *
10897
************************************************************************/
10900
* xmlParseLookupSequence:
10901
* @ctxt: an XML parser context
10902
* @first: the first char to lookup
10903
* @next: the next char to lookup or zero
10904
* @third: the next char to lookup or zero
10906
* Try to find if a sequence (first, next, third) or just (first next) or
10907
* (first) is available in the input stream.
10908
* This function has a side effect of (possibly) incrementing ctxt->checkIndex
10909
* to avoid rescanning sequences of bytes, it DOES change the state of the
10910
* parser, do not use liberally.
10912
* Returns the index to the current parsing point if the full sequence
10913
* is available, -1 otherwise.
10916
xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10917
xmlChar next, xmlChar third) {
10919
xmlParserInputPtr in;
10920
const xmlChar *buf;
10923
if (in == NULL) return(-1);
10924
base = in->cur - in->base;
10925
if (base < 0) return(-1);
10926
if (ctxt->checkIndex > base)
10927
base = ctxt->checkIndex;
10928
if (in->buf == NULL) {
10932
buf = xmlBufContent(in->buf->buffer);
10933
len = xmlBufUse(in->buf->buffer);
10935
/* take into account the sequence length */
10936
if (third) len -= 2;
10937
else if (next) len --;
10938
for (;base < len;base++) {
10939
if (buf[base] == first) {
10941
if ((buf[base + 1] != next) ||
10942
(buf[base + 2] != third)) continue;
10943
} else if (next != 0) {
10944
if (buf[base + 1] != next) continue;
10946
ctxt->checkIndex = 0;
10949
xmlGenericError(xmlGenericErrorContext,
10950
"PP: lookup '%c' found at %d\n",
10952
else if (third == 0)
10953
xmlGenericError(xmlGenericErrorContext,
10954
"PP: lookup '%c%c' found at %d\n",
10955
first, next, base);
10957
xmlGenericError(xmlGenericErrorContext,
10958
"PP: lookup '%c%c%c' found at %d\n",
10959
first, next, third, base);
10961
return(base - (in->cur - in->base));
10964
ctxt->checkIndex = base;
10967
xmlGenericError(xmlGenericErrorContext,
10968
"PP: lookup '%c' failed\n", first);
10969
else if (third == 0)
10970
xmlGenericError(xmlGenericErrorContext,
10971
"PP: lookup '%c%c' failed\n", first, next);
10973
xmlGenericError(xmlGenericErrorContext,
10974
"PP: lookup '%c%c%c' failed\n", first, next, third);
10980
* xmlParseGetLasts:
10981
* @ctxt: an XML parser context
10982
* @lastlt: pointer to store the last '<' from the input
10983
* @lastgt: pointer to store the last '>' from the input
10985
* Lookup the last < and > in the current chunk
10988
xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10989
const xmlChar **lastgt) {
10990
const xmlChar *tmp;
10992
if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10993
xmlGenericError(xmlGenericErrorContext,
10994
"Internal error: xmlParseGetLasts\n");
10997
if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
10998
tmp = ctxt->input->end;
11000
while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11001
if (tmp < ctxt->input->base) {
11007
while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11008
if (*tmp == '\'') {
11010
while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11011
if (tmp < ctxt->input->end) tmp++;
11012
} else if (*tmp == '"') {
11014
while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11015
if (tmp < ctxt->input->end) tmp++;
11019
if (tmp < ctxt->input->end)
11024
while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11025
if (tmp >= ctxt->input->base)
11037
* xmlCheckCdataPush:
11038
* @utf: pointer to the block of characters
11039
* @len: length of the block in bytes
11041
* Check that the block of characters is okay as CData content [20]
11043
* Returns the number of bytes to pass if okay, a negative index where an
11044
* UTF-8 error occurred otherwise
11047
xmlCheckCdataPush(const xmlChar *utf, int len) {
11052
if ((utf == NULL) || (len <= 0))
11055
for (ix = 0; ix < len;) { /* string is 0-terminated */
11057
if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 0 */
11060
else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11064
} else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11065
if (ix + 2 > len) return(ix);
11066
if ((utf[ix+1] & 0xc0 ) != 0x80)
11068
codepoint = (utf[ix] & 0x1f) << 6;
11069
codepoint |= utf[ix+1] & 0x3f;
11070
if (!xmlIsCharQ(codepoint))
11073
} else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11074
if (ix + 3 > len) return(ix);
11075
if (((utf[ix+1] & 0xc0) != 0x80) ||
11076
((utf[ix+2] & 0xc0) != 0x80))
11078
codepoint = (utf[ix] & 0xf) << 12;
11079
codepoint |= (utf[ix+1] & 0x3f) << 6;
11080
codepoint |= utf[ix+2] & 0x3f;
11081
if (!xmlIsCharQ(codepoint))
11084
} else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11085
if (ix + 4 > len) return(ix);
11086
if (((utf[ix+1] & 0xc0) != 0x80) ||
11087
((utf[ix+2] & 0xc0) != 0x80) ||
11088
((utf[ix+3] & 0xc0) != 0x80))
11090
codepoint = (utf[ix] & 0x7) << 18;
11091
codepoint |= (utf[ix+1] & 0x3f) << 12;
11092
codepoint |= (utf[ix+2] & 0x3f) << 6;
11093
codepoint |= utf[ix+3] & 0x3f;
11094
if (!xmlIsCharQ(codepoint))
11097
} else /* unknown encoding */
11104
* xmlParseTryOrFinish:
11105
* @ctxt: an XML parser context
11106
* @terminate: last chunk indicator
11108
* Try to progress on parsing
11110
* Returns zero if no parsing was possible
11113
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11117
const xmlChar *lastlt, *lastgt;
11119
if (ctxt->input == NULL)
11123
switch (ctxt->instate) {
11124
case XML_PARSER_EOF:
11125
xmlGenericError(xmlGenericErrorContext,
11126
"PP: try EOF\n"); break;
11127
case XML_PARSER_START:
11128
xmlGenericError(xmlGenericErrorContext,
11129
"PP: try START\n"); break;
11130
case XML_PARSER_MISC:
11131
xmlGenericError(xmlGenericErrorContext,
11132
"PP: try MISC\n");break;
11133
case XML_PARSER_COMMENT:
11134
xmlGenericError(xmlGenericErrorContext,
11135
"PP: try COMMENT\n");break;
11136
case XML_PARSER_PROLOG:
11137
xmlGenericError(xmlGenericErrorContext,
11138
"PP: try PROLOG\n");break;
11139
case XML_PARSER_START_TAG:
11140
xmlGenericError(xmlGenericErrorContext,
11141
"PP: try START_TAG\n");break;
11142
case XML_PARSER_CONTENT:
11143
xmlGenericError(xmlGenericErrorContext,
11144
"PP: try CONTENT\n");break;
11145
case XML_PARSER_CDATA_SECTION:
11146
xmlGenericError(xmlGenericErrorContext,
11147
"PP: try CDATA_SECTION\n");break;
11148
case XML_PARSER_END_TAG:
11149
xmlGenericError(xmlGenericErrorContext,
11150
"PP: try END_TAG\n");break;
11151
case XML_PARSER_ENTITY_DECL:
11152
xmlGenericError(xmlGenericErrorContext,
11153
"PP: try ENTITY_DECL\n");break;
11154
case XML_PARSER_ENTITY_VALUE:
11155
xmlGenericError(xmlGenericErrorContext,
11156
"PP: try ENTITY_VALUE\n");break;
11157
case XML_PARSER_ATTRIBUTE_VALUE:
11158
xmlGenericError(xmlGenericErrorContext,
11159
"PP: try ATTRIBUTE_VALUE\n");break;
11160
case XML_PARSER_DTD:
11161
xmlGenericError(xmlGenericErrorContext,
11162
"PP: try DTD\n");break;
11163
case XML_PARSER_EPILOG:
11164
xmlGenericError(xmlGenericErrorContext,
11165
"PP: try EPILOG\n");break;
11166
case XML_PARSER_PI:
11167
xmlGenericError(xmlGenericErrorContext,
11168
"PP: try PI\n");break;
11169
case XML_PARSER_IGNORE:
11170
xmlGenericError(xmlGenericErrorContext,
11171
"PP: try IGNORE\n");break;
11175
if ((ctxt->input != NULL) &&
11176
(ctxt->input->cur - ctxt->input->base > 4096)) {
11178
ctxt->checkIndex = 0;
11180
xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11182
while (ctxt->instate != XML_PARSER_EOF) {
11183
if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11188
* Pop-up of finished entities.
11190
while ((RAW == 0) && (ctxt->inputNr > 1))
11193
if (ctxt->input == NULL) break;
11194
if (ctxt->input->buf == NULL)
11195
avail = ctxt->input->length -
11196
(ctxt->input->cur - ctxt->input->base);
11199
* If we are operating on converted input, try to flush
11200
* remaining chars to avoid them stalling in the non-converted
11201
* buffer. But do not do this in document start where
11202
* encoding="..." may not have been read and we work on a
11203
* guessed encoding.
11205
if ((ctxt->instate != XML_PARSER_START) &&
11206
(ctxt->input->buf->raw != NULL) &&
11207
(xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11208
size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11210
size_t current = ctxt->input->cur - ctxt->input->base;
11212
xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11213
xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11216
avail = xmlBufUse(ctxt->input->buf->buffer) -
11217
(ctxt->input->cur - ctxt->input->base);
11221
switch (ctxt->instate) {
11222
case XML_PARSER_EOF:
11224
* Document parsing is done !
11227
case XML_PARSER_START:
11228
if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11230
xmlCharEncoding enc;
11233
* Very first chars read from the document flow.
11239
* Get the 4 first bytes and decode the charset
11240
* if enc != XML_CHAR_ENCODING_NONE
11241
* plug some encoding conversion routines,
11242
* else xmlSwitchEncoding will set to (default)
11249
enc = xmlDetectCharEncoding(start, 4);
11250
xmlSwitchEncoding(ctxt, enc);
11256
cur = ctxt->input->cur[0];
11257
next = ctxt->input->cur[1];
11259
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11260
ctxt->sax->setDocumentLocator(ctxt->userData,
11261
&xmlDefaultSAXLocator);
11262
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11263
ctxt->instate = XML_PARSER_EOF;
11265
xmlGenericError(xmlGenericErrorContext,
11266
"PP: entering EOF\n");
11268
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11269
ctxt->sax->endDocument(ctxt->userData);
11272
if ((cur == '<') && (next == '?')) {
11273
/* PI or XML decl */
11274
if (avail < 5) return(ret);
11275
if ((!terminate) &&
11276
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11278
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11279
ctxt->sax->setDocumentLocator(ctxt->userData,
11280
&xmlDefaultSAXLocator);
11281
if ((ctxt->input->cur[2] == 'x') &&
11282
(ctxt->input->cur[3] == 'm') &&
11283
(ctxt->input->cur[4] == 'l') &&
11284
(IS_BLANK_CH(ctxt->input->cur[5]))) {
11287
xmlGenericError(xmlGenericErrorContext,
11288
"PP: Parsing XML Decl\n");
11290
xmlParseXMLDecl(ctxt);
11291
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11293
* The XML REC instructs us to stop parsing right
11296
ctxt->instate = XML_PARSER_EOF;
11299
ctxt->standalone = ctxt->input->standalone;
11300
if ((ctxt->encoding == NULL) &&
11301
(ctxt->input->encoding != NULL))
11302
ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11303
if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11304
(!ctxt->disableSAX))
11305
ctxt->sax->startDocument(ctxt->userData);
11306
ctxt->instate = XML_PARSER_MISC;
11308
xmlGenericError(xmlGenericErrorContext,
11309
"PP: entering MISC\n");
11312
ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11313
if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11314
(!ctxt->disableSAX))
11315
ctxt->sax->startDocument(ctxt->userData);
11316
ctxt->instate = XML_PARSER_MISC;
11318
xmlGenericError(xmlGenericErrorContext,
11319
"PP: entering MISC\n");
11323
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11324
ctxt->sax->setDocumentLocator(ctxt->userData,
11325
&xmlDefaultSAXLocator);
11326
ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11327
if (ctxt->version == NULL) {
11328
xmlErrMemory(ctxt, NULL);
11331
if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11332
(!ctxt->disableSAX))
11333
ctxt->sax->startDocument(ctxt->userData);
11334
ctxt->instate = XML_PARSER_MISC;
11336
xmlGenericError(xmlGenericErrorContext,
11337
"PP: entering MISC\n");
11341
case XML_PARSER_START_TAG: {
11342
const xmlChar *name;
11343
const xmlChar *prefix = NULL;
11344
const xmlChar *URI = NULL;
11345
int nsNr = ctxt->nsNr;
11347
if ((avail < 2) && (ctxt->inputNr == 1))
11349
cur = ctxt->input->cur[0];
11351
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11352
ctxt->instate = XML_PARSER_EOF;
11353
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11354
ctxt->sax->endDocument(ctxt->userData);
11358
if (ctxt->progressive) {
11359
/* > can be found unescaped in attribute values */
11360
if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11362
} else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11366
if (ctxt->spaceNr == 0)
11367
spacePush(ctxt, -1);
11368
else if (*ctxt->space == -2)
11369
spacePush(ctxt, -1);
11371
spacePush(ctxt, *ctxt->space);
11372
#ifdef LIBXML_SAX1_ENABLED
11374
#endif /* LIBXML_SAX1_ENABLED */
11375
name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11376
#ifdef LIBXML_SAX1_ENABLED
11378
name = xmlParseStartTag(ctxt);
11379
#endif /* LIBXML_SAX1_ENABLED */
11380
if (ctxt->instate == XML_PARSER_EOF)
11382
if (name == NULL) {
11384
ctxt->instate = XML_PARSER_EOF;
11385
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11386
ctxt->sax->endDocument(ctxt->userData);
11389
#ifdef LIBXML_VALID_ENABLED
11391
* [ VC: Root Element Type ]
11392
* The Name in the document type declaration must match
11393
* the element type of the root element.
11395
if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11396
ctxt->node && (ctxt->node == ctxt->myDoc->children))
11397
ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11398
#endif /* LIBXML_VALID_ENABLED */
11401
* Check for an Empty Element.
11403
if ((RAW == '/') && (NXT(1) == '>')) {
11407
if ((ctxt->sax != NULL) &&
11408
(ctxt->sax->endElementNs != NULL) &&
11409
(!ctxt->disableSAX))
11410
ctxt->sax->endElementNs(ctxt->userData, name,
11412
if (ctxt->nsNr - nsNr > 0)
11413
nsPop(ctxt, ctxt->nsNr - nsNr);
11414
#ifdef LIBXML_SAX1_ENABLED
11416
if ((ctxt->sax != NULL) &&
11417
(ctxt->sax->endElement != NULL) &&
11418
(!ctxt->disableSAX))
11419
ctxt->sax->endElement(ctxt->userData, name);
11420
#endif /* LIBXML_SAX1_ENABLED */
11422
if (ctxt->instate == XML_PARSER_EOF)
11425
if (ctxt->nameNr == 0) {
11426
ctxt->instate = XML_PARSER_EPILOG;
11428
ctxt->instate = XML_PARSER_CONTENT;
11430
ctxt->progressive = 1;
11436
xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11437
"Couldn't find end of Start Tag %s\n",
11443
nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11444
#ifdef LIBXML_SAX1_ENABLED
11446
namePush(ctxt, name);
11447
#endif /* LIBXML_SAX1_ENABLED */
11449
ctxt->instate = XML_PARSER_CONTENT;
11450
ctxt->progressive = 1;
11453
case XML_PARSER_CONTENT: {
11454
const xmlChar *test;
11456
if ((avail < 2) && (ctxt->inputNr == 1))
11458
cur = ctxt->input->cur[0];
11459
next = ctxt->input->cur[1];
11462
cons = ctxt->input->consumed;
11463
if ((cur == '<') && (next == '/')) {
11464
ctxt->instate = XML_PARSER_END_TAG;
11466
} else if ((cur == '<') && (next == '?')) {
11467
if ((!terminate) &&
11468
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11469
ctxt->progressive = XML_PARSER_PI;
11473
ctxt->instate = XML_PARSER_CONTENT;
11474
ctxt->progressive = 1;
11475
} else if ((cur == '<') && (next != '!')) {
11476
ctxt->instate = XML_PARSER_START_TAG;
11478
} else if ((cur == '<') && (next == '!') &&
11479
(ctxt->input->cur[2] == '-') &&
11480
(ctxt->input->cur[3] == '-')) {
11485
ctxt->input->cur += 4;
11486
term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11487
ctxt->input->cur -= 4;
11488
if ((!terminate) && (term < 0)) {
11489
ctxt->progressive = XML_PARSER_COMMENT;
11492
xmlParseComment(ctxt);
11493
ctxt->instate = XML_PARSER_CONTENT;
11494
ctxt->progressive = 1;
11495
} else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11496
(ctxt->input->cur[2] == '[') &&
11497
(ctxt->input->cur[3] == 'C') &&
11498
(ctxt->input->cur[4] == 'D') &&
11499
(ctxt->input->cur[5] == 'A') &&
11500
(ctxt->input->cur[6] == 'T') &&
11501
(ctxt->input->cur[7] == 'A') &&
11502
(ctxt->input->cur[8] == '[')) {
11504
ctxt->instate = XML_PARSER_CDATA_SECTION;
11506
} else if ((cur == '<') && (next == '!') &&
11509
} else if (cur == '&') {
11510
if ((!terminate) &&
11511
(xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11513
xmlParseReference(ctxt);
11515
/* TODO Avoid the extra copy, handle directly !!! */
11517
* Goal of the following test is:
11518
* - minimize calls to the SAX 'character' callback
11519
* when they are mergeable
11520
* - handle a problem for isBlank when we only parse
11521
* a sequence of blank chars and the next one is
11522
* not available to check against '<' presence.
11523
* - tries to homogenize the differences in SAX
11524
* callbacks between the push and pull versions
11527
if ((ctxt->inputNr == 1) &&
11528
(avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11530
if (ctxt->progressive) {
11531
if ((lastlt == NULL) ||
11532
(ctxt->input->cur > lastlt))
11534
} else if (xmlParseLookupSequence(ctxt,
11540
ctxt->checkIndex = 0;
11541
xmlParseCharData(ctxt, 0);
11544
* Pop-up of finished entities.
11546
while ((RAW == 0) && (ctxt->inputNr > 1))
11548
if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11549
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11550
"detected an error in element content\n");
11551
ctxt->instate = XML_PARSER_EOF;
11556
case XML_PARSER_END_TAG:
11560
if (ctxt->progressive) {
11561
/* > can be found unescaped in attribute values */
11562
if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11564
} else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11569
xmlParseEndTag2(ctxt,
11570
(void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11571
(void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11572
(int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11575
#ifdef LIBXML_SAX1_ENABLED
11577
xmlParseEndTag1(ctxt, 0);
11578
#endif /* LIBXML_SAX1_ENABLED */
11579
if (ctxt->instate == XML_PARSER_EOF) {
11581
} else if (ctxt->nameNr == 0) {
11582
ctxt->instate = XML_PARSER_EPILOG;
11584
ctxt->instate = XML_PARSER_CONTENT;
11587
case XML_PARSER_CDATA_SECTION: {
11589
* The Push mode needs to have the SAX callback for
11590
* cdataBlock merge back contiguous callbacks.
11594
base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11596
if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11599
tmp = xmlCheckCdataPush(ctxt->input->cur,
11600
XML_PARSER_BIG_BUFFER_SIZE);
11603
ctxt->input->cur += tmp;
11604
goto encoding_error;
11606
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11607
if (ctxt->sax->cdataBlock != NULL)
11608
ctxt->sax->cdataBlock(ctxt->userData,
11609
ctxt->input->cur, tmp);
11610
else if (ctxt->sax->characters != NULL)
11611
ctxt->sax->characters(ctxt->userData,
11612
ctxt->input->cur, tmp);
11614
if (ctxt->instate == XML_PARSER_EOF)
11617
ctxt->checkIndex = 0;
11623
tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11624
if ((tmp < 0) || (tmp != base)) {
11626
ctxt->input->cur += tmp;
11627
goto encoding_error;
11629
if ((ctxt->sax != NULL) && (base == 0) &&
11630
(ctxt->sax->cdataBlock != NULL) &&
11631
(!ctxt->disableSAX)) {
11633
* Special case to provide identical behaviour
11634
* between pull and push parsers on empty CDATA
11637
if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11638
(!strncmp((const char *)&ctxt->input->cur[-9],
11640
ctxt->sax->cdataBlock(ctxt->userData,
11642
} else if ((ctxt->sax != NULL) && (base > 0) &&
11643
(!ctxt->disableSAX)) {
11644
if (ctxt->sax->cdataBlock != NULL)
11645
ctxt->sax->cdataBlock(ctxt->userData,
11646
ctxt->input->cur, base);
11647
else if (ctxt->sax->characters != NULL)
11648
ctxt->sax->characters(ctxt->userData,
11649
ctxt->input->cur, base);
11651
if (ctxt->instate == XML_PARSER_EOF)
11654
ctxt->checkIndex = 0;
11655
ctxt->instate = XML_PARSER_CONTENT;
11657
xmlGenericError(xmlGenericErrorContext,
11658
"PP: entering CONTENT\n");
11663
case XML_PARSER_MISC:
11665
if (ctxt->input->buf == NULL)
11666
avail = ctxt->input->length -
11667
(ctxt->input->cur - ctxt->input->base);
11669
avail = xmlBufUse(ctxt->input->buf->buffer) -
11670
(ctxt->input->cur - ctxt->input->base);
11673
cur = ctxt->input->cur[0];
11674
next = ctxt->input->cur[1];
11675
if ((cur == '<') && (next == '?')) {
11676
if ((!terminate) &&
11677
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11678
ctxt->progressive = XML_PARSER_PI;
11682
xmlGenericError(xmlGenericErrorContext,
11683
"PP: Parsing PI\n");
11686
if (ctxt->instate == XML_PARSER_EOF)
11688
ctxt->instate = XML_PARSER_MISC;
11689
ctxt->progressive = 1;
11690
ctxt->checkIndex = 0;
11691
} else if ((cur == '<') && (next == '!') &&
11692
(ctxt->input->cur[2] == '-') &&
11693
(ctxt->input->cur[3] == '-')) {
11694
if ((!terminate) &&
11695
(xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11696
ctxt->progressive = XML_PARSER_COMMENT;
11700
xmlGenericError(xmlGenericErrorContext,
11701
"PP: Parsing Comment\n");
11703
xmlParseComment(ctxt);
11704
if (ctxt->instate == XML_PARSER_EOF)
11706
ctxt->instate = XML_PARSER_MISC;
11707
ctxt->progressive = 1;
11708
ctxt->checkIndex = 0;
11709
} else if ((cur == '<') && (next == '!') &&
11710
(ctxt->input->cur[2] == 'D') &&
11711
(ctxt->input->cur[3] == 'O') &&
11712
(ctxt->input->cur[4] == 'C') &&
11713
(ctxt->input->cur[5] == 'T') &&
11714
(ctxt->input->cur[6] == 'Y') &&
11715
(ctxt->input->cur[7] == 'P') &&
11716
(ctxt->input->cur[8] == 'E')) {
11717
if ((!terminate) &&
11718
(xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11719
ctxt->progressive = XML_PARSER_DTD;
11723
xmlGenericError(xmlGenericErrorContext,
11724
"PP: Parsing internal subset\n");
11726
ctxt->inSubset = 1;
11727
ctxt->progressive = 0;
11728
ctxt->checkIndex = 0;
11729
xmlParseDocTypeDecl(ctxt);
11730
if (ctxt->instate == XML_PARSER_EOF)
11733
ctxt->instate = XML_PARSER_DTD;
11735
xmlGenericError(xmlGenericErrorContext,
11736
"PP: entering DTD\n");
11740
* Create and update the external subset.
11742
ctxt->inSubset = 2;
11743
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11744
(ctxt->sax->externalSubset != NULL))
11745
ctxt->sax->externalSubset(ctxt->userData,
11746
ctxt->intSubName, ctxt->extSubSystem,
11748
ctxt->inSubset = 0;
11749
xmlCleanSpecialAttr(ctxt);
11750
ctxt->instate = XML_PARSER_PROLOG;
11752
xmlGenericError(xmlGenericErrorContext,
11753
"PP: entering PROLOG\n");
11756
} else if ((cur == '<') && (next == '!') &&
11760
ctxt->instate = XML_PARSER_START_TAG;
11761
ctxt->progressive = XML_PARSER_START_TAG;
11762
xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11764
xmlGenericError(xmlGenericErrorContext,
11765
"PP: entering START_TAG\n");
11769
case XML_PARSER_PROLOG:
11771
if (ctxt->input->buf == NULL)
11772
avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11774
avail = xmlBufUse(ctxt->input->buf->buffer) -
11775
(ctxt->input->cur - ctxt->input->base);
11778
cur = ctxt->input->cur[0];
11779
next = ctxt->input->cur[1];
11780
if ((cur == '<') && (next == '?')) {
11781
if ((!terminate) &&
11782
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11783
ctxt->progressive = XML_PARSER_PI;
11787
xmlGenericError(xmlGenericErrorContext,
11788
"PP: Parsing PI\n");
11791
if (ctxt->instate == XML_PARSER_EOF)
11793
ctxt->instate = XML_PARSER_PROLOG;
11794
ctxt->progressive = 1;
11795
} else if ((cur == '<') && (next == '!') &&
11796
(ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11797
if ((!terminate) &&
11798
(xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11799
ctxt->progressive = XML_PARSER_COMMENT;
11803
xmlGenericError(xmlGenericErrorContext,
11804
"PP: Parsing Comment\n");
11806
xmlParseComment(ctxt);
11807
if (ctxt->instate == XML_PARSER_EOF)
11809
ctxt->instate = XML_PARSER_PROLOG;
11810
ctxt->progressive = 1;
11811
} else if ((cur == '<') && (next == '!') &&
11815
ctxt->instate = XML_PARSER_START_TAG;
11816
if (ctxt->progressive == 0)
11817
ctxt->progressive = XML_PARSER_START_TAG;
11818
xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11820
xmlGenericError(xmlGenericErrorContext,
11821
"PP: entering START_TAG\n");
11825
case XML_PARSER_EPILOG:
11827
if (ctxt->input->buf == NULL)
11828
avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11830
avail = xmlBufUse(ctxt->input->buf->buffer) -
11831
(ctxt->input->cur - ctxt->input->base);
11834
cur = ctxt->input->cur[0];
11835
next = ctxt->input->cur[1];
11836
if ((cur == '<') && (next == '?')) {
11837
if ((!terminate) &&
11838
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11839
ctxt->progressive = XML_PARSER_PI;
11843
xmlGenericError(xmlGenericErrorContext,
11844
"PP: Parsing PI\n");
11847
if (ctxt->instate == XML_PARSER_EOF)
11849
ctxt->instate = XML_PARSER_EPILOG;
11850
ctxt->progressive = 1;
11851
} else if ((cur == '<') && (next == '!') &&
11852
(ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11853
if ((!terminate) &&
11854
(xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11855
ctxt->progressive = XML_PARSER_COMMENT;
11859
xmlGenericError(xmlGenericErrorContext,
11860
"PP: Parsing Comment\n");
11862
xmlParseComment(ctxt);
11863
if (ctxt->instate == XML_PARSER_EOF)
11865
ctxt->instate = XML_PARSER_EPILOG;
11866
ctxt->progressive = 1;
11867
} else if ((cur == '<') && (next == '!') &&
11871
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11872
ctxt->instate = XML_PARSER_EOF;
11874
xmlGenericError(xmlGenericErrorContext,
11875
"PP: entering EOF\n");
11877
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11878
ctxt->sax->endDocument(ctxt->userData);
11882
case XML_PARSER_DTD: {
11884
* Sorry but progressive parsing of the internal subset
11885
* is not expected to be supported. We first check that
11886
* the full content of the internal subset is available and
11887
* the parsing is launched only at that point.
11888
* Internal subset ends up with "']' S? '>'" in an unescaped
11889
* section and not in a ']]>' sequence which are conditional
11890
* sections (whoever argued to keep that crap in XML deserve
11891
* a place in hell !).
11898
base = ctxt->input->cur - ctxt->input->base;
11899
if (base < 0) return(0);
11900
if (ctxt->checkIndex > base)
11901
base = ctxt->checkIndex;
11902
buf = xmlBufContent(ctxt->input->buf->buffer);
11903
use = xmlBufUse(ctxt->input->buf->buffer);
11904
for (;(unsigned int) base < use; base++) {
11906
if (buf[base] == quote)
11910
if ((quote == 0) && (buf[base] == '<')) {
11912
/* special handling of comments */
11913
if (((unsigned int) base + 4 < use) &&
11914
(buf[base + 1] == '!') &&
11915
(buf[base + 2] == '-') &&
11916
(buf[base + 3] == '-')) {
11917
for (;(unsigned int) base + 3 < use; base++) {
11918
if ((buf[base] == '-') &&
11919
(buf[base + 1] == '-') &&
11920
(buf[base + 2] == '>')) {
11928
fprintf(stderr, "unfinished comment\n");
11935
if (buf[base] == '"') {
11939
if (buf[base] == '\'') {
11943
if (buf[base] == ']') {
11945
fprintf(stderr, "%c%c%c%c: ", buf[base],
11946
buf[base + 1], buf[base + 2], buf[base + 3]);
11948
if ((unsigned int) base +1 >= use)
11950
if (buf[base + 1] == ']') {
11951
/* conditional crap, skip both ']' ! */
11955
for (i = 1; (unsigned int) base + i < use; i++) {
11956
if (buf[base + i] == '>') {
11958
fprintf(stderr, "found\n");
11960
goto found_end_int_subset;
11962
if (!IS_BLANK_CH(buf[base + i])) {
11964
fprintf(stderr, "not found\n");
11966
goto not_end_of_int_subset;
11970
fprintf(stderr, "end of stream\n");
11975
not_end_of_int_subset:
11976
continue; /* for */
11979
* We didn't find the end of the Internal subset
11982
ctxt->checkIndex = base;
11984
ctxt->checkIndex = 0;
11987
xmlGenericError(xmlGenericErrorContext,
11988
"PP: lookup of int subset end filed\n");
11992
found_end_int_subset:
11993
ctxt->checkIndex = 0;
11994
xmlParseInternalSubset(ctxt);
11995
if (ctxt->instate == XML_PARSER_EOF)
11997
ctxt->inSubset = 2;
11998
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11999
(ctxt->sax->externalSubset != NULL))
12000
ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12001
ctxt->extSubSystem, ctxt->extSubURI);
12002
ctxt->inSubset = 0;
12003
xmlCleanSpecialAttr(ctxt);
12004
if (ctxt->instate == XML_PARSER_EOF)
12006
ctxt->instate = XML_PARSER_PROLOG;
12007
ctxt->checkIndex = 0;
12009
xmlGenericError(xmlGenericErrorContext,
12010
"PP: entering PROLOG\n");
12014
case XML_PARSER_COMMENT:
12015
xmlGenericError(xmlGenericErrorContext,
12016
"PP: internal error, state == COMMENT\n");
12017
ctxt->instate = XML_PARSER_CONTENT;
12019
xmlGenericError(xmlGenericErrorContext,
12020
"PP: entering CONTENT\n");
12023
case XML_PARSER_IGNORE:
12024
xmlGenericError(xmlGenericErrorContext,
12025
"PP: internal error, state == IGNORE");
12026
ctxt->instate = XML_PARSER_DTD;
12028
xmlGenericError(xmlGenericErrorContext,
12029
"PP: entering DTD\n");
12032
case XML_PARSER_PI:
12033
xmlGenericError(xmlGenericErrorContext,
12034
"PP: internal error, state == PI\n");
12035
ctxt->instate = XML_PARSER_CONTENT;
12037
xmlGenericError(xmlGenericErrorContext,
12038
"PP: entering CONTENT\n");
12041
case XML_PARSER_ENTITY_DECL:
12042
xmlGenericError(xmlGenericErrorContext,
12043
"PP: internal error, state == ENTITY_DECL\n");
12044
ctxt->instate = XML_PARSER_DTD;
12046
xmlGenericError(xmlGenericErrorContext,
12047
"PP: entering DTD\n");
12050
case XML_PARSER_ENTITY_VALUE:
12051
xmlGenericError(xmlGenericErrorContext,
12052
"PP: internal error, state == ENTITY_VALUE\n");
12053
ctxt->instate = XML_PARSER_CONTENT;
12055
xmlGenericError(xmlGenericErrorContext,
12056
"PP: entering DTD\n");
12059
case XML_PARSER_ATTRIBUTE_VALUE:
12060
xmlGenericError(xmlGenericErrorContext,
12061
"PP: internal error, state == ATTRIBUTE_VALUE\n");
12062
ctxt->instate = XML_PARSER_START_TAG;
12064
xmlGenericError(xmlGenericErrorContext,
12065
"PP: entering START_TAG\n");
12068
case XML_PARSER_SYSTEM_LITERAL:
12069
xmlGenericError(xmlGenericErrorContext,
12070
"PP: internal error, state == SYSTEM_LITERAL\n");
12071
ctxt->instate = XML_PARSER_START_TAG;
12073
xmlGenericError(xmlGenericErrorContext,
12074
"PP: entering START_TAG\n");
12077
case XML_PARSER_PUBLIC_LITERAL:
12078
xmlGenericError(xmlGenericErrorContext,
12079
"PP: internal error, state == PUBLIC_LITERAL\n");
12080
ctxt->instate = XML_PARSER_START_TAG;
12082
xmlGenericError(xmlGenericErrorContext,
12083
"PP: entering START_TAG\n");
12090
xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12097
snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12098
ctxt->input->cur[0], ctxt->input->cur[1],
12099
ctxt->input->cur[2], ctxt->input->cur[3]);
12100
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12101
"Input is not proper UTF-8, indicate encoding !\n%s",
12102
BAD_CAST buffer, NULL);
12108
* xmlParseCheckTransition:
12109
* @ctxt: an XML parser context
12110
* @chunk: a char array
12111
* @size: the size in byte of the chunk
12113
* Check depending on the current parser state if the chunk given must be
12114
* processed immediately or one needs more data to advance on parsing.
12116
* Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12119
xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
/*
 * Decide, from the current parser state, whether the bytes in
 * chunk[0..size) justify another parsing attempt. Every state handled
 * below uses the same probe: a memchr() search for a '>' byte in the
 * chunk. COMMENT, PI and DTD are recognised through ctxt->progressive
 * (DTD also through ctxt->instate); the other states through
 * ctxt->instate only.
 * NOTE(review): the return statements are not visible in this excerpt;
 * the function header documents -1 (error), 0 (no push needed) and
 * 1 (push needed).
 */
12120
/* Argument check: NULL context, NULL chunk or negative size. */
if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12122
/* In a start tag: look for '>' in the chunk. */
if (ctxt->instate == XML_PARSER_START_TAG) {
12123
if (memchr(chunk, '>', size) != NULL)
12127
/* Waiting for the end of a comment. */
if (ctxt->progressive == XML_PARSER_COMMENT) {
12128
if (memchr(chunk, '>', size) != NULL)
12132
/* Inside a CDATA section. */
if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12133
if (memchr(chunk, '>', size) != NULL)
12137
/* Waiting for the end of a processing instruction. */
if (ctxt->progressive == XML_PARSER_PI) {
12138
if (memchr(chunk, '>', size) != NULL)
12142
/* In an end tag. */
if (ctxt->instate == XML_PARSER_END_TAG) {
12143
if (memchr(chunk, '>', size) != NULL)
12147
/* DTD: either flag marks the state. */
if ((ctxt->progressive == XML_PARSER_DTD) ||
12148
(ctxt->instate == XML_PARSER_DTD)) {
12149
if (memchr(chunk, '>', size) != NULL)
12158
* @ctxt: an XML parser context
12159
* @chunk: a char array
12160
* @size: the size in byte of the chunk
12161
* @terminate: last chunk indicator
12163
* Parse a Chunk of memory
12165
* Returns zero if no error, the xmlParserErrors otherwise.
12168
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
/*
 * Push-parser entry point: appends chunk[0..size) to the current input
 * buffer, then calls xmlParseTryOrFinish() when the new data looks worth
 * parsing, and finally runs the end-of-document checks.
 * NOTE(review): several lines of this function (remaining parameters,
 * some declarations, braces and returns) are not visible in this excerpt.
 */
12172
/* Buffer fill level before this push; 0 means "nothing pushed". */
size_t old_avail = 0;
12176
return(XML_ERR_INTERNAL_ERROR);
12177
/* A previous error already disabled SAX: report it again and stop. */
if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12178
return(ctxt->errNo);
12179
if (ctxt->instate == XML_PARSER_EOF)
12181
if (ctxt->instate == XML_PARSER_START)
12182
xmlDetectSAX2(ctxt);
12183
/*
 * A non-final chunk ending in '\r' is special-cased. Further down, when
 * end_in_lf == 1, a single "\r" is pushed into the input buffer after
 * parsing. NOTE(review): the statements inside this branch are not
 * visible here.
 */
if ((size > 0) && (chunk != NULL) && (!terminate) &&
12184
(chunk[size - 1] == '\r')) {
12191
/* There is data to push and a live input buffer to receive it. */
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12192
(ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
12193
/* Save base and cur as offsets; they are re-applied after the push. */
size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12194
size_t cur = ctxt->input->cur - ctxt->input->base;
12197
/* Used later to hand only the newly added bytes to xmlParseCheckTransition(). */
old_avail = xmlBufUse(ctxt->input->buf->buffer);
12199
* Specific handling if we autodetected an encoding, we should not
12200
* push more than the first line ... which depend on the encoding
12201
* And only push the rest once the final encoding was detected
12203
if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12204
(ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12205
/*
 * Byte budget for the first push; the adjustments made in the
 * UTF-16 / UCS-4 branches below are not visible in this excerpt.
 */
unsigned int len = 45;
12207
if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12208
BAD_CAST "UTF-16")) ||
12209
(xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12210
BAD_CAST "UTF16")))
12212
else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12213
BAD_CAST "UCS-4")) ||
12214
(xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12218
/* Raw bytes already consumed count against the budget. */
if (ctxt->input->buf->rawconsumed < len)
12219
len -= ctxt->input->buf->rawconsumed;
12222
* Change size for reading the initial declaration only
12223
* if size is greater than len. Otherwise, memmove in xmlBufferAdd
12224
* will blindly copy extra bytes from memory.
12226
if ((unsigned int) size > len) {
12227
/* Bytes beyond the budget are counted in 'remain'. */
remain = size - len;
12233
/* Append the chunk to the input buffer. */
res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12235
/*
 * NOTE(review): XML_PARSER_EOF is compared against ctxt->instate
 * elsewhere in this function, i.e. it is a parser state; here it is
 * stored in errNo and returned as the error value. Confirm that this
 * is intended.
 */
ctxt->errNo = XML_PARSER_EOF;
12236
ctxt->disableSAX = 1;
12237
return (XML_PARSER_EOF);
12239
/* Re-apply the base/cur offsets captured before the push. */
xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12241
xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12244
/* Nothing was pushed: run the encoder if one is set and raw data exists. */
} else if (ctxt->instate != XML_PARSER_EOF) {
12245
if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12246
xmlParserInputBufferPtr in = ctxt->input->buf;
12247
if ((in->encoder != NULL) && (in->buffer != NULL) &&
12248
(in->raw != NULL)) {
12250
size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12251
size_t current = ctxt->input->cur - ctxt->input->base;
12253
nbchars = xmlCharEncInput(in, terminate);
12256
xmlGenericError(xmlGenericErrorContext,
12257
"xmlParseChunk: encoder error\n");
12258
return(XML_ERR_INVALID_ENCODING);
12260
xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12265
xmlParseTryOrFinish(ctxt, 0);
12267
if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12268
avail = xmlBufUse(ctxt->input->buf->buffer);
12270
* Depending on the current state it may not be such
12271
* a good idea to try parsing if there is nothing in the chunk
12272
* which would be worth doing a parser state transition and we
12273
* need to wait for more data
12275
/*
 * Parse when: this is the last chunk, the buffer exceeds
 * XML_MAX_TEXT_LENGTH, old_avail or avail is zero, or
 * xmlParseCheckTransition() returns non-zero for the bytes added since
 * old_avail.
 */
if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12276
(old_avail == 0) || (avail == 0) ||
12277
(xmlParseCheckTransition(ctxt,
12278
(const char *)&ctxt->input->base[old_avail],
12279
avail - old_avail)))
12280
xmlParseTryOrFinish(ctxt, terminate);
12282
if (ctxt->instate == XML_PARSER_EOF)
12283
return(ctxt->errNo);
12285
/*
 * Unless XML_PARSE_HUGE is set, give up when either the unread span
 * (end - cur) or the already-read span (cur - base) exceeds
 * XML_MAX_LOOKUP_LIMIT.
 */
if ((ctxt->input != NULL) &&
12286
(((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12287
((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12288
((ctxt->options & XML_PARSE_HUGE) == 0)) {
12289
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12290
ctxt->instate = XML_PARSER_EOF;
12292
if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12293
return(ctxt->errNo);
12301
/* Push a single "\r" into the buffer, preserving base/cur offsets. */
if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12302
(ctxt->input->buf != NULL)) {
12303
size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12305
size_t current = ctxt->input->cur - ctxt->input->base;
12307
xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12309
xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12314
* Check for termination
12318
/* Count the bytes still unread in the current input. */
if (ctxt->input != NULL) {
12319
if (ctxt->input->buf == NULL)
12320
cur_avail = ctxt->input->length -
12321
(ctxt->input->cur - ctxt->input->base);
12323
cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12324
(ctxt->input->cur - ctxt->input->base);
12327
/* Ending in any state other than EOF or EPILOG is a fatal error. */
if ((ctxt->instate != XML_PARSER_EOF) &&
12328
(ctxt->instate != XML_PARSER_EPILOG)) {
12329
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12331
/* Unread bytes left while in the epilog are a fatal error too. */
if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12332
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12334
/* Fire endDocument if EOF was not already reached, then mark EOF. */
if (ctxt->instate != XML_PARSER_EOF) {
12335
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12336
ctxt->sax->endDocument(ctxt->userData);
12338
ctxt->instate = XML_PARSER_EOF;
12340
if (ctxt->wellFormed == 0)
12341
return((xmlParserErrors) ctxt->errNo);
12346
/************************************************************************
12348
* I/O front end functions to the parser *
12350
************************************************************************/
12353
* xmlCreatePushParserCtxt:
12354
* @sax: a SAX handler
12355
* @user_data: The user data returned on SAX callbacks
12356
* @chunk: a pointer to an array of chars
12357
* @size: number of chars in the array
12358
* @filename: an optional file name or URI
12360
* Create a parser context for using the XML parser in push mode.
12361
* If @chunk is non-NULL and @size is non-zero, the data is used to detect
12362
* the encoding. The remaining characters will be parsed so they
12363
* don't need to be fed in again through xmlParseChunk.
12364
* To allow content encoding detection, @size should be >= 4
12365
* The value of @filename is used for fetching external entities
12366
* and error/warning reports.
12368
* Returns the new parser context or NULL
12372
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12373
const char *chunk, int size, const char *filename) {
/*
 * Builds a parser context for push mode: allocates the input buffer and
 * the context, installs a private copy of the caller's SAX handler,
 * creates and pushes the input stream, and feeds it the optional first
 * chunk.
 * NOTE(review): several lines (returns, braces, else keywords) are not
 * visible in this excerpt.
 */
12374
xmlParserCtxtPtr ctxt;
12375
xmlParserInputPtr inputStream;
12376
xmlParserInputBufferPtr buf;
12377
xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12380
* plug some encoding conversion routines
12382
/* Encoding detection is attempted only with at least 4 bytes. */
if ((chunk != NULL) && (size >= 4))
12383
enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12385
buf = xmlAllocParserInputBuffer(enc);
12386
if (buf == NULL) return(NULL);
12388
ctxt = xmlNewParserCtxt();
12389
if (ctxt == NULL) {
12390
xmlErrMemory(NULL, "creating parser: out of memory\n");
12391
xmlFreeParserInputBuffer(buf);
12394
ctxt->dictNames = 1;
12395
/*
 * pushTab holds three pointer-sized slots per name-stack entry; the
 * xmlParseEndTag2() call site in this file reads slots
 * nameNr * 3 - 3, - 2 and - 1.
 */
ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12396
if (ctxt->pushTab == NULL) {
12397
xmlErrMemory(ctxt, NULL);
12398
xmlFreeParserInputBuffer(buf);
12399
xmlFreeParserCtxt(ctxt);
12403
/* Replace the context's handler with a zeroed private allocation. */
#ifdef LIBXML_SAX1_ENABLED
12404
if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12405
#endif /* LIBXML_SAX1_ENABLED */
12406
xmlFree(ctxt->sax);
12407
ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12408
if (ctxt->sax == NULL) {
12409
xmlErrMemory(ctxt, NULL);
12410
xmlFreeParserInputBuffer(buf);
12411
xmlFreeParserCtxt(ctxt);
12414
memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12415
/*
 * Copy the full handler when it carries the SAX2 magic; the other
 * memcpy copies only the xmlSAXHandlerV1-sized prefix.
 */
if (sax->initialized == XML_SAX2_MAGIC)
12416
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12418
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12419
if (user_data != NULL)
12420
ctxt->userData = user_data;
12422
if (filename == NULL) {
12423
ctxt->directory = NULL;
12425
ctxt->directory = xmlParserGetDirectory(filename);
12428
inputStream = xmlNewInputStream(ctxt);
12429
if (inputStream == NULL) {
12430
xmlFreeParserCtxt(ctxt);
12431
xmlFreeParserInputBuffer(buf);
12435
if (filename == NULL)
12436
inputStream->filename = NULL;
12438
inputStream->filename = (char *)
12439
xmlCanonicPath((const xmlChar *) filename);
12440
/*
 * NOTE(review): on this failure path the visible lines free ctxt and
 * buf, but inputStream has not been pushed onto ctxt yet and no call
 * releasing it is visible. Possible leak; confirm.
 */
if (inputStream->filename == NULL) {
12441
xmlFreeParserCtxt(ctxt);
12442
xmlFreeParserInputBuffer(buf);
12446
/* Attach the buffer to the stream and push the stream on the context. */
inputStream->buf = buf;
12447
xmlBufResetInput(inputStream->buf->buffer, inputStream);
12448
inputPush(ctxt, inputStream);
12451
* If the caller didn't provide an initial 'chunk' for determining
12452
* the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12453
* that it can be automatically determined later
12455
if ((size == 0) || (chunk == NULL)) {
12456
ctxt->charset = XML_CHAR_ENCODING_NONE;
12457
} else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12458
/* Save base/cur as offsets, push the chunk, then re-apply them. */
size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12459
size_t cur = ctxt->input->cur - ctxt->input->base;
12461
xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12463
xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12465
xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12469
/* Switch to the encoding detected from the first bytes, if any. */
if (enc != XML_CHAR_ENCODING_NONE) {
12470
xmlSwitchEncoding(ctxt, enc);
12475
#endif /* LIBXML_PUSH_ENABLED */
12479
* @ctxt: an XML parser context
12481
* Blocks further parser processing
12484
xmlStopParser(xmlParserCtxtPtr ctxt) {
/*
 * Stops parsing on this context: forces the EOF state, records
 * XML_ERR_USER_STOP as the error and disables SAX callbacks.
 * NOTE(review): any NULL check on ctxt is not visible in this excerpt.
 */
12487
ctxt->instate = XML_PARSER_EOF;
12488
ctxt->errNo = XML_ERR_USER_STOP;
12489
ctxt->disableSAX = 1;
12490
/* Point cur and base at an empty string literal. */
if (ctxt->input != NULL) {
12491
ctxt->input->cur = BAD_CAST"";
12492
ctxt->input->base = ctxt->input->cur;
12497
* xmlCreateIOParserCtxt:
12498
* @sax: a SAX handler
12499
* @user_data: The user data returned on SAX callbacks
12500
* @ioread: an I/O read function
12501
* @ioclose: an I/O close function
12502
* @ioctx: an I/O handler
12503
* @enc: the charset encoding if known
12505
* Create a parser context for using the XML parser with an existing
12508
* Returns the new parser context or NULL
12511
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12512
xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12513
void *ioctx, xmlCharEncoding enc) {
/*
 * Builds a parser context that reads through caller-supplied I/O
 * callbacks: wraps them in an input buffer, allocates the context,
 * installs a private copy of the caller's SAX handler and pushes an
 * input stream created from the buffer.
 * NOTE(review): returns, braces and some conditions are not visible in
 * this excerpt.
 */
12514
xmlParserCtxtPtr ctxt;
12515
xmlParserInputPtr inputStream;
12516
xmlParserInputBufferPtr buf;
12518
/* A read callback is mandatory. */
if (ioread == NULL) return(NULL);
12520
buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12522
if (ioclose != NULL)
12527
ctxt = xmlNewParserCtxt();
12528
if (ctxt == NULL) {
12529
xmlFreeParserInputBuffer(buf);
12533
/* Replace the context's handler with a zeroed private allocation. */
#ifdef LIBXML_SAX1_ENABLED
12534
if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12535
#endif /* LIBXML_SAX1_ENABLED */
12536
xmlFree(ctxt->sax);
12537
ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12538
/*
 * NOTE(review): the visible lines of this failure path free ctxt but
 * show no release of buf. Possible leak; confirm.
 */
if (ctxt->sax == NULL) {
12539
xmlErrMemory(ctxt, NULL);
12540
xmlFreeParserCtxt(ctxt);
12543
memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12544
/*
 * Copy the full handler when it carries the SAX2 magic; the other
 * memcpy copies only the xmlSAXHandlerV1-sized prefix.
 */
if (sax->initialized == XML_SAX2_MAGIC)
12545
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12547
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12548
if (user_data != NULL)
12549
ctxt->userData = user_data;
12552
/* Create the input stream from the buffer and push it on the context. */
inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12553
if (inputStream == NULL) {
12554
xmlFreeParserCtxt(ctxt);
12557
inputPush(ctxt, inputStream);
12562
#ifdef LIBXML_VALID_ENABLED
12563
/************************************************************************
12565
* Front ends when parsing a DTD *
12567
************************************************************************/
12571
* @sax: the SAX handler block or NULL
12572
* @input: an Input Buffer
12573
* @enc: the charset encoding if known
12575
* Load and parse a DTD
12577
* Returns the resulting xmlDtdPtr or NULL in case of error.
12578
* @input will be freed by the function in any case.
12582
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12583
xmlCharEncoding enc) {
/*
 * Parses a DTD read from an input buffer: creates a scratch context and
 * document, parses the buffer as an external subset, and on a well-formed
 * result takes the extSubset out of the scratch document before that
 * document is freed.
 * NOTE(review): returns, braces and some statements (including the line
 * installing the caller's sax on the context) are not visible in this
 * excerpt.
 */
12584
xmlDtdPtr ret = NULL;
12585
xmlParserCtxtPtr ctxt;
12586
xmlParserInputPtr pinput = NULL;
12592
ctxt = xmlNewParserCtxt();
12593
if (ctxt == NULL) {
12594
xmlFreeParserInputBuffer(input);
12599
* Set-up the SAX context
12602
/* Drop the handler that came with the context. */
if (ctxt->sax != NULL)
12603
xmlFree(ctxt->sax);
12605
ctxt->userData = ctxt;
12607
xmlDetectSAX2(ctxt);
12610
* generate a parser input from the I/O handler
12613
pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12614
/*
 * On every exit path ctxt->sax is reset to NULL before
 * xmlFreeParserCtxt() when the caller passed a handler; presumably so
 * the caller's handler is not released with the context. Confirm against
 * xmlFreeParserCtxt().
 */
if (pinput == NULL) {
12615
if (sax != NULL) ctxt->sax = NULL;
12616
xmlFreeParserInputBuffer(input);
12617
xmlFreeParserCtxt(ctxt);
12622
* plug some encoding conversion routines here.
12624
if (xmlPushInput(ctxt, pinput) < 0) {
12625
if (sax != NULL) ctxt->sax = NULL;
12626
xmlFreeParserCtxt(ctxt);
12629
/* An encoding supplied by the caller is applied directly. */
if (enc != XML_CHAR_ENCODING_NONE) {
12630
xmlSwitchEncoding(ctxt, enc);
12633
pinput->filename = NULL;
12636
/* Re-anchor base and cur of the pushed input on the current position. */
pinput->base = ctxt->input->cur;
12637
pinput->cur = ctxt->input->cur;
12638
pinput->free = NULL;
12641
* let's parse that entity knowing it's an external subset.
12643
ctxt->inSubset = 2;
12644
/* Scratch document that owns the DTD while it is being parsed. */
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12645
if (ctxt->myDoc == NULL) {
12646
xmlErrMemory(ctxt, "New Doc failed");
12649
ctxt->myDoc->properties = XML_DOC_INTERNAL;
12650
ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12651
BAD_CAST "none", BAD_CAST "none");
12653
/* No encoding given: detect one when at least 4 bytes are available. */
if ((enc == XML_CHAR_ENCODING_NONE) &&
12654
((ctxt->input->end - ctxt->input->cur) >= 4)) {
12656
* Get the 4 first bytes and decode the charset
12657
* if enc != XML_CHAR_ENCODING_NONE
12658
* plug some encoding conversion routines.
12664
enc = xmlDetectCharEncoding(start, 4);
12665
if (enc != XML_CHAR_ENCODING_NONE) {
12666
xmlSwitchEncoding(ctxt, enc);
12670
xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12672
if (ctxt->myDoc != NULL) {
12673
/* Well-formed: take the DTD out of the scratch document. */
if (ctxt->wellFormed) {
12674
ret = ctxt->myDoc->extSubset;
12675
ctxt->myDoc->extSubset = NULL;
12680
/* Walk the DTD's children (loop body not visible in this excerpt). */
tmp = ret->children;
12681
while (tmp != NULL) {
12689
xmlFreeDoc(ctxt->myDoc);
12690
ctxt->myDoc = NULL;
12692
if (sax != NULL) ctxt->sax = NULL;
12693
xmlFreeParserCtxt(ctxt);
12700
* @sax: the SAX handler block
12701
* @ExternalID: a NAME* containing the External ID of the DTD
12702
* @SystemID: a NAME* containing the URL to the DTD
12704
* Load and parse an external subset.
12706
* Returns the resulting xmlDtdPtr or NULL in case of error.
12710
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12711
const xmlChar *SystemID) {
/*
 * Loads a DTD named by ExternalID / SystemID through the SAX
 * resolveEntity callback, parses it as an external subset in a scratch
 * document and, on a well-formed result, takes the extSubset out of that
 * document before the document is freed.
 * NOTE(review): returns, braces and some statements (including the line
 * installing the caller's sax on the context) are not visible in this
 * excerpt.
 */
12712
xmlDtdPtr ret = NULL;
12713
xmlParserCtxtPtr ctxt;
12714
xmlParserInputPtr input = NULL;
12715
xmlCharEncoding enc;
12716
xmlChar* systemIdCanonic;
12718
/* At least one identifier is required. */
if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12720
ctxt = xmlNewParserCtxt();
12721
if (ctxt == NULL) {
12726
* Set-up the SAX context
12729
/* Drop the handler that came with the context. */
if (ctxt->sax != NULL)
12730
xmlFree(ctxt->sax);
12732
ctxt->userData = ctxt;
12736
* Canonicalise the system ID
12738
systemIdCanonic = xmlCanonicPath(SystemID);
12739
/* A NULL result for a non-NULL SystemID is treated as a failure. */
if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12740
xmlFreeParserCtxt(ctxt);
12745
* Ask the Entity resolver to load the damn thing
12748
if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12749
input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12751
/*
 * On exit paths ctxt->sax is reset to NULL before xmlFreeParserCtxt()
 * when the caller passed a handler; presumably so the caller's handler
 * is not released with the context. Confirm against
 * xmlFreeParserCtxt().
 */
if (input == NULL) {
12752
if (sax != NULL) ctxt->sax = NULL;
12753
xmlFreeParserCtxt(ctxt);
12754
if (systemIdCanonic != NULL)
12755
xmlFree(systemIdCanonic);
12760
* plug some encoding conversion routines here.
12762
if (xmlPushInput(ctxt, input) < 0) {
12763
if (sax != NULL) ctxt->sax = NULL;
12764
xmlFreeParserCtxt(ctxt);
12765
if (systemIdCanonic != NULL)
12766
xmlFree(systemIdCanonic);
12769
/* Detect the encoding from the first 4 bytes when available. */
if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12770
enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12771
xmlSwitchEncoding(ctxt, enc);
12774
/*
 * The canonical system ID either becomes the input's filename or is
 * freed (the line separating the two statements is not visible here).
 */
if (input->filename == NULL)
12775
input->filename = (char *) systemIdCanonic;
12777
xmlFree(systemIdCanonic);
12780
/* Re-anchor base and cur of the pushed input on the current position. */
input->base = ctxt->input->cur;
12781
input->cur = ctxt->input->cur;
12782
input->free = NULL;
12785
* let's parse that entity knowing it's an external subset.
12787
ctxt->inSubset = 2;
12788
/* Scratch document that owns the DTD while it is being parsed. */
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12789
if (ctxt->myDoc == NULL) {
12790
xmlErrMemory(ctxt, "New Doc failed");
12791
if (sax != NULL) ctxt->sax = NULL;
12792
xmlFreeParserCtxt(ctxt);
12795
ctxt->myDoc->properties = XML_DOC_INTERNAL;
12796
ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12797
ExternalID, SystemID);
12798
xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12800
if (ctxt->myDoc != NULL) {
12801
/* Well-formed: take the DTD out of the scratch document. */
if (ctxt->wellFormed) {
12802
ret = ctxt->myDoc->extSubset;
12803
ctxt->myDoc->extSubset = NULL;
12808
/* Walk the DTD's children (loop body not visible in this excerpt). */
tmp = ret->children;
12809
while (tmp != NULL) {
12817
xmlFreeDoc(ctxt->myDoc);
12818
ctxt->myDoc = NULL;
12820
if (sax != NULL) ctxt->sax = NULL;
12821
xmlFreeParserCtxt(ctxt);
12829
* @ExternalID: a NAME* containing the External ID of the DTD
12830
* @SystemID: a NAME* containing the URL to the DTD
12832
* Load and parse an external subset.
12834
* Returns the resulting xmlDtdPtr or NULL in case of error.
12838
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12839
/* Thin wrapper: same as xmlSAXParseDTD() with a NULL SAX handler. */
return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12841
#endif /* LIBXML_VALID_ENABLED */
12843
/************************************************************************
12845
* Front ends when parsing an Entity *
12847
************************************************************************/
12850
* xmlParseCtxtExternalEntity:
12851
* @ctx: the existing parsing context
12852
* @URL: the URL for the entity to load
12853
* @ID: the System ID for the entity to load
12854
* @lst: the return value for the set of parsed nodes
12856
* Parse an external general entity within an existing parsing context
12857
* An external general parsed entity is well-formed if it matches the
12858
* production labeled extParsedEnt.
12860
* [78] extParsedEnt ::= TextDecl? content
12862
* Returns 0 if the entity is well formed, -1 in case of args problem and
12863
* the parser error code otherwise
12867
/*
 * Parse the external general parsed entity named by URL / ID on behalf of
 * the existing parser context ctx.
 *
 * A child context is created with xmlCreateEntityParserCtxtInternal(); it
 * temporarily uses ctx's SAX handler, dictionary and attribute tables and
 * builds its nodes under a "pseudoroot" element of a scratch document.
 * The SAX handler and attribute tables borrowed from ctx are detached from
 * the child context again before that context is freed.
 *
 * Early returns visible below: -1 when ctx is NULL and XML_ERR_ENTITY_LOOP
 * when the nesting limit is exceeded.
 */
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12868
const xmlChar *ID, xmlNodePtr *lst) {
12869
xmlParserCtxtPtr ctxt;
12871
xmlNodePtr newRoot;
12872
xmlSAXHandlerPtr oldsax = NULL;
12875
xmlCharEncoding enc;
12877
if (ctx == NULL) return(-1);
12879
/* Nesting guard: more than 40 levels without XML_PARSE_HUGE, or more than
 * 1024 levels in any case, is reported as an entity loop. */
if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12880
(ctx->depth > 1024)) {
12881
return(XML_ERR_ENTITY_LOOP);
12886
if ((URL == NULL) && (ID == NULL))
12888
if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12891
ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
12892
if (ctxt == NULL) {
12896
/* Run the child context with the caller's SAX handler; its own handler is
 * parked in oldsax so it can be put back before the context is freed. */
oldsax = ctxt->sax;
12897
ctxt->sax = ctx->sax;
12898
xmlDetectSAX2(ctxt);
12899
newDoc = xmlNewDoc(BAD_CAST "1.0");
12900
if (newDoc == NULL) {
/* ctxt->sax still points at ctx->sax here; hand the context its own
 * handler back first, exactly as the other exit paths of this function
 * do, so that freeing the context cannot touch ctx's handler. */
ctxt->sax = oldsax;
12901
xmlFreeParserCtxt(ctxt);
12904
newDoc->properties = XML_DOC_INTERNAL;
12905
/* Share the parent document's dictionary, taking a reference on it. */
if (ctx->myDoc->dict) {
12906
newDoc->dict = ctx->myDoc->dict;
12907
xmlDictReference(newDoc->dict);
12909
/* The DTD subsets are only borrowed from the parent document; they are
 * reset to NULL before every xmlFreeDoc(newDoc) in this function. */
if (ctx->myDoc != NULL) {
12910
newDoc->intSubset = ctx->myDoc->intSubset;
12911
newDoc->extSubset = ctx->myDoc->extSubset;
12913
if (ctx->myDoc->URL != NULL) {
12914
newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12916
newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12917
if (newRoot == NULL) {
12918
ctxt->sax = oldsax;
12919
xmlFreeParserCtxt(ctxt);
12920
newDoc->intSubset = NULL;
12921
newDoc->extSubset = NULL;
12922
xmlFreeDoc(newDoc);
12925
xmlAddChild((xmlNodePtr) newDoc, newRoot);
12926
nodePush(ctxt, newDoc->children);
12927
if (ctx->myDoc == NULL) {
12928
ctxt->myDoc = newDoc;
12930
ctxt->myDoc = ctx->myDoc;
12931
newDoc->children->doc = ctx->myDoc;
12935
* Get the 4 first bytes and decode the charset
12936
* if enc != XML_CHAR_ENCODING_NONE
12937
* plug some encoding conversion routines.
12940
if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12945
enc = xmlDetectCharEncoding(start, 4);
12946
if (enc != XML_CHAR_ENCODING_NONE) {
12947
xmlSwitchEncoding(ctxt, enc);
12952
* Parse a possible text declaration first
12954
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12955
xmlParseTextDecl(ctxt);
12957
* An XML-1.0 document can't reference an entity not XML-1.0
12959
if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12960
(!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12961
xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12962
"Version mismatch between document and entity\n");
12967
* If the user provided its own SAX callbacks then reuse the
12968
* userData callback field, otherwise the expected setup in a
12969
* DOM builder is to have userData == ctxt
12971
if (ctx->userData == ctx)
12972
ctxt->userData = ctxt;
12974
ctxt->userData = ctx->userData;
12977
* Doing validity checking on chunk doesn't make sense
12979
ctxt->instate = XML_PARSER_CONTENT;
12980
ctxt->validate = ctx->validate;
12981
ctxt->valid = ctx->valid;
12982
ctxt->loadsubset = ctx->loadsubset;
12983
ctxt->depth = ctx->depth + 1;
12984
ctxt->replaceEntities = ctx->replaceEntities;
12985
if (ctxt->validate) {
12986
ctxt->vctxt.error = ctx->vctxt.error;
12987
ctxt->vctxt.warning = ctx->vctxt.warning;
12989
ctxt->vctxt.error = NULL;
12990
ctxt->vctxt.warning = NULL;
12992
ctxt->vctxt.nodeTab = NULL;
12993
ctxt->vctxt.nodeNr = 0;
12994
ctxt->vctxt.nodeMax = 0;
12995
ctxt->vctxt.node = NULL;
12996
/* Drop the child context's own dictionary, use the parent's instead and
 * look the cached str_xml* strings up again in that dictionary. */
if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12997
ctxt->dict = ctx->dict;
12998
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12999
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13000
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13001
ctxt->dictNames = ctx->dictNames;
13002
ctxt->attsDefault = ctx->attsDefault;
13003
ctxt->attsSpecial = ctx->attsSpecial;
13004
ctxt->linenumbers = ctx->linenumbers;
13006
xmlParseContent(ctxt);
13008
/* Copy the validation flags back into the parent context. */
ctx->validate = ctxt->validate;
13009
ctx->valid = ctxt->valid;
13010
/* Input left over after xmlParseContent() is an error: "</" is reported as
 * XML_ERR_NOT_WELL_BALANCED, any other non-zero RAW as
 * XML_ERR_EXTRA_CONTENT. */
if ((RAW == '<') && (NXT(1) == '/')) {
13011
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13012
} else if (RAW != 0) {
13013
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13015
/* The current node must be the pseudoroot pushed above, otherwise the
 * content is reported as not well balanced. */
if (ctxt->node != newDoc->children) {
13016
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13019
if (!ctxt->wellFormed) {
13020
if (ctxt->errNo == 0)
13029
* Return the newly created nodeset after unlinking it from
13030
* the pseudo parent.
13032
cur = newDoc->children->children;
13034
while (cur != NULL) {
13035
cur->parent = NULL;
13038
newDoc->children->children = NULL;
13042
/* Detach what was borrowed from ctx (SAX handler, attribute tables, DTD
 * subsets) before the child context and the scratch document are freed. */
ctxt->sax = oldsax;
13044
ctxt->attsDefault = NULL;
13045
ctxt->attsSpecial = NULL;
13046
xmlFreeParserCtxt(ctxt);
13047
newDoc->intSubset = NULL;
13048
newDoc->extSubset = NULL;
13049
xmlFreeDoc(newDoc);
13055
* xmlParseExternalEntityPrivate:
13056
* @doc: the document the chunk pertains to
13057
* @oldctxt: the previous parser context if available
13058
* @sax: the SAX handler block (possibly NULL)
13059
* @user_data: The user data returned on SAX callbacks (possibly NULL)
13060
* @depth: Used for loop detection, use 0
13061
* @URL: the URL for the entity to load
13062
* @ID: the System ID for the entity to load
13063
* @list: the return value for the set of parsed nodes
13065
* Private version of xmlParseExternalEntity()
13067
* Returns XML_ERR_OK if the entity is well formed, XML_ERR_INTERNAL_ERROR
13068
* in case of args problem and the parser error code otherwise
13071
/*
 * Worker behind xmlParseExternalEntity(): parse the external entity
 * URL / ID in a new parser context, optionally inheriting settings from
 * oldctxt, and build the resulting nodes under a "pseudoroot" element of a
 * scratch document that borrows doc's DTD subsets and dictionary.
 *
 * oldctxt may be NULL (xmlParseExternalEntity() passes NULL), so every
 * access to it in this function is guarded.
 *
 * Return values visible below: XML_ERR_ENTITY_LOOP, XML_ERR_INTERNAL_ERROR
 * (bad arguments or allocation failure) and XML_WAR_UNDECLARED_ENTITY when
 * no context could be created for the entity; when the entity is not well
 * formed, ret is set to ctxt->errNo, or to XML_ERR_INTERNAL_ERROR if errNo
 * is 0.
 */
static xmlParserErrors
13072
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13073
xmlSAXHandlerPtr sax,
13074
void *user_data, int depth, const xmlChar *URL,
13075
const xmlChar *ID, xmlNodePtr *list) {
13076
xmlParserCtxtPtr ctxt;
13078
xmlNodePtr newRoot;
13079
xmlSAXHandlerPtr oldsax = NULL;
13080
xmlParserErrors ret = XML_ERR_OK;
13082
xmlCharEncoding enc;
13084
if (((depth > 40) &&
13085
((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13087
return(XML_ERR_ENTITY_LOOP);
13092
if ((URL == NULL) && (ID == NULL))
13093
return(XML_ERR_INTERNAL_ERROR);
13095
return(XML_ERR_INTERNAL_ERROR);
13098
ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13099
if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13100
ctxt->userData = ctxt;
13101
/* With a parent context: inherit its settings and share its node_seq
 * record; the record is handed back to the parent before the new context
 * is freed. */
if (oldctxt != NULL) {
13102
ctxt->_private = oldctxt->_private;
13103
ctxt->loadsubset = oldctxt->loadsubset;
13104
ctxt->validate = oldctxt->validate;
13105
ctxt->external = oldctxt->external;
13106
ctxt->record_info = oldctxt->record_info;
13107
ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13108
ctxt->node_seq.length = oldctxt->node_seq.length;
13109
ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13112
* Doing validity checking on chunk without context
13113
* doesn't make sense
13115
ctxt->_private = NULL;
13116
ctxt->validate = 0;
13117
ctxt->external = 2;
13118
ctxt->loadsubset = 0;
13121
oldsax = ctxt->sax;
13123
if (user_data != NULL)
13124
ctxt->userData = user_data;
13126
xmlDetectSAX2(ctxt);
13127
newDoc = xmlNewDoc(BAD_CAST "1.0");
13128
if (newDoc == NULL) {
13129
ctxt->node_seq.maximum = 0;
13130
ctxt->node_seq.length = 0;
13131
ctxt->node_seq.buffer = NULL;
13132
xmlFreeParserCtxt(ctxt);
13133
return(XML_ERR_INTERNAL_ERROR);
13135
newDoc->properties = XML_DOC_INTERNAL;
13136
/* The DTD subsets are only borrowed from doc (reset to NULL before every
 * xmlFreeDoc(newDoc) below); the dictionary is shared with a reference. */
newDoc->intSubset = doc->intSubset;
13137
newDoc->extSubset = doc->extSubset;
13138
newDoc->dict = doc->dict;
13139
xmlDictReference(newDoc->dict);
13141
if (doc->URL != NULL) {
13142
newDoc->URL = xmlStrdup(doc->URL);
13144
newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13145
if (newRoot == NULL) {
13147
ctxt->sax = oldsax;
13148
ctxt->node_seq.maximum = 0;
13149
ctxt->node_seq.length = 0;
13150
ctxt->node_seq.buffer = NULL;
13151
xmlFreeParserCtxt(ctxt);
13152
newDoc->intSubset = NULL;
13153
newDoc->extSubset = NULL;
13154
xmlFreeDoc(newDoc);
13155
return(XML_ERR_INTERNAL_ERROR);
13157
xmlAddChild((xmlNodePtr) newDoc, newRoot);
13158
nodePush(ctxt, newDoc->children);
13160
newRoot->doc = doc;
13163
* Get the 4 first bytes and decode the charset
13164
* if enc != XML_CHAR_ENCODING_NONE
13165
* plug some encoding conversion routines.
13168
if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13173
enc = xmlDetectCharEncoding(start, 4);
13174
if (enc != XML_CHAR_ENCODING_NONE) {
13175
xmlSwitchEncoding(ctxt, enc);
13180
* Parse a possible text declaration first
13182
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13183
xmlParseTextDecl(ctxt);
13186
ctxt->instate = XML_PARSER_CONTENT;
13187
ctxt->depth = depth;
13189
xmlParseContent(ctxt);
13191
/* Input left over after xmlParseContent() is an error: "</" is reported as
 * XML_ERR_NOT_WELL_BALANCED, any other non-zero RAW as
 * XML_ERR_EXTRA_CONTENT. */
if ((RAW == '<') && (NXT(1) == '/')) {
13192
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13193
} else if (RAW != 0) {
13194
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13196
if (ctxt->node != newDoc->children) {
13197
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13200
if (!ctxt->wellFormed) {
13201
if (ctxt->errNo == 0)
13202
ret = XML_ERR_INTERNAL_ERROR;
13204
ret = (xmlParserErrors)ctxt->errNo;
13206
if (list != NULL) {
13210
* Return the newly created nodeset after unlinking it from
13211
* the pseudo parent.
13213
cur = newDoc->children->children;
13215
while (cur != NULL) {
13216
cur->parent = NULL;
13219
newDoc->children->children = NULL;
13225
* Record in the parent context the number of entities replacement
13226
* done when parsing that reference.
13228
if (oldctxt != NULL)
13229
oldctxt->nbentities += ctxt->nbentities;
13232
* Also record the size of the entity parsed
13234
/* Only when there is a parent context to record into. */
if ((ctxt->input != NULL) && (oldctxt != NULL)) {
13235
oldctxt->sizeentities += ctxt->input->consumed;
13236
oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13239
* And record the last error if any
13241
if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
13242
xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13245
ctxt->sax = oldsax;
13246
/* Hand the shared node_seq record back to the parent context, if any,
 * then clear it in the new context before that context is freed. */
if (oldctxt != NULL) {
oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13247
oldctxt->node_seq.length = ctxt->node_seq.length;
13248
oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
}
13249
ctxt->node_seq.maximum = 0;
13250
ctxt->node_seq.length = 0;
13251
ctxt->node_seq.buffer = NULL;
13252
xmlFreeParserCtxt(ctxt);
13253
newDoc->intSubset = NULL;
13254
newDoc->extSubset = NULL;
13255
xmlFreeDoc(newDoc);
13260
#ifdef LIBXML_SAX1_ENABLED
13262
* xmlParseExternalEntity:
13263
* @doc: the document the chunk pertains to
13264
* @sax: the SAX handler block (possibly NULL)
13265
* @user_data: The user data returned on SAX callbacks (possibly NULL)
13266
* @depth: Used for loop detection, use 0
13267
* @URL: the URL for the entity to load
13268
* @ID: the System ID for the entity to load
13269
* @lst: the return value for the set of parsed nodes
13271
* Parse an external general entity
13272
* An external general parsed entity is well-formed if it matches the
13273
* production labeled extParsedEnt.
13275
* [78] extParsedEnt ::= TextDecl? content
13277
* Returns 0 if the entity is well formed, -1 in case of args problem and
13278
* the parser error code otherwise
13282
/* Public entry point: forwards its arguments to
 * xmlParseExternalEntityPrivate(), passing NULL as the parent parser
 * context. */
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13283
int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13284
return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13289
* xmlParseBalancedChunkMemory:
13290
* @doc: the document the chunk pertains to
13291
* @sax: the SAX handler block (possibly NULL)
13292
* @user_data: The user data returned on SAX callbacks (possibly NULL)
13293
* @depth: Used for loop detection, use 0
13294
* @string: the input string in UTF8 or ISO-Latin (zero terminated)
13295
* @lst: the return value for the set of parsed nodes
13297
* Parse a well-balanced chunk of an XML document
13298
* called by the parser
13299
* The allowed sequence for the Well Balanced Chunk is the one defined by
13300
* the content production in the XML grammar:
13302
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13304
* Returns 0 if the chunk is well balanced, -1 in case of args problem and
13305
* the parser error code otherwise
13309
/* Same as xmlParseBalancedChunkMemoryRecover() with the recover argument
 * fixed to 0; the result of that call is returned unchanged. */
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13310
void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13311
return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13312
depth, string, lst, 0 );
13314
#endif /* LIBXML_SAX1_ENABLED */
13317
* xmlParseBalancedChunkMemoryInternal:
13318
* @oldctxt: the existing parsing context
13319
* @string: the input string in UTF8 or ISO-Latin (zero terminated)
13320
* @user_data: the user data field for the parser context
13321
* @lst: the return value for the set of parsed nodes
13324
* Parse a well-balanced chunk of an XML document
13325
* called by the parser
13326
* The allowed sequence for the Well Balanced Chunk is the one defined by
13327
* the content production in the XML grammar:
13329
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13331
* Returns XML_ERR_OK if the chunk is well balanced, and the parser
13332
* error code otherwise
13334
* This internal variant has no recover mode: the node list is handed back
13335
* through @lst only when the chunk parsed without error.
13337
/*
 * Parse the zero-terminated string as a well-balanced chunk on behalf of
 * the parser context oldctxt.
 *
 * A memory parser context is created for the string; it uses oldctxt's
 * dictionary, SAX handler, namespace stack, options and attribute tables.
 * Nodes are built under a temporary "pseudoroot" element, either in
 * oldctxt's document or, when oldctxt has none, in a scratch document that
 * is freed at the end.  The node list is stored through lst only when lst
 * is non-NULL and ret is XML_ERR_OK.
 */
static xmlParserErrors
13338
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13339
const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13340
xmlParserCtxtPtr ctxt;
13341
xmlDocPtr newDoc = NULL;
13342
xmlNodePtr newRoot;
13343
xmlSAXHandlerPtr oldsax = NULL;
13344
xmlNodePtr content = NULL;
13345
xmlNodePtr last = NULL;
13347
xmlParserErrors ret = XML_ERR_OK;
13352
/* Nesting guard: more than 40 levels without XML_PARSE_HUGE, or more than
 * 1024 levels in any case, is reported as an entity loop. */
if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13353
(oldctxt->depth > 1024)) {
13354
return(XML_ERR_ENTITY_LOOP);
13360
if (string == NULL)
13361
return(XML_ERR_INTERNAL_ERROR);
13363
size = xmlStrlen(string);
13365
ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13366
if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13367
if (user_data != NULL)
13368
ctxt->userData = user_data;
13370
ctxt->userData = ctxt;
13371
/* Replace the new context's own dictionary with the parent's and look the
 * cached str_xml* strings up again in that dictionary. */
if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13372
ctxt->dict = oldctxt->dict;
13373
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13374
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13375
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13378
/* propagate namespaces down the entity */
13379
for (i = 0;i < oldctxt->nsNr;i += 2) {
13380
nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13384
/* Parse with the parent's SAX handler; the context's own handler is kept
 * in oldsax and put back before the context is freed. */
oldsax = ctxt->sax;
13385
ctxt->sax = oldctxt->sax;
13386
xmlDetectSAX2(ctxt);
13387
ctxt->replaceEntities = oldctxt->replaceEntities;
13388
ctxt->options = oldctxt->options;
13390
ctxt->_private = oldctxt->_private;
13391
/* No document in the parent context: parse into a scratch document.
 * Otherwise parse into the parent's document and remember its children /
 * last pointers so they can be restored after the parse. */
if (oldctxt->myDoc == NULL) {
13392
newDoc = xmlNewDoc(BAD_CAST "1.0");
13393
if (newDoc == NULL) {
13394
ctxt->sax = oldsax;
13396
xmlFreeParserCtxt(ctxt);
13397
return(XML_ERR_INTERNAL_ERROR);
13399
newDoc->properties = XML_DOC_INTERNAL;
13400
newDoc->dict = ctxt->dict;
13401
xmlDictReference(newDoc->dict);
13402
ctxt->myDoc = newDoc;
13404
ctxt->myDoc = oldctxt->myDoc;
13405
content = ctxt->myDoc->children;
13406
last = ctxt->myDoc->last;
13408
newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13409
if (newRoot == NULL) {
13410
ctxt->sax = oldsax;
13412
xmlFreeParserCtxt(ctxt);
13413
if (newDoc != NULL) {
13414
xmlFreeDoc(newDoc);
13416
return(XML_ERR_INTERNAL_ERROR);
13418
/* Empty the document for the duration of the parse so that the pseudoroot
 * is its only child; content / last are put back further down. */
ctxt->myDoc->children = NULL;
13419
ctxt->myDoc->last = NULL;
13420
xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13421
nodePush(ctxt, ctxt->myDoc->children);
13422
ctxt->instate = XML_PARSER_CONTENT;
13423
ctxt->depth = oldctxt->depth + 1;
13425
/* Validation is switched off in this context; when LIBXML_VALID_ENABLED
 * is defined, returned elements are validated against the parent context
 * in the loop that builds the result list. */
ctxt->validate = 0;
13426
ctxt->loadsubset = oldctxt->loadsubset;
13427
if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13429
* ID/IDREF registration will be done in xmlValidateElement below
13431
ctxt->loadsubset |= XML_SKIP_IDS;
13433
ctxt->dictNames = oldctxt->dictNames;
13434
ctxt->attsDefault = oldctxt->attsDefault;
13435
ctxt->attsSpecial = oldctxt->attsSpecial;
13437
xmlParseContent(ctxt);
13438
/* Input left over after xmlParseContent() is an error: "</" is reported as
 * XML_ERR_NOT_WELL_BALANCED, any other non-zero RAW as
 * XML_ERR_EXTRA_CONTENT. */
if ((RAW == '<') && (NXT(1) == '/')) {
13439
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13440
} else if (RAW != 0) {
13441
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13443
if (ctxt->node != ctxt->myDoc->children) {
13444
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13447
if (!ctxt->wellFormed) {
13448
if (ctxt->errNo == 0)
13449
ret = XML_ERR_INTERNAL_ERROR;
13451
ret = (xmlParserErrors)ctxt->errNo;
13456
if ((lst != NULL) && (ret == XML_ERR_OK)) {
13460
* Return the newly created nodeset after unlinking it from
13461
* they pseudo parent.
13463
cur = ctxt->myDoc->children->children;
13465
while (cur != NULL) {
13466
#ifdef LIBXML_VALID_ENABLED
13467
if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13468
(oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13469
(cur->type == XML_ELEMENT_NODE)) {
13470
oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13471
oldctxt->myDoc, cur);
13473
#endif /* LIBXML_VALID_ENABLED */
13474
cur->parent = NULL;
13477
ctxt->myDoc->children->children = NULL;
13479
/* Discard the pseudoroot and restore the document's original children. */
if (ctxt->myDoc != NULL) {
13480
xmlFreeNode(ctxt->myDoc->children);
13481
ctxt->myDoc->children = content;
13482
ctxt->myDoc->last = last;
13486
* Record in the parent context the number of entities replacement
13487
* done when parsing that reference.
13489
/* NOTE(review): oldctxt has already been dereferenced unconditionally
 * above, so this NULL check looks redundant - confirm. */
if (oldctxt != NULL)
13490
oldctxt->nbentities += ctxt->nbentities;
13493
* Also record the last error if any
13495
if (ctxt->lastError.code != XML_ERR_OK)
13496
xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13498
ctxt->sax = oldsax;
13500
/* The attribute tables belong to the parent context; detach them before
 * this context is freed. */
ctxt->attsDefault = NULL;
13501
ctxt->attsSpecial = NULL;
13502
xmlFreeParserCtxt(ctxt);
13503
if (newDoc != NULL) {
13504
xmlFreeDoc(newDoc);
13511
* xmlParseInNodeContext:
13512
* @node: the context node
13513
* @data: the input string
13514
* @datalen: the input string length in bytes
13515
* @options: a combination of xmlParserOption
13516
* @lst: the return value for the set of parsed nodes
13518
* Parse a well-balanced chunk of an XML document
13519
* within the context (DTD, namespaces, etc ...) of the given node.
13521
* The allowed sequence for the data is a Well Balanced Chunk defined by
13522
* the content production in the XML grammar:
13524
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13526
* Returns XML_ERR_OK if the chunk is well balanced, and the parser
13527
* error code otherwise
13530
/*
 * Parse data (datalen bytes) as a well-balanced chunk in the context of
 * node and hand the resulting nodes back through lst.
 *
 * The walk below moves node up to the nearest element or document node.
 * A memory parser context is created for the data (the HTML variant when
 * the owning document is an HTML document and LIBXML_HTML_ENABLED is
 * defined).  The context uses the document's dictionary and encoding when
 * present.  An empty comment node ("fake") is appended to node before the
 * parse and unlinked again afterwards.
 *
 * Returns visible below: XML_ERR_INTERNAL_ERROR for bad arguments,
 * XML_ERR_NO_MEMORY on allocation failure, XML_ERR_UNSUPPORTED_ENCODING
 * when no handler exists for the document encoding; when the chunk is not
 * well formed, ret is set to ctxt->errNo, or to XML_ERR_INTERNAL_ERROR if
 * errNo is 0, and the nodes collected in *lst are freed.
 */
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13531
int options, xmlNodePtr *lst) {
13533
xmlParserCtxtPtr ctxt;
13534
xmlDocPtr doc = NULL;
13535
xmlNodePtr fake, cur;
13538
xmlParserErrors ret = XML_ERR_OK;
13541
* check all input parameters, grab the document
13543
if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13544
return(XML_ERR_INTERNAL_ERROR);
13545
switch (node->type) {
13546
case XML_ELEMENT_NODE:
13547
case XML_ATTRIBUTE_NODE:
13548
case XML_TEXT_NODE:
13549
case XML_CDATA_SECTION_NODE:
13550
case XML_ENTITY_REF_NODE:
13552
case XML_COMMENT_NODE:
13553
case XML_DOCUMENT_NODE:
13554
case XML_HTML_DOCUMENT_NODE:
13557
return(XML_ERR_INTERNAL_ERROR);
13560
/* Walk up to the nearest element or document node. */
while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13561
(node->type != XML_DOCUMENT_NODE) &&
13562
(node->type != XML_HTML_DOCUMENT_NODE))
13563
node = node->parent;
13565
return(XML_ERR_INTERNAL_ERROR);
13566
if (node->type == XML_ELEMENT_NODE)
13569
doc = (xmlDocPtr) node;
13571
return(XML_ERR_INTERNAL_ERROR);
13574
* allocate a context and set-up everything not related to the
13575
* node position in the tree
13577
if (doc->type == XML_DOCUMENT_NODE)
13578
ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13579
#ifdef LIBXML_HTML_ENABLED
13580
else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13581
ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13583
* When parsing in context, it makes no sense to add implied
13584
* elements like html/body/etc...
13586
options |= HTML_PARSE_NOIMPLIED;
13590
return(XML_ERR_INTERNAL_ERROR);
13593
return(XML_ERR_NO_MEMORY);
13596
* Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13597
* We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13598
* we must wait until the last moment to free the original one.
13600
if (doc->dict != NULL) {
13601
if (ctxt->dict != NULL)
13602
xmlDictFree(ctxt->dict);
13603
ctxt->dict = doc->dict;
13605
options |= XML_PARSE_NODICT;
13607
if (doc->encoding != NULL) {
13608
xmlCharEncodingHandlerPtr hdlr;
13610
if (ctxt->encoding != NULL)
13611
xmlFree((xmlChar *) ctxt->encoding);
13612
ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13614
hdlr = xmlFindCharEncodingHandler(doc->encoding);
13615
if (hdlr != NULL) {
13616
xmlSwitchToEncoding(ctxt, hdlr);
13618
/* Do not leak the parser context on this error path.  doc->dict was
 * borrowed above without taking a reference, so detach it before the
 * context is freed. */
if (doc->dict != NULL) ctxt->dict = NULL;
xmlFreeParserCtxt(ctxt);
return(XML_ERR_UNSUPPORTED_ENCODING);
13622
xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13623
xmlDetectSAX2(ctxt);
13626
/* fake is an empty comment appended to node before the parse; it is
 * unlinked again once parsing is done. */
fake = xmlNewComment(NULL);
13627
if (fake == NULL) {
/* Same rule as above: the borrowed dictionary must not go down with the
 * context. */
if (doc->dict != NULL) ctxt->dict = NULL;
13628
xmlFreeParserCtxt(ctxt);
13629
return(XML_ERR_NO_MEMORY);
13631
xmlAddChild(node, fake);
13633
if (node->type == XML_ELEMENT_NODE) {
13634
nodePush(ctxt, node);
13636
* initialize the SAX2 namespaces stack
13639
while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13640
xmlNsPtr ns = cur->nsDef;
13641
const xmlChar *iprefix, *ihref;
13643
while (ns != NULL) {
13645
iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13646
ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13648
iprefix = ns->prefix;
13652
/* Push a declaration only if its prefix is not already bound in the
 * context. */
if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13653
nsPush(ctxt, iprefix, ihref);
13660
ctxt->instate = XML_PARSER_CONTENT;
13663
if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13665
* ID/IDREF registration will be done in xmlValidateElement below
13667
ctxt->loadsubset |= XML_SKIP_IDS;
13670
#ifdef LIBXML_HTML_ENABLED
13671
if (doc->type == XML_HTML_DOCUMENT_NODE)
13672
__htmlParseContent(ctxt);
13675
xmlParseContent(ctxt);
13678
/* Input left over after the content parse is an error: "</" is reported as
 * XML_ERR_NOT_WELL_BALANCED, any other non-zero RAW as
 * XML_ERR_EXTRA_CONTENT. */
if ((RAW == '<') && (NXT(1) == '/')) {
13679
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13680
} else if (RAW != 0) {
13681
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13683
if ((ctxt->node != NULL) && (ctxt->node != node)) {
13684
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13685
ctxt->wellFormed = 0;
13688
if (!ctxt->wellFormed) {
13689
if (ctxt->errNo == 0)
13690
ret = XML_ERR_INTERNAL_ERROR;
13692
ret = (xmlParserErrors)ctxt->errNo;
13698
* Return the newly created nodeset after unlinking it from
13699
* the pseudo sibling.
13712
while (cur != NULL) {
13713
cur->parent = NULL;
13717
xmlUnlinkNode(fake);
13721
/* On error the nodes collected in *lst are freed. */
if (ret != XML_ERR_OK) {
13722
xmlFreeNodeList(*lst);
13726
if (doc->dict != NULL)
13728
xmlFreeParserCtxt(ctxt);
13732
return(XML_ERR_INTERNAL_ERROR);
13736
#ifdef LIBXML_SAX1_ENABLED
13738
* xmlParseBalancedChunkMemoryRecover:
13739
* @doc: the document the chunk pertains to
13740
* @sax: the SAX handler block (possibly NULL)
13741
* @user_data: The user data returned on SAX callbacks (possibly NULL)
13742
* @depth: Used for loop detection, use 0
13743
* @string: the input string in UTF8 or ISO-Latin (zero terminated)
13744
* @lst: the return value for the set of parsed nodes
13745
* @recover: return nodes even if the data is broken (use 0)
13748
* Parse a well-balanced chunk of an XML document
13749
* called by the parser
13750
* The allowed sequence for the Well Balanced Chunk is the one defined by
13751
* the content production in the XML grammar:
13753
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13755
* Returns 0 if the chunk is well balanced, -1 in case of args problem and
13756
* the parser error code otherwise
13758
* In case recover is set to 1, the nodelist will not be empty even if
13759
* the parsed chunk is not well balanced, assuming the parsing succeeded to
13763
/*
 * Parse the zero-terminated string as a well-balanced chunk into a scratch
 * document and hand the resulting top-level nodes back through lst.
 *
 * The nodes are built under a "pseudoroot" element of the scratch document;
 * when doc is given, the scratch document borrows doc's DTD subsets and
 * oldNs, and the returned nodes are re-homed to doc with xmlSetTreeDoc().
 * The list is returned only when lst is non-NULL and either ret is 0 or
 * recover is 1.
 *
 * Early returns visible below: XML_ERR_ENTITY_LOOP, and -1 when no memory
 * parser context could be created.
 */
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13764
void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13766
xmlParserCtxtPtr ctxt;
13768
xmlSAXHandlerPtr oldsax = NULL;
13769
xmlNodePtr content, newRoot;
13774
return(XML_ERR_ENTITY_LOOP);
13780
if (string == NULL)
13783
size = xmlStrlen(string);
13785
ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13786
if (ctxt == NULL) return(-1);
13787
ctxt->userData = ctxt;
13789
oldsax = ctxt->sax;
13791
if (user_data != NULL)
13792
ctxt->userData = user_data;
13794
newDoc = xmlNewDoc(BAD_CAST "1.0");
13795
if (newDoc == NULL) {
13796
xmlFreeParserCtxt(ctxt);
13799
newDoc->properties = XML_DOC_INTERNAL;
13800
/* When doc has a dictionary, swap it in for the context's own one (taking
 * a reference), look the cached str_xml* strings up again and mark the
 * context as using dictionary names. */
if ((doc != NULL) && (doc->dict != NULL)) {
13801
xmlDictFree(ctxt->dict);
13802
ctxt->dict = doc->dict;
13803
xmlDictReference(ctxt->dict);
13804
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13805
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13806
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13807
ctxt->dictNames = 1;
13809
xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13812
/* The DTD subsets are only borrowed from doc; they are reset to NULL
 * before every xmlFreeDoc(newDoc) in this function. */
newDoc->intSubset = doc->intSubset;
13813
newDoc->extSubset = doc->extSubset;
13815
newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13816
if (newRoot == NULL) {
13818
ctxt->sax = oldsax;
13819
xmlFreeParserCtxt(ctxt);
13820
newDoc->intSubset = NULL;
13821
newDoc->extSubset = NULL;
13822
xmlFreeDoc(newDoc);
13825
xmlAddChild((xmlNodePtr) newDoc, newRoot);
13826
nodePush(ctxt, newRoot);
13828
ctxt->myDoc = newDoc;
13830
ctxt->myDoc = newDoc;
13831
newDoc->children->doc = doc;
13832
/* Ensure that doc has XML spec namespace */
13833
xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13834
newDoc->oldNs = doc->oldNs;
13836
ctxt->instate = XML_PARSER_CONTENT;
13837
ctxt->depth = depth;
13840
* Doing validity checking on chunk doesn't make sense
13842
ctxt->validate = 0;
13843
ctxt->loadsubset = 0;
13844
xmlDetectSAX2(ctxt);
13846
/* With a document: hide doc's children while the content is parsed and
 * put them back right afterwards. */
if ( doc != NULL ){
13847
content = doc->children;
13848
doc->children = NULL;
13849
xmlParseContent(ctxt);
13850
doc->children = content;
13853
xmlParseContent(ctxt);
13855
/* Input left over after xmlParseContent() is an error: "</" is reported as
 * XML_ERR_NOT_WELL_BALANCED, any other non-zero RAW as
 * XML_ERR_EXTRA_CONTENT. */
if ((RAW == '<') && (NXT(1) == '/')) {
13856
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13857
} else if (RAW != 0) {
13858
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13860
if (ctxt->node != newDoc->children) {
13861
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13864
if (!ctxt->wellFormed) {
13865
if (ctxt->errNo == 0)
13873
if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13877
* Return the newly created nodeset after unlinking it from
13878
* they pseudo parent.
13880
cur = newDoc->children->children;
13882
while (cur != NULL) {
13883
xmlSetTreeDoc(cur, doc);
13884
cur->parent = NULL;
13887
newDoc->children->children = NULL;
13891
/* Put the context's own SAX handler back and clear everything the scratch
 * document borrowed from doc before the context and the document are
 * freed. */
ctxt->sax = oldsax;
13892
xmlFreeParserCtxt(ctxt);
13893
newDoc->intSubset = NULL;
13894
newDoc->extSubset = NULL;
13895
newDoc->oldNs = NULL;
13896
xmlFreeDoc(newDoc);
13902
* xmlSAXParseEntity:
13903
* @sax: the SAX handler block
13904
* @filename: the filename
13906
* parse an XML external entity out of context and build a tree.
13907
* It uses the given SAX function block to handle the parsing callbacks.
13908
* If sax is NULL, fallback to the default DOM tree building routines.
13910
* [78] extParsedEnt ::= TextDecl? content
13912
* This corresponds to a "Well Balanced" chunk
13914
* Returns the resulting document tree
13918
/*
 * Create a file parser context for filename, run xmlParseExtParsedEnt() on
 * it and free the context again.  The outcome depends on ctxt->wellFormed
 * after the parse.
 */
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13920
xmlParserCtxtPtr ctxt;
13922
ctxt = xmlCreateFileParserCtxt(filename);
13923
if (ctxt == NULL) {
13927
/* Release the SAX handler block the new context came with. */
if (ctxt->sax != NULL)
13928
xmlFree(ctxt->sax);
13930
ctxt->userData = NULL;
13933
xmlParseExtParsedEnt(ctxt);
13935
if (ctxt->wellFormed)
13939
/* NOTE(review): presumably the not-well-formed branch - the document built
 * by the parse is freed and the context's pointer to it reset; confirm. */
xmlFreeDoc(ctxt->myDoc);
13940
ctxt->myDoc = NULL;
13944
xmlFreeParserCtxt(ctxt);
13951
* @filename: the filename
13953
* parse an XML external entity out of context and build a tree.
13955
* [78] extParsedEnt ::= TextDecl? content
13957
* This corresponds to a "Well Balanced" chunk
13959
* Returns the resulting document tree
13963
/* Convenience front end: calls xmlSAXParseEntity() with NULL as the SAX
 * handler argument and returns that call's result unchanged. */
xmlParseEntity(const char *filename) {
13964
return(xmlSAXParseEntity(NULL, filename));
13966
#endif /* LIBXML_SAX1_ENABLED */
13969
* xmlCreateEntityParserCtxtInternal:
13970
* @URL: the entity URL
13971
* @ID: the entity PUBLIC ID
13972
* @base: a possible base for the target URI
13973
* @pctx: parser context used to set options on new context
13975
* Create a parser context for an external entity
13976
* Automatic support for ZLIB/Compress compressed document is provided
13977
* by default if found at compile-time.
13979
* Returns the new parser context or NULL
13981
/*
 * Allocate a parser context for an external entity and push the entity's
 * input stream on it.
 *
 * When pctx is given, its options and _private values are copied into the
 * new context.  The input is obtained with xmlLoadExternalEntity(), either
 * from URL as given or from the URI built from URL and base; if loading
 * fails the new context is freed.
 */
static xmlParserCtxtPtr
13982
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13983
const xmlChar *base, xmlParserCtxtPtr pctx) {
13984
xmlParserCtxtPtr ctxt;
13985
xmlParserInputPtr inputStream;
13986
char *directory = NULL;
13989
ctxt = xmlNewParserCtxt();
13990
if (ctxt == NULL) {
13994
/* Inherit options and _private from the parent context, when given. */
if (pctx != NULL) {
13995
ctxt->options = pctx->options;
13996
ctxt->_private = pctx->_private;
13999
uri = xmlBuildURI(URL, base);
14002
/* Variant 1: load from URL exactly as passed in.
 * NOTE(review): presumably taken when xmlBuildURI() yields no URI -
 * confirm. */
inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14003
if (inputStream == NULL) {
14004
xmlFreeParserCtxt(ctxt);
14008
inputPush(ctxt, inputStream);
14010
/* Record the directory of the entity unless the context already has one. */
if ((ctxt->directory == NULL) && (directory == NULL))
14011
directory = xmlParserGetDirectory((char *)URL);
14012
if ((ctxt->directory == NULL) && (directory != NULL))
14013
ctxt->directory = directory;
14015
/* Variant 2: load from the URI built from URL and base. */
inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14016
if (inputStream == NULL) {
14018
xmlFreeParserCtxt(ctxt);
14022
inputPush(ctxt, inputStream);
14024
if ((ctxt->directory == NULL) && (directory == NULL))
14025
directory = xmlParserGetDirectory((char *)uri);
14026
if ((ctxt->directory == NULL) && (directory != NULL))
14027
ctxt->directory = directory;
14034
* xmlCreateEntityParserCtxt:
14035
* @URL: the entity URL
14036
* @ID: the entity PUBLIC ID
14037
* @base: a possible base for the target URI
14039
* Create a parser context for an external entity
14040
* Automatic support for ZLIB/Compress compressed document is provided
14041
* by default if found at compile-time.
14043
* Returns the new parser context or NULL
14046
/* Public wrapper: calls xmlCreateEntityParserCtxtInternal() with NULL as
 * the parent parser context. */
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14047
const xmlChar *base) {
14048
return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14052
/************************************************************************
14054
* Front ends when parsing from a file *
14056
************************************************************************/
14059
* xmlCreateURLParserCtxt:
14060
* @filename: the filename or URL
14061
* @options: a combination of xmlParserOption
14063
* Create a parser context for a file or URL content.
14064
* Automatic support for ZLIB/Compress compressed document is provided
14065
* by default if found at compile-time and for file accesses
14067
* Returns the new parser context or NULL
14070
/*
 * Allocate a parser context, apply options to it, load filename through
 * xmlLoadExternalEntity() and push the resulting input stream.  If the
 * input cannot be loaded the context is freed again.
 */
xmlCreateURLParserCtxt(const char *filename, int options)
14072
xmlParserCtxtPtr ctxt;
14073
xmlParserInputPtr inputStream;
14074
char *directory = NULL;
14076
ctxt = xmlNewParserCtxt();
14077
if (ctxt == NULL) {
14078
xmlErrMemory(NULL, "cannot allocate parser context");
14083
xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14084
ctxt->linenumbers = 1;
14086
inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14087
if (inputStream == NULL) {
14088
xmlFreeParserCtxt(ctxt);
14092
inputPush(ctxt, inputStream);
14093
/* Record the directory of filename unless the context already has one. */
if ((ctxt->directory == NULL) && (directory == NULL))
14094
directory = xmlParserGetDirectory(filename);
14095
if ((ctxt->directory == NULL) && (directory != NULL))
14096
ctxt->directory = directory;
14102
* xmlCreateFileParserCtxt:
14103
* @filename: the filename
14105
* Create a parser context for a file content.
14106
* Automatic support for ZLIB/Compress compressed document is provided
14107
* by default if found at compile-time.
14109
* Returns the new parser context or NULL
14112
/* Same as xmlCreateURLParserCtxt() with options fixed to 0. */
xmlCreateFileParserCtxt(const char *filename)
14114
return(xmlCreateURLParserCtxt(filename, 0));
14117
#ifdef LIBXML_SAX1_ENABLED
14119
* xmlSAXParseFileWithData:
14120
* @sax: the SAX handler block
14121
* @filename: the filename
14122
* @recovery: work in recovery mode, i.e. tries to read no Well Formed
14124
* @data: the userdata
14126
* parse an XML file and build a tree. Automatic support for ZLIB/Compress
14127
* compressed document is provided by default if found at compile-time.
14128
* It use the given SAX function block to handle the parsing callback.
14129
* If sax is NULL, fallback to the default DOM tree building routines.
14131
* User data (void *) is stored within the parser context in the
14132
* context's _private member, so it is available nearly everywhere in libxml
14134
* Returns the resulting document tree
14138
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14139
int recovery, void *data) {
14141
xmlParserCtxtPtr ctxt;
14145
ctxt = xmlCreateFileParserCtxt(filename);
14146
if (ctxt == NULL) {
14150
if (ctxt->sax != NULL)
14151
xmlFree(ctxt->sax);
14154
xmlDetectSAX2(ctxt);
14156
ctxt->_private = data;
14159
if (ctxt->directory == NULL)
14160
ctxt->directory = xmlParserGetDirectory(filename);
14162
ctxt->recovery = recovery;
14164
xmlParseDocument(ctxt);
14166
if ((ctxt->wellFormed) || recovery) {
14169
if (ctxt->input->buf->compressed > 0)
14170
ret->compression = 9;
14172
ret->compression = ctxt->input->buf->compressed;
14177
xmlFreeDoc(ctxt->myDoc);
14178
ctxt->myDoc = NULL;
14182
xmlFreeParserCtxt(ctxt);
14189
* @sax: the SAX handler block
14190
* @filename: the filename
14191
* @recovery: work in recovery mode, i.e. tries to read no Well Formed
14194
* parse an XML file and build a tree. Automatic support for ZLIB/Compress
14195
* compressed document is provided by default if found at compile-time.
14196
* It use the given SAX function block to handle the parsing callback.
14197
* If sax is NULL, fallback to the default DOM tree building routines.
14199
* Returns the resulting document tree
14203
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14205
return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14210
* @cur: a pointer to an array of xmlChar
14212
* parse an XML in-memory document and build a tree.
14213
* In the case the document is not Well Formed, a attempt to build a
14214
* tree is tried anyway
14216
* Returns the resulting document tree or NULL in case of failure
14220
xmlRecoverDoc(const xmlChar *cur) {
14221
return(xmlSAXParseDoc(NULL, cur, 1));
14226
* @filename: the filename
14228
* parse an XML file and build a tree. Automatic support for ZLIB/Compress
14229
* compressed document is provided by default if found at compile-time.
14231
* Returns the resulting document tree if the file was wellformed,
14236
xmlParseFile(const char *filename) {
14237
return(xmlSAXParseFile(NULL, filename, 0));
14242
* @filename: the filename
14244
* parse an XML file and build a tree. Automatic support for ZLIB/Compress
14245
* compressed document is provided by default if found at compile-time.
14246
* In the case the document is not Well Formed, it attempts to build
14249
* Returns the resulting document tree or NULL in case of failure
14253
xmlRecoverFile(const char *filename) {
14254
return(xmlSAXParseFile(NULL, filename, 1));
14259
* xmlSetupParserForBuffer:
14260
* @ctxt: an XML parser context
14261
* @buffer: a xmlChar * buffer
14262
* @filename: a file name
14264
* Setup the parser context to parse a new buffer; Clears any prior
14265
* contents from the parser context. The buffer parameter must not be
14266
* NULL, but the filename parameter can be
14269
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14270
const char* filename)
14272
xmlParserInputPtr input;
14274
if ((ctxt == NULL) || (buffer == NULL))
14277
input = xmlNewInputStream(ctxt);
14278
if (input == NULL) {
14279
xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14280
xmlClearParserCtxt(ctxt);
14284
xmlClearParserCtxt(ctxt);
14285
if (filename != NULL)
14286
input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14287
input->base = buffer;
14288
input->cur = buffer;
14289
input->end = &buffer[xmlStrlen(buffer)];
14290
inputPush(ctxt, input);
14294
* xmlSAXUserParseFile:
14295
* @sax: a SAX handler
14296
* @user_data: The user data returned on SAX callbacks
14297
* @filename: a file name
14299
* parse an XML file and call the given SAX handler routines.
14300
* Automatic support for ZLIB/Compress compressed document is provided
14302
* Returns 0 in case of success or a error number otherwise
14305
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14306
const char *filename) {
14308
xmlParserCtxtPtr ctxt;
14310
ctxt = xmlCreateFileParserCtxt(filename);
14311
if (ctxt == NULL) return -1;
14312
if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14313
xmlFree(ctxt->sax);
14315
xmlDetectSAX2(ctxt);
14317
if (user_data != NULL)
14318
ctxt->userData = user_data;
14320
xmlParseDocument(ctxt);
14322
if (ctxt->wellFormed)
14325
if (ctxt->errNo != 0)
14332
if (ctxt->myDoc != NULL) {
14333
xmlFreeDoc(ctxt->myDoc);
14334
ctxt->myDoc = NULL;
14336
xmlFreeParserCtxt(ctxt);
14340
#endif /* LIBXML_SAX1_ENABLED */
14342
/************************************************************************
 *									*
 *		Front ends when parsing from memory			*
 *									*
 ************************************************************************/
14349
* xmlCreateMemoryParserCtxt:
14350
* @buffer: a pointer to a char array
14351
* @size: the size of the array
14353
* Create a parser context for an XML in-memory document.
14355
* Returns the new parser context or NULL
14358
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14359
xmlParserCtxtPtr ctxt;
14360
xmlParserInputPtr input;
14361
xmlParserInputBufferPtr buf;
14363
if (buffer == NULL)
14368
ctxt = xmlNewParserCtxt();
14372
/* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14373
buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14375
xmlFreeParserCtxt(ctxt);
14379
input = xmlNewInputStream(ctxt);
14380
if (input == NULL) {
14381
xmlFreeParserInputBuffer(buf);
14382
xmlFreeParserCtxt(ctxt);
14386
input->filename = NULL;
14388
xmlBufResetInput(input->buf->buffer, input);
14390
inputPush(ctxt, input);
14394
#ifdef LIBXML_SAX1_ENABLED
14396
* xmlSAXParseMemoryWithData:
14397
* @sax: the SAX handler block
14398
* @buffer: an pointer to a char array
14399
* @size: the size of the array
14400
* @recovery: work in recovery mode, i.e. tries to read no Well Formed
14402
* @data: the userdata
14404
* parse an XML in-memory block and use the given SAX function block
14405
* to handle the parsing callback. If sax is NULL, fallback to the default
14406
* DOM tree building routines.
14408
* User data (void *) is stored within the parser context in the
14409
* context's _private member, so it is available nearly everywhere in libxml
14411
* Returns the resulting document tree
14415
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14416
int size, int recovery, void *data) {
14418
xmlParserCtxtPtr ctxt;
14422
ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14423
if (ctxt == NULL) return(NULL);
14425
if (ctxt->sax != NULL)
14426
xmlFree(ctxt->sax);
14429
xmlDetectSAX2(ctxt);
14431
ctxt->_private=data;
14434
ctxt->recovery = recovery;
14436
xmlParseDocument(ctxt);
14438
if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14441
xmlFreeDoc(ctxt->myDoc);
14442
ctxt->myDoc = NULL;
14446
xmlFreeParserCtxt(ctxt);
14452
* xmlSAXParseMemory:
14453
* @sax: the SAX handler block
14454
* @buffer: an pointer to a char array
14455
* @size: the size of the array
14456
* @recovery: work in recovery mode, i.e. tries to read not Well Formed
14459
* parse an XML in-memory block and use the given SAX function block
14460
* to handle the parsing callback. If sax is NULL, fallback to the default
14461
* DOM tree building routines.
14463
* Returns the resulting document tree
14466
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14467
int size, int recovery) {
14468
return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14473
* @buffer: an pointer to a char array
14474
* @size: the size of the array
14476
* parse an XML in-memory block and build a tree.
14478
* Returns the resulting document tree
14481
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14482
return(xmlSAXParseMemory(NULL, buffer, size, 0));
14486
* xmlRecoverMemory:
14487
* @buffer: an pointer to a char array
14488
* @size: the size of the array
14490
* parse an XML in-memory block and build a tree.
14491
* In the case the document is not Well Formed, an attempt to
14492
* build a tree is tried anyway
14494
* Returns the resulting document tree or NULL in case of error
14497
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14498
return(xmlSAXParseMemory(NULL, buffer, size, 1));
14502
* xmlSAXUserParseMemory:
14503
* @sax: a SAX handler
14504
* @user_data: The user data returned on SAX callbacks
14505
* @buffer: an in-memory XML document input
14506
* @size: the length of the XML document in bytes
14508
* A better SAX parsing routine.
14509
* parse an XML in-memory buffer and call the given SAX handler routines.
14511
* Returns 0 in case of success or a error number otherwise
14513
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14514
const char *buffer, int size) {
14516
xmlParserCtxtPtr ctxt;
14520
ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14521
if (ctxt == NULL) return -1;
14522
if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14523
xmlFree(ctxt->sax);
14525
xmlDetectSAX2(ctxt);
14527
if (user_data != NULL)
14528
ctxt->userData = user_data;
14530
xmlParseDocument(ctxt);
14532
if (ctxt->wellFormed)
14535
if (ctxt->errNo != 0)
14542
if (ctxt->myDoc != NULL) {
14543
xmlFreeDoc(ctxt->myDoc);
14544
ctxt->myDoc = NULL;
14546
xmlFreeParserCtxt(ctxt);
14550
#endif /* LIBXML_SAX1_ENABLED */
14553
* xmlCreateDocParserCtxt:
14554
* @cur: a pointer to an array of xmlChar
14556
* Creates a parser context for an XML in-memory document.
14558
* Returns the new parser context or NULL
14561
xmlCreateDocParserCtxt(const xmlChar *cur) {
14566
len = xmlStrlen(cur);
14567
return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14570
#ifdef LIBXML_SAX1_ENABLED
14573
* @sax: the SAX handler block
14574
* @cur: a pointer to an array of xmlChar
14575
* @recovery: work in recovery mode, i.e. tries to read no Well Formed
14578
* parse an XML in-memory document and build a tree.
14579
* It use the given SAX function block to handle the parsing callback.
14580
* If sax is NULL, fallback to the default DOM tree building routines.
14582
* Returns the resulting document tree
14586
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14588
xmlParserCtxtPtr ctxt;
14589
xmlSAXHandlerPtr oldsax = NULL;
14591
if (cur == NULL) return(NULL);
14594
ctxt = xmlCreateDocParserCtxt(cur);
14595
if (ctxt == NULL) return(NULL);
14597
oldsax = ctxt->sax;
14599
ctxt->userData = NULL;
14601
xmlDetectSAX2(ctxt);
14603
xmlParseDocument(ctxt);
14604
if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14607
xmlFreeDoc(ctxt->myDoc);
14608
ctxt->myDoc = NULL;
14611
ctxt->sax = oldsax;
14612
xmlFreeParserCtxt(ctxt);
14619
* @cur: a pointer to an array of xmlChar
14621
* parse an XML in-memory document and build a tree.
14623
* Returns the resulting document tree
14627
xmlParseDoc(const xmlChar *cur) {
14628
return(xmlSAXParseDoc(NULL, cur, 0));
14630
#endif /* LIBXML_SAX1_ENABLED */
14632
#ifdef LIBXML_LEGACY_ENABLED
14633
/************************************************************************
 *									*
 *	Specific function to keep track of entities references		*
 *	and used by the XSLT debugger					*
 *									*
 ************************************************************************/
14640
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14643
* xmlAddEntityReference:
14644
* @ent : A valid entity
14645
* @firstNode : A valid first node for children of entity
14646
* @lastNode : A valid last node of children entity
14648
* Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14651
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14652
xmlNodePtr lastNode)
14654
if (xmlEntityRefFunc != NULL) {
14655
(*xmlEntityRefFunc) (ent, firstNode, lastNode);
14661
* xmlSetEntityReferenceFunc:
14662
* @func: A valid function
14664
* Set the function to call call back when a xml reference has been made
14667
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14669
xmlEntityRefFunc = func;
14671
#endif /* LIBXML_LEGACY_ENABLED */
14673
/************************************************************************
 *									*
 *				Miscellaneous				*
 *									*
 ************************************************************************/
14679
#ifdef LIBXML_XPATH_ENABLED
14680
#include <libxml/xpath.h>
14683
extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14684
static int xmlParserInitialized = 0;
14689
* Initialization function for the XML parser.
14690
* This is not reentrant. Call once before processing in case of
14691
* use in multithreaded programs.
14695
xmlInitParser(void) {
14696
if (xmlParserInitialized != 0)
14699
#ifdef LIBXML_THREAD_ENABLED
14700
__xmlGlobalInitMutexLock();
14701
if (xmlParserInitialized == 0) {
14705
if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14706
(xmlGenericError == NULL))
14707
initGenericErrorDefaultFunc(NULL);
14709
xmlInitializeDict();
14710
xmlInitCharEncodingHandlers();
14711
xmlDefaultSAXHandlerInit();
14712
xmlRegisterDefaultInputCallbacks();
14713
#ifdef LIBXML_OUTPUT_ENABLED
14714
xmlRegisterDefaultOutputCallbacks();
14715
#endif /* LIBXML_OUTPUT_ENABLED */
14716
#ifdef LIBXML_HTML_ENABLED
14717
htmlInitAutoClose();
14718
htmlDefaultSAXHandlerInit();
14720
#ifdef LIBXML_XPATH_ENABLED
14723
xmlParserInitialized = 1;
14724
#ifdef LIBXML_THREAD_ENABLED
14726
__xmlGlobalInitMutexUnlock();
14731
* xmlCleanupParser:
14733
* This function name is somewhat misleading. It does not clean up
14734
* parser state, it cleans up memory allocated by the library itself.
14735
* It is a cleanup function for the XML library. It tries to reclaim all
14736
* related global memory allocated for the library processing.
14737
* It doesn't deallocate any document related memory. One should
14738
* call xmlCleanupParser() only when the process has finished using
14739
* the library and all XML/HTML documents built with it.
14740
* See also xmlInitParser() which has the opposite function of preparing
14741
* the library for operations.
14743
* WARNING: if your application is multithreaded or has plugin support
14744
* calling this may crash the application if another thread or
14745
* a plugin is still using libxml2. It's sometimes very hard to
14746
* guess if libxml2 is in use in the application, some libraries
14747
* or plugins may use it without notice. In case of doubt abstain
14748
* from calling this function or do it just before calling exit()
14749
* to avoid leak reports from valgrind !
14753
xmlCleanupParser(void) {
14754
if (!xmlParserInitialized)
14757
xmlCleanupCharEncodingHandlers();
14758
#ifdef LIBXML_CATALOG_ENABLED
14759
xmlCatalogCleanup();
14762
xmlCleanupInputCallbacks();
14763
#ifdef LIBXML_OUTPUT_ENABLED
14764
xmlCleanupOutputCallbacks();
14766
#ifdef LIBXML_SCHEMAS_ENABLED
14767
xmlSchemaCleanupTypes();
14768
xmlRelaxNGCleanupTypes();
14770
xmlResetLastError();
14771
xmlCleanupGlobals();
14772
xmlCleanupThreads(); /* must be last if called not from the main thread */
14773
xmlCleanupMemory();
14774
xmlParserInitialized = 0;
14777
/************************************************************************
 *									*
 *	New set (2.6.0) of simpler and more flexible APIs		*
 *									*
 ************************************************************************/
14787
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is used.
 *
 * Wrapped in do { } while (0) so the expansion is exactly one statement
 * and stays correct inside an unbraced if/else. The caller supplies the
 * terminating semicolon.
 */
#define DICT_FREE(str)						\
	do {							\
	    if ((str) && ((!dict) ||				\
		(xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
		xmlFree((char *)(str));				\
	} while (0)
14797
* @ctxt: an XML parser context
14799
* Reset a parser context
14802
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14804
xmlParserInputPtr input;
14812
while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14813
xmlFreeInputStream(input);
14816
ctxt->input = NULL;
14819
if (ctxt->spaceTab != NULL) {
14820
ctxt->spaceTab[0] = -1;
14821
ctxt->space = &ctxt->spaceTab[0];
14823
ctxt->space = NULL;
14833
DICT_FREE(ctxt->version);
14834
ctxt->version = NULL;
14835
DICT_FREE(ctxt->encoding);
14836
ctxt->encoding = NULL;
14837
DICT_FREE(ctxt->directory);
14838
ctxt->directory = NULL;
14839
DICT_FREE(ctxt->extSubURI);
14840
ctxt->extSubURI = NULL;
14841
DICT_FREE(ctxt->extSubSystem);
14842
ctxt->extSubSystem = NULL;
14843
if (ctxt->myDoc != NULL)
14844
xmlFreeDoc(ctxt->myDoc);
14845
ctxt->myDoc = NULL;
14847
ctxt->standalone = -1;
14848
ctxt->hasExternalSubset = 0;
14849
ctxt->hasPErefs = 0;
14851
ctxt->external = 0;
14852
ctxt->instate = XML_PARSER_START;
14855
ctxt->wellFormed = 1;
14856
ctxt->nsWellFormed = 1;
14857
ctxt->disableSAX = 0;
14860
ctxt->vctxt.userData = ctxt;
14861
ctxt->vctxt.error = xmlParserValidityError;
14862
ctxt->vctxt.warning = xmlParserValidityWarning;
14864
ctxt->record_info = 0;
14866
ctxt->checkIndex = 0;
14867
ctxt->inSubset = 0;
14868
ctxt->errNo = XML_ERR_OK;
14870
ctxt->charset = XML_CHAR_ENCODING_UTF8;
14871
ctxt->catalogs = NULL;
14872
ctxt->nbentities = 0;
14873
ctxt->sizeentities = 0;
14874
ctxt->sizeentcopy = 0;
14875
xmlInitNodeInfoSeq(&ctxt->node_seq);
14877
if (ctxt->attsDefault != NULL) {
14878
xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14879
ctxt->attsDefault = NULL;
14881
if (ctxt->attsSpecial != NULL) {
14882
xmlHashFree(ctxt->attsSpecial, NULL);
14883
ctxt->attsSpecial = NULL;
14886
#ifdef LIBXML_CATALOG_ENABLED
14887
if (ctxt->catalogs != NULL)
14888
xmlCatalogFreeLocal(ctxt->catalogs);
14890
if (ctxt->lastError.code != XML_ERR_OK)
14891
xmlResetError(&ctxt->lastError);
14895
* xmlCtxtResetPush:
14896
* @ctxt: an XML parser context
14897
* @chunk: a pointer to an array of chars
14898
* @size: number of chars in the array
14899
* @filename: an optional file name or URI
14900
* @encoding: the document encoding, or NULL
14902
* Reset a push parser context
14904
* Returns 0 in case of success and 1 in case of error
14907
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14908
int size, const char *filename, const char *encoding)
14910
xmlParserInputPtr inputStream;
14911
xmlParserInputBufferPtr buf;
14912
xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14917
if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14918
enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14920
buf = xmlAllocParserInputBuffer(enc);
14924
if (ctxt == NULL) {
14925
xmlFreeParserInputBuffer(buf);
14929
xmlCtxtReset(ctxt);
14931
if (ctxt->pushTab == NULL) {
14932
ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14933
sizeof(xmlChar *));
14934
if (ctxt->pushTab == NULL) {
14935
xmlErrMemory(ctxt, NULL);
14936
xmlFreeParserInputBuffer(buf);
14941
if (filename == NULL) {
14942
ctxt->directory = NULL;
14944
ctxt->directory = xmlParserGetDirectory(filename);
14947
inputStream = xmlNewInputStream(ctxt);
14948
if (inputStream == NULL) {
14949
xmlFreeParserInputBuffer(buf);
14953
if (filename == NULL)
14954
inputStream->filename = NULL;
14956
inputStream->filename = (char *)
14957
xmlCanonicPath((const xmlChar *) filename);
14958
inputStream->buf = buf;
14959
xmlBufResetInput(buf->buffer, inputStream);
14961
inputPush(ctxt, inputStream);
14963
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14964
(ctxt->input->buf != NULL)) {
14965
size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14966
size_t cur = ctxt->input->cur - ctxt->input->base;
14968
xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14970
xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14972
xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14976
if (encoding != NULL) {
14977
xmlCharEncodingHandlerPtr hdlr;
14979
if (ctxt->encoding != NULL)
14980
xmlFree((xmlChar *) ctxt->encoding);
14981
ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14983
hdlr = xmlFindCharEncodingHandler(encoding);
14984
if (hdlr != NULL) {
14985
xmlSwitchToEncoding(ctxt, hdlr);
14987
xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14988
"Unsupported encoding %s\n", BAD_CAST encoding);
14990
} else if (enc != XML_CHAR_ENCODING_NONE) {
14991
xmlSwitchEncoding(ctxt, enc);
14999
* xmlCtxtUseOptionsInternal:
15000
* @ctxt: an XML parser context
15001
* @options: a combination of xmlParserOption
15002
* @encoding: the user provided encoding to use
15004
* Applies the options to the parser context
15006
* Returns 0 in case of success, the set of unknown or unimplemented options
15007
* in case of error.
15010
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15014
if (encoding != NULL) {
15015
if (ctxt->encoding != NULL)
15016
xmlFree((xmlChar *) ctxt->encoding);
15017
ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15019
if (options & XML_PARSE_RECOVER) {
15020
ctxt->recovery = 1;
15021
options -= XML_PARSE_RECOVER;
15022
ctxt->options |= XML_PARSE_RECOVER;
15024
ctxt->recovery = 0;
15025
if (options & XML_PARSE_DTDLOAD) {
15026
ctxt->loadsubset = XML_DETECT_IDS;
15027
options -= XML_PARSE_DTDLOAD;
15028
ctxt->options |= XML_PARSE_DTDLOAD;
15030
ctxt->loadsubset = 0;
15031
if (options & XML_PARSE_DTDATTR) {
15032
ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15033
options -= XML_PARSE_DTDATTR;
15034
ctxt->options |= XML_PARSE_DTDATTR;
15036
if (options & XML_PARSE_NOENT) {
15037
ctxt->replaceEntities = 1;
15038
/* ctxt->loadsubset |= XML_DETECT_IDS; */
15039
options -= XML_PARSE_NOENT;
15040
ctxt->options |= XML_PARSE_NOENT;
15042
ctxt->replaceEntities = 0;
15043
if (options & XML_PARSE_PEDANTIC) {
15044
ctxt->pedantic = 1;
15045
options -= XML_PARSE_PEDANTIC;
15046
ctxt->options |= XML_PARSE_PEDANTIC;
15048
ctxt->pedantic = 0;
15049
if (options & XML_PARSE_NOBLANKS) {
15050
ctxt->keepBlanks = 0;
15051
ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15052
options -= XML_PARSE_NOBLANKS;
15053
ctxt->options |= XML_PARSE_NOBLANKS;
15055
ctxt->keepBlanks = 1;
15056
if (options & XML_PARSE_DTDVALID) {
15057
ctxt->validate = 1;
15058
if (options & XML_PARSE_NOWARNING)
15059
ctxt->vctxt.warning = NULL;
15060
if (options & XML_PARSE_NOERROR)
15061
ctxt->vctxt.error = NULL;
15062
options -= XML_PARSE_DTDVALID;
15063
ctxt->options |= XML_PARSE_DTDVALID;
15065
ctxt->validate = 0;
15066
if (options & XML_PARSE_NOWARNING) {
15067
ctxt->sax->warning = NULL;
15068
options -= XML_PARSE_NOWARNING;
15070
if (options & XML_PARSE_NOERROR) {
15071
ctxt->sax->error = NULL;
15072
ctxt->sax->fatalError = NULL;
15073
options -= XML_PARSE_NOERROR;
15075
#ifdef LIBXML_SAX1_ENABLED
15076
if (options & XML_PARSE_SAX1) {
15077
ctxt->sax->startElement = xmlSAX2StartElement;
15078
ctxt->sax->endElement = xmlSAX2EndElement;
15079
ctxt->sax->startElementNs = NULL;
15080
ctxt->sax->endElementNs = NULL;
15081
ctxt->sax->initialized = 1;
15082
options -= XML_PARSE_SAX1;
15083
ctxt->options |= XML_PARSE_SAX1;
15085
#endif /* LIBXML_SAX1_ENABLED */
15086
if (options & XML_PARSE_NODICT) {
15087
ctxt->dictNames = 0;
15088
options -= XML_PARSE_NODICT;
15089
ctxt->options |= XML_PARSE_NODICT;
15091
ctxt->dictNames = 1;
15093
if (options & XML_PARSE_NOCDATA) {
15094
ctxt->sax->cdataBlock = NULL;
15095
options -= XML_PARSE_NOCDATA;
15096
ctxt->options |= XML_PARSE_NOCDATA;
15098
if (options & XML_PARSE_NSCLEAN) {
15099
ctxt->options |= XML_PARSE_NSCLEAN;
15100
options -= XML_PARSE_NSCLEAN;
15102
if (options & XML_PARSE_NONET) {
15103
ctxt->options |= XML_PARSE_NONET;
15104
options -= XML_PARSE_NONET;
15106
if (options & XML_PARSE_COMPACT) {
15107
ctxt->options |= XML_PARSE_COMPACT;
15108
options -= XML_PARSE_COMPACT;
15110
if (options & XML_PARSE_OLD10) {
15111
ctxt->options |= XML_PARSE_OLD10;
15112
options -= XML_PARSE_OLD10;
15114
if (options & XML_PARSE_NOBASEFIX) {
15115
ctxt->options |= XML_PARSE_NOBASEFIX;
15116
options -= XML_PARSE_NOBASEFIX;
15118
if (options & XML_PARSE_HUGE) {
15119
ctxt->options |= XML_PARSE_HUGE;
15120
options -= XML_PARSE_HUGE;
15121
if (ctxt->dict != NULL)
15122
xmlDictSetLimit(ctxt->dict, 0);
15124
if (options & XML_PARSE_OLDSAX) {
15125
ctxt->options |= XML_PARSE_OLDSAX;
15126
options -= XML_PARSE_OLDSAX;
15128
if (options & XML_PARSE_IGNORE_ENC) {
15129
ctxt->options |= XML_PARSE_IGNORE_ENC;
15130
options -= XML_PARSE_IGNORE_ENC;
15132
if (options & XML_PARSE_BIG_LINES) {
15133
ctxt->options |= XML_PARSE_BIG_LINES;
15134
options -= XML_PARSE_BIG_LINES;
15136
ctxt->linenumbers = 1;
15141
* xmlCtxtUseOptions:
15142
* @ctxt: an XML parser context
15143
* @options: a combination of xmlParserOption
15145
* Applies the options to the parser context
15147
* Returns 0 in case of success, the set of unknown or unimplemented options
15148
* in case of error.
15151
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15153
return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15158
* @ctxt: an XML parser context
15159
* @URL: the base URL to use for the document
15160
* @encoding: the document encoding, or NULL
15161
* @options: a combination of xmlParserOption
15162
* @reuse: keep the context for reuse
15164
* Common front-end for the xmlRead functions
15166
* Returns the resulting document tree or NULL
15169
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15170
int options, int reuse)
15174
xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15175
if (encoding != NULL) {
15176
xmlCharEncodingHandlerPtr hdlr;
15178
hdlr = xmlFindCharEncodingHandler(encoding);
15180
xmlSwitchToEncoding(ctxt, hdlr);
15182
if ((URL != NULL) && (ctxt->input != NULL) &&
15183
(ctxt->input->filename == NULL))
15184
ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15185
xmlParseDocument(ctxt);
15186
if ((ctxt->wellFormed) || ctxt->recovery)
15190
if (ctxt->myDoc != NULL) {
15191
xmlFreeDoc(ctxt->myDoc);
15194
ctxt->myDoc = NULL;
15196
xmlFreeParserCtxt(ctxt);
15204
* @cur: a pointer to a zero terminated string
15205
* @URL: the base URL to use for the document
15206
* @encoding: the document encoding, or NULL
15207
* @options: a combination of xmlParserOption
15209
* parse an XML in-memory document and build a tree.
15211
* Returns the resulting document tree
15214
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15216
xmlParserCtxtPtr ctxt;
15221
ctxt = xmlCreateDocParserCtxt(cur);
15224
return (xmlDoRead(ctxt, URL, encoding, options, 0));
15229
* @filename: a file or URL
15230
* @encoding: the document encoding, or NULL
15231
* @options: a combination of xmlParserOption
15233
* parse an XML file from the filesystem or the network.
15235
* Returns the resulting document tree
15238
xmlReadFile(const char *filename, const char *encoding, int options)
15240
xmlParserCtxtPtr ctxt;
15242
ctxt = xmlCreateURLParserCtxt(filename, options);
15245
return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15250
* @buffer: a pointer to a char array
15251
* @size: the size of the array
15252
* @URL: the base URL to use for the document
15253
* @encoding: the document encoding, or NULL
15254
* @options: a combination of xmlParserOption
15256
* parse an XML in-memory document and build a tree.
15258
* Returns the resulting document tree
15261
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15263
xmlParserCtxtPtr ctxt;
15265
ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15268
return (xmlDoRead(ctxt, URL, encoding, options, 0));
15273
* @fd: an open file descriptor
15274
* @URL: the base URL to use for the document
15275
* @encoding: the document encoding, or NULL
15276
* @options: a combination of xmlParserOption
15278
* parse an XML from a file descriptor and build a tree.
15279
* NOTE that the file descriptor will not be closed when the
15280
* reader is closed or reset.
15282
* Returns the resulting document tree
15285
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15287
xmlParserCtxtPtr ctxt;
15288
xmlParserInputBufferPtr input;
15289
xmlParserInputPtr stream;
15294
input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15297
input->closecallback = NULL;
15298
ctxt = xmlNewParserCtxt();
15299
if (ctxt == NULL) {
15300
xmlFreeParserInputBuffer(input);
15303
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15304
if (stream == NULL) {
15305
xmlFreeParserInputBuffer(input);
15306
xmlFreeParserCtxt(ctxt);
15309
inputPush(ctxt, stream);
15310
return (xmlDoRead(ctxt, URL, encoding, options, 0));
15315
* @ioread: an I/O read function
15316
* @ioclose: an I/O close function
15317
* @ioctx: an I/O handler
15318
* @URL: the base URL to use for the document
15319
* @encoding: the document encoding, or NULL
15320
* @options: a combination of xmlParserOption
15322
* parse an XML document from I/O functions and source and build a tree.
15324
* Returns the resulting document tree
15327
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15328
void *ioctx, const char *URL, const char *encoding, int options)
15330
xmlParserCtxtPtr ctxt;
15331
xmlParserInputBufferPtr input;
15332
xmlParserInputPtr stream;
15334
if (ioread == NULL)
15337
input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15338
XML_CHAR_ENCODING_NONE);
15339
if (input == NULL) {
15340
if (ioclose != NULL)
15344
ctxt = xmlNewParserCtxt();
15345
if (ctxt == NULL) {
15346
xmlFreeParserInputBuffer(input);
15349
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15350
if (stream == NULL) {
15351
xmlFreeParserInputBuffer(input);
15352
xmlFreeParserCtxt(ctxt);
15355
inputPush(ctxt, stream);
15356
return (xmlDoRead(ctxt, URL, encoding, options, 0));
15361
* @ctxt: an XML parser context
15362
* @cur: a pointer to a zero terminated string
15363
* @URL: the base URL to use for the document
15364
* @encoding: the document encoding, or NULL
15365
* @options: a combination of xmlParserOption
15367
* parse an XML in-memory document and build a tree.
15368
* This reuses the existing @ctxt parser context
15370
* Returns the resulting document tree
15373
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15374
const char *URL, const char *encoding, int options)
15376
xmlParserInputPtr stream;
15383
xmlCtxtReset(ctxt);
15385
stream = xmlNewStringInputStream(ctxt, cur);
15386
if (stream == NULL) {
15389
inputPush(ctxt, stream);
15390
return (xmlDoRead(ctxt, URL, encoding, options, 1));
15395
* @ctxt: an XML parser context
15396
* @filename: a file or URL
15397
* @encoding: the document encoding, or NULL
15398
* @options: a combination of xmlParserOption
15400
* parse an XML file from the filesystem or the network.
15401
* This reuses the existing @ctxt parser context
15403
* Returns the resulting document tree
15406
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15407
const char *encoding, int options)
15409
xmlParserInputPtr stream;
15411
if (filename == NULL)
15416
xmlCtxtReset(ctxt);
15418
stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15419
if (stream == NULL) {
15422
inputPush(ctxt, stream);
15423
return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15427
* xmlCtxtReadMemory:
15428
* @ctxt: an XML parser context
15429
* @buffer: a pointer to a char array
15430
* @size: the size of the array
15431
* @URL: the base URL to use for the document
15432
* @encoding: the document encoding, or NULL
15433
* @options: a combination of xmlParserOption
15435
* parse an XML in-memory document and build a tree.
15436
* This reuses the existing @ctxt parser context
15438
* Returns the resulting document tree
15441
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15442
const char *URL, const char *encoding, int options)
15444
xmlParserInputBufferPtr input;
15445
xmlParserInputPtr stream;
15449
if (buffer == NULL)
15452
xmlCtxtReset(ctxt);
15454
input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15455
if (input == NULL) {
15459
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15460
if (stream == NULL) {
15461
xmlFreeParserInputBuffer(input);
15465
inputPush(ctxt, stream);
15466
return (xmlDoRead(ctxt, URL, encoding, options, 1));
15471
* @ctxt: an XML parser context
15472
* @fd: an open file descriptor
15473
* @URL: the base URL to use for the document
15474
* @encoding: the document encoding, or NULL
15475
* @options: a combination of xmlParserOption
15477
* parse an XML from a file descriptor and build a tree.
15478
* This reuses the existing @ctxt parser context
15479
* NOTE that the file descriptor will not be closed when the
15480
* reader is closed or reset.
15482
* Returns the resulting document tree
15485
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15486
const char *URL, const char *encoding, int options)
15488
xmlParserInputBufferPtr input;
15489
xmlParserInputPtr stream;
15496
xmlCtxtReset(ctxt);
15499
input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15502
input->closecallback = NULL;
15503
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15504
if (stream == NULL) {
15505
xmlFreeParserInputBuffer(input);
15508
inputPush(ctxt, stream);
15509
return (xmlDoRead(ctxt, URL, encoding, options, 1));
15514
* @ctxt: an XML parser context
15515
* @ioread: an I/O read function
15516
* @ioclose: an I/O close function
15517
* @ioctx: an I/O handler
15518
* @URL: the base URL to use for the document
15519
* @encoding: the document encoding, or NULL
15520
* @options: a combination of xmlParserOption
15522
* parse an XML document from I/O functions and source and build a tree.
15523
* This reuses the existing @ctxt parser context
15525
* Returns the resulting document tree
15528
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15529
xmlInputCloseCallback ioclose, void *ioctx,
15531
const char *encoding, int options)
15533
xmlParserInputBufferPtr input;
15534
xmlParserInputPtr stream;
15536
if (ioread == NULL)
15541
xmlCtxtReset(ctxt);
15543
input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15544
XML_CHAR_ENCODING_NONE);
15545
if (input == NULL) {
15546
if (ioclose != NULL)
15550
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15551
if (stream == NULL) {
15552
xmlFreeParserInputBuffer(input);
15555
inputPush(ctxt, stream);
15556
return (xmlDoRead(ctxt, URL, encoding, options, 1));
/*
 * NOTE(review): bottom_parser is presumably tested inside elfgcchack.h to
 * emit this file's end-of-unit declarations - confirm against that header.
 */
#define bottom_parser
#include "elfgcchack.h"