2
* parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
* implemented on top of the SAX interfaces
6
* The XML specification:
7
* http://www.w3.org/TR/REC-xml
8
* Original 1.0 version:
9
* http://www.w3.org/TR/1998/REC-xml-19980210
10
* XML second edition working draft
11
* http://www.w3.org/TR/2000/WD-xml-2e-20000814
13
* Okay this is a big file, the parser core is around 7000 lines, then it
14
* is followed by the progressive parser top routines, then the various
15
* high level APIs to call the parser and a few miscellaneous functions.
16
* A number of helper functions and deprecated ones have been moved to
17
* parserInternals.c to reduce this file size.
18
* As much as possible the functions are associated with their relative
19
* production in the XML specification. A few productions defining the
20
* different ranges of character are actually implanted either in
21
* parserInternals.h or parserInternals.c
22
* The DOM tree build is realized from the default SAX callbacks in
24
* The routines doing the validation checks are in valid.c and called either
25
* from the SAX callbacks or as standalone functions using a preparsed
28
* See Copyright for the status of this software.
36
#if defined(WIN32) && !defined (__CYGWIN__)
37
#define XML_DIR_SEP '\\'
39
#define XML_DIR_SEP '/'
46
#include <libxml/xmlmemory.h>
47
#include <libxml/threads.h>
48
#include <libxml/globals.h>
49
#include <libxml/tree.h>
50
#include <libxml/parser.h>
51
#include <libxml/parserInternals.h>
52
#include <libxml/valid.h>
53
#include <libxml/entities.h>
54
#include <libxml/xmlerror.h>
55
#include <libxml/encoding.h>
56
#include <libxml/xmlIO.h>
57
#include <libxml/uri.h>
58
#ifdef LIBXML_CATALOG_ENABLED
59
#include <libxml/catalog.h>
61
#ifdef LIBXML_SCHEMAS_ENABLED
62
#include <libxml/xmlschemastypes.h>
63
#include <libxml/relaxng.h>
71
#ifdef HAVE_SYS_STAT_H
91
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
93
static xmlParserCtxtPtr
94
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95
const xmlChar *base, xmlParserCtxtPtr pctx);
97
/************************************************************************
99
* Arbitrary limits set in the parser. See XML_PARSE_HUGE *
101
************************************************************************/
103
#define XML_PARSER_BIG_ENTITY 1000
104
#define XML_PARSER_LOT_ENTITY 5000
107
* XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
108
* replacement over the size in byte of the input indicates that you have
109
* and eponential behaviour. A value of 10 correspond to at least 3 entity
110
* replacement per byte of input.
112
#define XML_PARSER_NON_LINEAR 10
115
* xmlParserEntityCheck
117
* Function to check non-linear entity expansion behaviour
118
* This is here to detect and stop exponential linear entity expansion
119
* This is not a limitation of the parser but a safety
120
* boundary feature. It can be disabled with the XML_PARSE_HUGE
124
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
125
xmlEntityPtr ent, size_t replacement)
129
if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
131
if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
133
if (replacement != 0) {
134
if (replacement < XML_MAX_TEXT_LENGTH)
138
* If the volume of entity copy reaches 10 times the
139
* amount of parsed data and over the large text threshold
140
* then that's very likely to be an abuse.
142
if (ctxt->input != NULL) {
143
consumed = ctxt->input->consumed +
144
(ctxt->input->cur - ctxt->input->base);
146
consumed += ctxt->sizeentities;
148
if (replacement < XML_PARSER_NON_LINEAR * consumed)
150
} else if (size != 0) {
152
* Do the check based on the replacement size of the entity
154
if (size < XML_PARSER_BIG_ENTITY)
158
* A limit on the amount of text data reasonably used
160
if (ctxt->input != NULL) {
161
consumed = ctxt->input->consumed +
162
(ctxt->input->cur - ctxt->input->base);
164
consumed += ctxt->sizeentities;
166
if ((size < XML_PARSER_NON_LINEAR * consumed) &&
167
(ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
169
} else if (ent != NULL) {
171
* use the number of parsed entities in the replacement
173
size = ent->checked / 2;
176
* The amount of data parsed counting entities size only once
178
if (ctxt->input != NULL) {
179
consumed = ctxt->input->consumed +
180
(ctxt->input->cur - ctxt->input->base);
182
consumed += ctxt->sizeentities;
185
* Check the density of entities for the amount of data
186
* knowing an entity reference will take at least 3 bytes
188
if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
192
* strange we got no data for checking just return
196
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
203
* arbitrary depth limit for the XML documents that we allow to
204
* process. This is not a limitation of the parser but a safety
205
* boundary feature. It can be disabled with the XML_PARSE_HUGE
208
unsigned int xmlParserMaxDepth = 256;
213
#define XML_PARSER_BIG_BUFFER_SIZE 300
214
#define XML_PARSER_BUFFER_SIZE 100
215
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
218
* XML_PARSER_CHUNK_SIZE
220
* When calling GROW that's the minimal amount of data
221
* the parser expected to have received. It is not a hard
222
* limit but an optimization when reading strings like Names
223
* It is not strictly needed as long as inputs available characters
224
* are followed by 0, which should be provided by the I/O level
226
#define XML_PARSER_CHUNK_SIZE 100
229
* List of XML prefixed PI allowed by W3C specs
232
static const char *xmlW3CPIs[] = {
239
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
240
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
241
const xmlChar **str);
243
static xmlParserErrors
244
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
245
xmlSAXHandlerPtr sax,
246
void *user_data, int depth, const xmlChar *URL,
247
const xmlChar *ID, xmlNodePtr *list);
250
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
251
const char *encoding);
252
#ifdef LIBXML_LEGACY_ENABLED
254
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
255
xmlNodePtr lastNode);
256
#endif /* LIBXML_LEGACY_ENABLED */
258
static xmlParserErrors
259
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
260
const xmlChar *string, void *user_data, xmlNodePtr *lst);
263
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
265
/************************************************************************
267
* Some factorized error routines *
269
************************************************************************/
272
* xmlErrAttributeDup:
273
* @ctxt: an XML parser context
274
* @prefix: the attribute prefix
275
* @localname: the attribute localname
277
* Handle a redefinition of attribute error
280
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
281
const xmlChar * localname)
283
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
284
(ctxt->instate == XML_PARSER_EOF))
287
ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
290
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
291
XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
292
(const char *) localname, NULL, NULL, 0, 0,
293
"Attribute %s redefined\n", localname);
295
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
296
XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
297
(const char *) prefix, (const char *) localname,
298
NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
301
ctxt->wellFormed = 0;
302
if (ctxt->recovery == 0)
303
ctxt->disableSAX = 1;
309
* @ctxt: an XML parser context
310
* @error: the error number
311
* @extra: extra information string
313
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
316
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
319
char errstr[129] = "";
321
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
322
(ctxt->instate == XML_PARSER_EOF))
325
case XML_ERR_INVALID_HEX_CHARREF:
326
errmsg = "CharRef: invalid hexadecimal value";
328
case XML_ERR_INVALID_DEC_CHARREF:
329
errmsg = "CharRef: invalid decimal value";
331
case XML_ERR_INVALID_CHARREF:
332
errmsg = "CharRef: invalid value";
334
case XML_ERR_INTERNAL_ERROR:
335
errmsg = "internal error";
337
case XML_ERR_PEREF_AT_EOF:
338
errmsg = "PEReference at end of document";
340
case XML_ERR_PEREF_IN_PROLOG:
341
errmsg = "PEReference in prolog";
343
case XML_ERR_PEREF_IN_EPILOG:
344
errmsg = "PEReference in epilog";
346
case XML_ERR_PEREF_NO_NAME:
347
errmsg = "PEReference: no name";
349
case XML_ERR_PEREF_SEMICOL_MISSING:
350
errmsg = "PEReference: expecting ';'";
352
case XML_ERR_ENTITY_LOOP:
353
errmsg = "Detected an entity reference loop";
355
case XML_ERR_ENTITY_NOT_STARTED:
356
errmsg = "EntityValue: \" or ' expected";
358
case XML_ERR_ENTITY_PE_INTERNAL:
359
errmsg = "PEReferences forbidden in internal subset";
361
case XML_ERR_ENTITY_NOT_FINISHED:
362
errmsg = "EntityValue: \" or ' expected";
364
case XML_ERR_ATTRIBUTE_NOT_STARTED:
365
errmsg = "AttValue: \" or ' expected";
367
case XML_ERR_LT_IN_ATTRIBUTE:
368
errmsg = "Unescaped '<' not allowed in attributes values";
370
case XML_ERR_LITERAL_NOT_STARTED:
371
errmsg = "SystemLiteral \" or ' expected";
373
case XML_ERR_LITERAL_NOT_FINISHED:
374
errmsg = "Unfinished System or Public ID \" or ' expected";
376
case XML_ERR_MISPLACED_CDATA_END:
377
errmsg = "Sequence ']]>' not allowed in content";
379
case XML_ERR_URI_REQUIRED:
380
errmsg = "SYSTEM or PUBLIC, the URI is missing";
382
case XML_ERR_PUBID_REQUIRED:
383
errmsg = "PUBLIC, the Public Identifier is missing";
385
case XML_ERR_HYPHEN_IN_COMMENT:
386
errmsg = "Comment must not contain '--' (double-hyphen)";
388
case XML_ERR_PI_NOT_STARTED:
389
errmsg = "xmlParsePI : no target name";
391
case XML_ERR_RESERVED_XML_NAME:
392
errmsg = "Invalid PI name";
394
case XML_ERR_NOTATION_NOT_STARTED:
395
errmsg = "NOTATION: Name expected here";
397
case XML_ERR_NOTATION_NOT_FINISHED:
398
errmsg = "'>' required to close NOTATION declaration";
400
case XML_ERR_VALUE_REQUIRED:
401
errmsg = "Entity value required";
403
case XML_ERR_URI_FRAGMENT:
404
errmsg = "Fragment not allowed";
406
case XML_ERR_ATTLIST_NOT_STARTED:
407
errmsg = "'(' required to start ATTLIST enumeration";
409
case XML_ERR_NMTOKEN_REQUIRED:
410
errmsg = "NmToken expected in ATTLIST enumeration";
412
case XML_ERR_ATTLIST_NOT_FINISHED:
413
errmsg = "')' required to finish ATTLIST enumeration";
415
case XML_ERR_MIXED_NOT_STARTED:
416
errmsg = "MixedContentDecl : '|' or ')*' expected";
418
case XML_ERR_PCDATA_REQUIRED:
419
errmsg = "MixedContentDecl : '#PCDATA' expected";
421
case XML_ERR_ELEMCONTENT_NOT_STARTED:
422
errmsg = "ContentDecl : Name or '(' expected";
424
case XML_ERR_ELEMCONTENT_NOT_FINISHED:
425
errmsg = "ContentDecl : ',' '|' or ')' expected";
427
case XML_ERR_PEREF_IN_INT_SUBSET:
429
"PEReference: forbidden within markup decl in internal subset";
431
case XML_ERR_GT_REQUIRED:
432
errmsg = "expected '>'";
434
case XML_ERR_CONDSEC_INVALID:
435
errmsg = "XML conditional section '[' expected";
437
case XML_ERR_EXT_SUBSET_NOT_FINISHED:
438
errmsg = "Content error in the external subset";
440
case XML_ERR_CONDSEC_INVALID_KEYWORD:
442
"conditional section INCLUDE or IGNORE keyword expected";
444
case XML_ERR_CONDSEC_NOT_FINISHED:
445
errmsg = "XML conditional section not closed";
447
case XML_ERR_XMLDECL_NOT_STARTED:
448
errmsg = "Text declaration '<?xml' required";
450
case XML_ERR_XMLDECL_NOT_FINISHED:
451
errmsg = "parsing XML declaration: '?>' expected";
453
case XML_ERR_EXT_ENTITY_STANDALONE:
454
errmsg = "external parsed entities cannot be standalone";
456
case XML_ERR_ENTITYREF_SEMICOL_MISSING:
457
errmsg = "EntityRef: expecting ';'";
459
case XML_ERR_DOCTYPE_NOT_FINISHED:
460
errmsg = "DOCTYPE improperly terminated";
462
case XML_ERR_LTSLASH_REQUIRED:
463
errmsg = "EndTag: '</' not found";
465
case XML_ERR_EQUAL_REQUIRED:
466
errmsg = "expected '='";
468
case XML_ERR_STRING_NOT_CLOSED:
469
errmsg = "String not closed expecting \" or '";
471
case XML_ERR_STRING_NOT_STARTED:
472
errmsg = "String not started expecting ' or \"";
474
case XML_ERR_ENCODING_NAME:
475
errmsg = "Invalid XML encoding name";
477
case XML_ERR_STANDALONE_VALUE:
478
errmsg = "standalone accepts only 'yes' or 'no'";
480
case XML_ERR_DOCUMENT_EMPTY:
481
errmsg = "Document is empty";
483
case XML_ERR_DOCUMENT_END:
484
errmsg = "Extra content at the end of the document";
486
case XML_ERR_NOT_WELL_BALANCED:
487
errmsg = "chunk is not well balanced";
489
case XML_ERR_EXTRA_CONTENT:
490
errmsg = "extra content at the end of well balanced chunk";
492
case XML_ERR_VERSION_MISSING:
493
errmsg = "Malformed declaration expecting version";
495
case XML_ERR_NAME_TOO_LONG:
496
errmsg = "Name too long use XML_PARSE_HUGE option";
504
errmsg = "Unregistered error message";
507
snprintf(errstr, 128, "%s\n", errmsg);
509
snprintf(errstr, 128, "%s: %%s\n", errmsg);
512
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
513
XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0],
516
ctxt->wellFormed = 0;
517
if (ctxt->recovery == 0)
518
ctxt->disableSAX = 1;
524
* @ctxt: an XML parser context
525
* @error: the error number
526
* @msg: the error message
528
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
531
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
534
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
535
(ctxt->instate == XML_PARSER_EOF))
539
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
540
XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
542
ctxt->wellFormed = 0;
543
if (ctxt->recovery == 0)
544
ctxt->disableSAX = 1;
550
* @ctxt: an XML parser context
551
* @error: the error number
552
* @msg: the error message
559
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
560
const char *msg, const xmlChar *str1, const xmlChar *str2)
562
xmlStructuredErrorFunc schannel = NULL;
564
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
565
(ctxt->instate == XML_PARSER_EOF))
567
if ((ctxt != NULL) && (ctxt->sax != NULL) &&
568
(ctxt->sax->initialized == XML_SAX2_MAGIC))
569
schannel = ctxt->sax->serror;
571
__xmlRaiseError(schannel,
572
(ctxt->sax) ? ctxt->sax->warning : NULL,
574
ctxt, NULL, XML_FROM_PARSER, error,
575
XML_ERR_WARNING, NULL, 0,
576
(const char *) str1, (const char *) str2, NULL, 0, 0,
577
msg, (const char *) str1, (const char *) str2);
579
__xmlRaiseError(schannel, NULL, NULL,
580
ctxt, NULL, XML_FROM_PARSER, error,
581
XML_ERR_WARNING, NULL, 0,
582
(const char *) str1, (const char *) str2, NULL, 0, 0,
583
msg, (const char *) str1, (const char *) str2);
589
* @ctxt: an XML parser context
590
* @error: the error number
591
* @msg: the error message
594
* Handle a validity error.
597
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
598
const char *msg, const xmlChar *str1, const xmlChar *str2)
600
xmlStructuredErrorFunc schannel = NULL;
602
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
603
(ctxt->instate == XML_PARSER_EOF))
607
if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
608
schannel = ctxt->sax->serror;
611
__xmlRaiseError(schannel,
612
ctxt->vctxt.error, ctxt->vctxt.userData,
613
ctxt, NULL, XML_FROM_DTD, error,
614
XML_ERR_ERROR, NULL, 0, (const char *) str1,
615
(const char *) str2, NULL, 0, 0,
616
msg, (const char *) str1, (const char *) str2);
619
__xmlRaiseError(schannel, NULL, NULL,
620
ctxt, NULL, XML_FROM_DTD, error,
621
XML_ERR_ERROR, NULL, 0, (const char *) str1,
622
(const char *) str2, NULL, 0, 0,
623
msg, (const char *) str1, (const char *) str2);
629
* @ctxt: an XML parser context
630
* @error: the error number
631
* @msg: the error message
632
* @val: an integer value
634
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
637
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
638
const char *msg, int val)
640
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
641
(ctxt->instate == XML_PARSER_EOF))
645
__xmlRaiseError(NULL, NULL, NULL,
646
ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
647
NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
649
ctxt->wellFormed = 0;
650
if (ctxt->recovery == 0)
651
ctxt->disableSAX = 1;
656
* xmlFatalErrMsgStrIntStr:
657
* @ctxt: an XML parser context
658
* @error: the error number
659
* @msg: the error message
660
* @str1: an string info
661
* @val: an integer value
662
* @str2: an string info
664
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
667
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
668
const char *msg, const xmlChar *str1, int val,
671
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
672
(ctxt->instate == XML_PARSER_EOF))
676
__xmlRaiseError(NULL, NULL, NULL,
677
ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
678
NULL, 0, (const char *) str1, (const char *) str2,
679
NULL, val, 0, msg, str1, val, str2);
681
ctxt->wellFormed = 0;
682
if (ctxt->recovery == 0)
683
ctxt->disableSAX = 1;
689
* @ctxt: an XML parser context
690
* @error: the error number
691
* @msg: the error message
692
* @val: a string value
694
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
697
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
698
const char *msg, const xmlChar * val)
700
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
701
(ctxt->instate == XML_PARSER_EOF))
705
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
706
XML_FROM_PARSER, error, XML_ERR_FATAL,
707
NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
710
ctxt->wellFormed = 0;
711
if (ctxt->recovery == 0)
712
ctxt->disableSAX = 1;
718
* @ctxt: an XML parser context
719
* @error: the error number
720
* @msg: the error message
721
* @val: a string value
723
* Handle a non fatal parser error
726
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
727
const char *msg, const xmlChar * val)
729
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
730
(ctxt->instate == XML_PARSER_EOF))
734
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
735
XML_FROM_PARSER, error, XML_ERR_ERROR,
736
NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
742
* @ctxt: an XML parser context
743
* @error: the error number
745
* @info1: extra information string
746
* @info2: extra information string
748
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
751
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
753
const xmlChar * info1, const xmlChar * info2,
754
const xmlChar * info3)
756
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
757
(ctxt->instate == XML_PARSER_EOF))
761
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
762
XML_ERR_ERROR, NULL, 0, (const char *) info1,
763
(const char *) info2, (const char *) info3, 0, 0, msg,
764
info1, info2, info3);
766
ctxt->nsWellFormed = 0;
771
* @ctxt: an XML parser context
772
* @error: the error number
774
* @info1: extra information string
775
* @info2: extra information string
777
* Handle a namespace warning error
780
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
782
const xmlChar * info1, const xmlChar * info2,
783
const xmlChar * info3)
785
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
786
(ctxt->instate == XML_PARSER_EOF))
788
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
789
XML_ERR_WARNING, NULL, 0, (const char *) info1,
790
(const char *) info2, (const char *) info3, 0, 0, msg,
791
info1, info2, info3);
794
/************************************************************************
796
* Library wide options *
798
************************************************************************/
802
* @feature: the feature to be examined
804
* Examines if the library has been compiled with a given feature.
806
* Returns a non-zero value if the feature exist, otherwise zero.
807
* Returns zero (0) if the feature does not exist or an unknown
808
* unknown feature is requested, non-zero otherwise.
811
xmlHasFeature(xmlFeature feature)
814
case XML_WITH_THREAD:
815
#ifdef LIBXML_THREAD_ENABLED
821
#ifdef LIBXML_TREE_ENABLED
826
case XML_WITH_OUTPUT:
827
#ifdef LIBXML_OUTPUT_ENABLED
833
#ifdef LIBXML_PUSH_ENABLED
838
case XML_WITH_READER:
839
#ifdef LIBXML_READER_ENABLED
844
case XML_WITH_PATTERN:
845
#ifdef LIBXML_PATTERN_ENABLED
850
case XML_WITH_WRITER:
851
#ifdef LIBXML_WRITER_ENABLED
857
#ifdef LIBXML_SAX1_ENABLED
863
#ifdef LIBXML_FTP_ENABLED
869
#ifdef LIBXML_HTTP_ENABLED
875
#ifdef LIBXML_VALID_ENABLED
881
#ifdef LIBXML_HTML_ENABLED
886
case XML_WITH_LEGACY:
887
#ifdef LIBXML_LEGACY_ENABLED
893
#ifdef LIBXML_C14N_ENABLED
898
case XML_WITH_CATALOG:
899
#ifdef LIBXML_CATALOG_ENABLED
905
#ifdef LIBXML_XPATH_ENABLED
911
#ifdef LIBXML_XPTR_ENABLED
916
case XML_WITH_XINCLUDE:
917
#ifdef LIBXML_XINCLUDE_ENABLED
923
#ifdef LIBXML_ICONV_ENABLED
928
case XML_WITH_ISO8859X:
929
#ifdef LIBXML_ISO8859X_ENABLED
934
case XML_WITH_UNICODE:
935
#ifdef LIBXML_UNICODE_ENABLED
940
case XML_WITH_REGEXP:
941
#ifdef LIBXML_REGEXP_ENABLED
946
case XML_WITH_AUTOMATA:
947
#ifdef LIBXML_AUTOMATA_ENABLED
953
#ifdef LIBXML_EXPR_ENABLED
958
case XML_WITH_SCHEMAS:
959
#ifdef LIBXML_SCHEMAS_ENABLED
964
case XML_WITH_SCHEMATRON:
965
#ifdef LIBXML_SCHEMATRON_ENABLED
970
case XML_WITH_MODULES:
971
#ifdef LIBXML_MODULES_ENABLED
977
#ifdef LIBXML_DEBUG_ENABLED
982
case XML_WITH_DEBUG_MEM:
983
#ifdef DEBUG_MEMORY_LOCATION
988
case XML_WITH_DEBUG_RUN:
989
#ifdef LIBXML_DEBUG_RUNTIME
995
#ifdef LIBXML_ZLIB_ENABLED
1001
#ifdef LIBXML_LZMA_ENABLED
1007
#ifdef LIBXML_ICU_ENABLED
1018
/************************************************************************
1020
* SAX2 defaulted attributes handling *
1022
************************************************************************/
1026
* @ctxt: an XML parser context
1028
* Do the SAX2 detection and specific intialization
1031
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1032
if (ctxt == NULL) return;
1033
#ifdef LIBXML_SAX1_ENABLED
1034
if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1035
((ctxt->sax->startElementNs != NULL) ||
1036
(ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1039
#endif /* LIBXML_SAX1_ENABLED */
1041
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1042
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1043
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1044
if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1045
(ctxt->str_xml_ns == NULL)) {
1046
xmlErrMemory(ctxt, NULL);
1050
typedef struct _xmlDefAttrs xmlDefAttrs;
1051
typedef xmlDefAttrs *xmlDefAttrsPtr;
1052
struct _xmlDefAttrs {
1053
int nbAttrs; /* number of defaulted attributes on that element */
1054
int maxAttrs; /* the size of the array */
1055
const xmlChar *values[5]; /* array of localname/prefix/values/external */
1059
* xmlAttrNormalizeSpace:
1060
* @src: the source string
1061
* @dst: the target string
1063
* Normalize the space in non CDATA attribute values:
1064
* If the attribute type is not CDATA, then the XML processor MUST further
1065
* process the normalized attribute value by discarding any leading and
1066
* trailing space (#x20) characters, and by replacing sequences of space
1067
* (#x20) characters by a single space (#x20) character.
1068
* Note that the size of dst need to be at least src, and if one doesn't need
1069
* to preserve dst (and it doesn't come from a dictionary or read-only) then
1070
* passing src as dst is just fine.
1072
* Returns a pointer to the normalized value (dst) or NULL if no conversion
1076
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1078
if ((src == NULL) || (dst == NULL))
1081
while (*src == 0x20) src++;
1084
while (*src == 0x20) src++;
1098
* xmlAttrNormalizeSpace2:
1099
* @src: the source string
1101
* Normalize the space in non CDATA attribute values, a slightly more complex
1102
* front end to avoid allocation problems when running on attribute values
1103
* coming from the input.
1105
* Returns a pointer to the normalized value (dst) or NULL if no conversion
1108
static const xmlChar *
1109
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1112
int remove_head = 0;
1113
int need_realloc = 0;
1116
if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1123
while (*cur == 0x20) {
1130
if ((*cur == 0x20) || (*cur == 0)) {
1140
ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1142
xmlErrMemory(ctxt, NULL);
1145
xmlAttrNormalizeSpace(ret, ret);
1146
*len = (int) strlen((const char *)ret);
1148
} else if (remove_head) {
1149
*len -= remove_head;
1150
memmove(src, src + remove_head, 1 + *len);
1158
* @ctxt: an XML parser context
1159
* @fullname: the element fullname
1160
* @fullattr: the attribute fullname
1161
* @value: the attribute value
1163
* Add a defaulted attribute for an element
1166
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1167
const xmlChar *fullname,
1168
const xmlChar *fullattr,
1169
const xmlChar *value) {
1170
xmlDefAttrsPtr defaults;
1172
const xmlChar *name;
1173
const xmlChar *prefix;
1176
* Allows to detect attribute redefinitions
1178
if (ctxt->attsSpecial != NULL) {
1179
if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1183
if (ctxt->attsDefault == NULL) {
1184
ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1185
if (ctxt->attsDefault == NULL)
1190
* split the element name into prefix:localname , the string found
1191
* are within the DTD and then not associated to namespace names.
1193
name = xmlSplitQName3(fullname, &len);
1195
name = xmlDictLookup(ctxt->dict, fullname, -1);
1198
name = xmlDictLookup(ctxt->dict, name, -1);
1199
prefix = xmlDictLookup(ctxt->dict, fullname, len);
1203
* make sure there is some storage
1205
defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1206
if (defaults == NULL) {
1207
defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1208
(4 * 5) * sizeof(const xmlChar *));
1209
if (defaults == NULL)
1211
defaults->nbAttrs = 0;
1212
defaults->maxAttrs = 4;
1213
if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1214
defaults, NULL) < 0) {
1218
} else if (defaults->nbAttrs >= defaults->maxAttrs) {
1219
xmlDefAttrsPtr temp;
1221
temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1222
(2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1226
defaults->maxAttrs *= 2;
1227
if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1228
defaults, NULL) < 0) {
1235
* Split the element name into prefix:localname , the string found
1236
* are within the DTD and hen not associated to namespace names.
1238
name = xmlSplitQName3(fullattr, &len);
1240
name = xmlDictLookup(ctxt->dict, fullattr, -1);
1243
name = xmlDictLookup(ctxt->dict, name, -1);
1244
prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1247
defaults->values[5 * defaults->nbAttrs] = name;
1248
defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1249
/* intern the string and precompute the end */
1250
len = xmlStrlen(value);
1251
value = xmlDictLookup(ctxt->dict, value, len);
1252
defaults->values[5 * defaults->nbAttrs + 2] = value;
1253
defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1255
defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1257
defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1258
defaults->nbAttrs++;
1263
xmlErrMemory(ctxt, NULL);
1268
* xmlAddSpecialAttr:
1269
* @ctxt: an XML parser context
1270
* @fullname: the element fullname
1271
* @fullattr: the attribute fullname
1272
* @type: the attribute type
1274
* Register this attribute type
1277
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1278
const xmlChar *fullname,
1279
const xmlChar *fullattr,
1282
if (ctxt->attsSpecial == NULL) {
1283
ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1284
if (ctxt->attsSpecial == NULL)
1288
if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1291
xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1292
(void *) (long) type);
1296
xmlErrMemory(ctxt, NULL);
1301
* xmlCleanSpecialAttrCallback:
1303
* Removes CDATA attributes from the special attribute table
1306
xmlCleanSpecialAttrCallback(void *payload, void *data,
1307
const xmlChar *fullname, const xmlChar *fullattr,
1308
const xmlChar *unused ATTRIBUTE_UNUSED) {
1309
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1311
if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1312
xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1317
* xmlCleanSpecialAttr:
1318
* @ctxt: an XML parser context
1320
* Trim the list of attributes defined to remove all those of type
1321
* CDATA as they are not special. This call should be done when finishing
1322
* to parse the DTD and before starting to parse the document root.
1325
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1327
if (ctxt->attsSpecial == NULL)
1330
xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1332
if (xmlHashSize(ctxt->attsSpecial) == 0) {
1333
xmlHashFree(ctxt->attsSpecial, NULL);
1334
ctxt->attsSpecial = NULL;
1340
* xmlCheckLanguageID:
1341
* @lang: pointer to the string value
1343
* Checks that the value conforms to the LanguageID production:
1345
* NOTE: this is somewhat deprecated, those productions were removed from
1346
* the XML Second edition.
1348
* [33] LanguageID ::= Langcode ('-' Subcode)*
1349
* [34] Langcode ::= ISO639Code | IanaCode | UserCode
1350
* [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1351
* [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1352
* [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1353
* [38] Subcode ::= ([a-z] | [A-Z])+
1355
* The current REC reference the sucessors of RFC 1766, currently 5646
1357
* http://www.rfc-editor.org/rfc/rfc5646.txt
1358
* langtag = language
1364
* language = 2*3ALPHA ; shortest ISO 639 code
1365
* ["-" extlang] ; sometimes followed by
1366
* ; extended language subtags
1367
* / 4ALPHA ; or reserved for future use
1368
* / 5*8ALPHA ; or registered language subtag
1370
* extlang = 3ALPHA ; selected ISO 639 codes
1371
* *2("-" 3ALPHA) ; permanently reserved
1373
* script = 4ALPHA ; ISO 15924 code
1375
* region = 2ALPHA ; ISO 3166-1 code
1376
* / 3DIGIT ; UN M.49 code
1378
* variant = 5*8alphanum ; registered variants
1379
* / (DIGIT 3alphanum)
1381
* extension = singleton 1*("-" (2*8alphanum))
1383
* ; Single alphanumerics
1384
* ; "x" reserved for private use
1385
* singleton = DIGIT ; 0 - 9
1391
* it sounds right to still allow Irregular i-xxx IANA and user codes too
1392
* The parser below doesn't try to cope with extension or privateuse
1393
* that could be added but that's not interoperable anyway
1395
* Returns 1 if correct 0 otherwise
1398
xmlCheckLanguageID(const xmlChar * lang)
1400
const xmlChar *cur = lang, *nxt;
1404
if (((cur[0] == 'i') && (cur[1] == '-')) ||
1405
((cur[0] == 'I') && (cur[1] == '-')) ||
1406
((cur[0] == 'x') && (cur[1] == '-')) ||
1407
((cur[0] == 'X') && (cur[1] == '-'))) {
1409
* Still allow IANA code and user code which were coming
1410
* from the previous version of the XML-1.0 specification
1411
* it's deprecated but we should not fail
1414
while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1415
((cur[0] >= 'a') && (cur[0] <= 'z')))
1417
return(cur[0] == 0);
1420
while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1421
((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1423
if (nxt - cur >= 4) {
1427
if ((nxt - cur > 8) || (nxt[0] != 0))
1433
/* we got an ISO 639 code */
1441
/* now we can have extlang or script or region or variant */
1442
if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1445
while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1446
((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1452
if ((nxt - cur >= 5) && (nxt - cur <= 8))
1456
/* we parsed an extlang */
1464
/* now we can have script or region or variant */
1465
if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1468
while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1469
((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1473
if ((nxt - cur >= 5) && (nxt - cur <= 8))
1477
/* we parsed a script */
1486
/* now we can have region or variant */
1487
if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1490
while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1491
((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1494
if ((nxt - cur >= 5) && (nxt - cur <= 8))
1498
/* we parsed a region */
1507
/* now we can just have a variant */
1508
while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1509
((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1512
if ((nxt - cur < 5) || (nxt - cur > 8))
1515
/* we parsed a variant */
1521
/* extensions and private use subtags not checked */
1525
if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1526
((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1533
/************************************************************************
1535
* Parser stacks related functions and macros *
1537
************************************************************************/
1539
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1540
const xmlChar ** str);
1545
* @ctxt: an XML parser context
1546
* @prefix: the namespace prefix or NULL
1547
* @URL: the namespace name
1549
* Pushes a new parser namespace on top of the ns stack
1551
* Returns -1 in case of error, -2 if the namespace should be discarded
1552
* and the index in the stack otherwise.
1555
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1557
if (ctxt->options & XML_PARSE_NSCLEAN) {
1559
for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1560
if (ctxt->nsTab[i] == prefix) {
1562
if (ctxt->nsTab[i + 1] == URL)
1564
/* out of scope keep it */
1569
if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1572
ctxt->nsTab = (const xmlChar **)
1573
xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1574
if (ctxt->nsTab == NULL) {
1575
xmlErrMemory(ctxt, NULL);
1579
} else if (ctxt->nsNr >= ctxt->nsMax) {
1580
const xmlChar ** tmp;
1582
tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1583
ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1585
xmlErrMemory(ctxt, NULL);
1591
ctxt->nsTab[ctxt->nsNr++] = prefix;
1592
ctxt->nsTab[ctxt->nsNr++] = URL;
1593
return (ctxt->nsNr);
1597
* @ctxt: an XML parser context
1598
* @nr: the number to pop
1600
* Pops the top @nr parser prefix/namespace from the ns stack
1602
* Returns the number of namespaces removed
1605
nsPop(xmlParserCtxtPtr ctxt, int nr)
1609
if (ctxt->nsTab == NULL) return(0);
1610
if (ctxt->nsNr < nr) {
1611
xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1614
if (ctxt->nsNr <= 0)
1617
for (i = 0;i < nr;i++) {
1619
ctxt->nsTab[ctxt->nsNr] = NULL;
1626
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1627
const xmlChar **atts;
1631
if (ctxt->atts == NULL) {
1632
maxatts = 55; /* allow for 10 attrs by default */
1633
atts = (const xmlChar **)
1634
xmlMalloc(maxatts * sizeof(xmlChar *));
1635
if (atts == NULL) goto mem_error;
1637
attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1638
if (attallocs == NULL) goto mem_error;
1639
ctxt->attallocs = attallocs;
1640
ctxt->maxatts = maxatts;
1641
} else if (nr + 5 > ctxt->maxatts) {
1642
maxatts = (nr + 5) * 2;
1643
atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1644
maxatts * sizeof(const xmlChar *));
1645
if (atts == NULL) goto mem_error;
1647
attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1648
(maxatts / 5) * sizeof(int));
1649
if (attallocs == NULL) goto mem_error;
1650
ctxt->attallocs = attallocs;
1651
ctxt->maxatts = maxatts;
1653
return(ctxt->maxatts);
1655
xmlErrMemory(ctxt, NULL);
1661
* @ctxt: an XML parser context
1662
* @value: the parser input
1664
* Pushes a new parser input on top of the input stack
1666
* Returns -1 in case of error, the index in the stack otherwise
1669
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1671
if ((ctxt == NULL) || (value == NULL))
1673
if (ctxt->inputNr >= ctxt->inputMax) {
1674
ctxt->inputMax *= 2;
1676
(xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1678
sizeof(ctxt->inputTab[0]));
1679
if (ctxt->inputTab == NULL) {
1680
xmlErrMemory(ctxt, NULL);
1681
xmlFreeInputStream(value);
1682
ctxt->inputMax /= 2;
1687
ctxt->inputTab[ctxt->inputNr] = value;
1688
ctxt->input = value;
1689
return (ctxt->inputNr++);
1693
* @ctxt: an XML parser context
1695
* Pops the top parser input from the input stack
1697
* Returns the input just removed
1700
inputPop(xmlParserCtxtPtr ctxt)
1702
xmlParserInputPtr ret;
1706
if (ctxt->inputNr <= 0)
1709
if (ctxt->inputNr > 0)
1710
ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1713
ret = ctxt->inputTab[ctxt->inputNr];
1714
ctxt->inputTab[ctxt->inputNr] = NULL;
1719
* @ctxt: an XML parser context
1720
* @value: the element node
1722
* Pushes a new element node on top of the node stack
1724
* Returns -1 in case of error, the index in the stack otherwise
1727
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1729
if (ctxt == NULL) return(0);
1730
if (ctxt->nodeNr >= ctxt->nodeMax) {
1733
tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1735
sizeof(ctxt->nodeTab[0]));
1737
xmlErrMemory(ctxt, NULL);
1740
ctxt->nodeTab = tmp;
1743
if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1744
((ctxt->options & XML_PARSE_HUGE) == 0)) {
1745
xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1746
"Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1748
ctxt->instate = XML_PARSER_EOF;
1751
ctxt->nodeTab[ctxt->nodeNr] = value;
1753
return (ctxt->nodeNr++);
1758
* @ctxt: an XML parser context
1760
* Pops the top element node from the node stack
1762
* Returns the node just removed
1765
nodePop(xmlParserCtxtPtr ctxt)
1769
if (ctxt == NULL) return(NULL);
1770
if (ctxt->nodeNr <= 0)
1773
if (ctxt->nodeNr > 0)
1774
ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1777
ret = ctxt->nodeTab[ctxt->nodeNr];
1778
ctxt->nodeTab[ctxt->nodeNr] = NULL;
1782
#ifdef LIBXML_PUSH_ENABLED
1785
* @ctxt: an XML parser context
1786
* @value: the element name
1787
* @prefix: the element prefix
1788
* @URI: the element namespace name
1790
* Pushes a new element name/prefix/URL on top of the name stack
1792
* Returns -1 in case of error, the index in the stack otherwise
1795
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1796
const xmlChar *prefix, const xmlChar *URI, int nsNr)
1798
if (ctxt->nameNr >= ctxt->nameMax) {
1799
const xmlChar * *tmp;
1802
tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1804
sizeof(ctxt->nameTab[0]));
1809
ctxt->nameTab = tmp;
1810
tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1812
sizeof(ctxt->pushTab[0]));
1817
ctxt->pushTab = tmp2;
1819
ctxt->nameTab[ctxt->nameNr] = value;
1821
ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1822
ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1823
ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1824
return (ctxt->nameNr++);
1826
xmlErrMemory(ctxt, NULL);
1831
* @ctxt: an XML parser context
1833
* Pops the top element/prefix/URI name from the name stack
1835
* Returns the name just removed
1837
static const xmlChar *
1838
nameNsPop(xmlParserCtxtPtr ctxt)
1842
if (ctxt->nameNr <= 0)
1845
if (ctxt->nameNr > 0)
1846
ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1849
ret = ctxt->nameTab[ctxt->nameNr];
1850
ctxt->nameTab[ctxt->nameNr] = NULL;
1853
#endif /* LIBXML_PUSH_ENABLED */
1857
* @ctxt: an XML parser context
1858
* @value: the element name
1860
* Pushes a new element name on top of the name stack
1862
* Returns -1 in case of error, the index in the stack otherwise
1865
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1867
if (ctxt == NULL) return (-1);
1869
if (ctxt->nameNr >= ctxt->nameMax) {
1870
const xmlChar * *tmp;
1871
tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1873
sizeof(ctxt->nameTab[0]));
1877
ctxt->nameTab = tmp;
1880
ctxt->nameTab[ctxt->nameNr] = value;
1882
return (ctxt->nameNr++);
1884
xmlErrMemory(ctxt, NULL);
1889
* @ctxt: an XML parser context
1891
* Pops the top element name from the name stack
1893
* Returns the name just removed
1896
namePop(xmlParserCtxtPtr ctxt)
1900
if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1903
if (ctxt->nameNr > 0)
1904
ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1907
ret = ctxt->nameTab[ctxt->nameNr];
1908
ctxt->nameTab[ctxt->nameNr] = NULL;
1912
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1913
if (ctxt->spaceNr >= ctxt->spaceMax) {
1916
ctxt->spaceMax *= 2;
1917
tmp = (int *) xmlRealloc(ctxt->spaceTab,
1918
ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1920
xmlErrMemory(ctxt, NULL);
1924
ctxt->spaceTab = tmp;
1926
ctxt->spaceTab[ctxt->spaceNr] = val;
1927
ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1928
return(ctxt->spaceNr++);
1931
static int spacePop(xmlParserCtxtPtr ctxt) {
1933
if (ctxt->spaceNr <= 0) return(0);
1935
if (ctxt->spaceNr > 0)
1936
ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1938
ctxt->space = &ctxt->spaceTab[0];
1939
ret = ctxt->spaceTab[ctxt->spaceNr];
1940
ctxt->spaceTab[ctxt->spaceNr] = -1;
1945
* Macros for accessing the content. Those should be used only by the parser,
1948
* Dirty macros, i.e. one often need to make assumption on the context to
1951
* CUR_PTR return the current pointer to the xmlChar to be parsed.
1952
* To be used with extreme caution since operations consuming
1953
* characters may move the input buffer to a different location !
1954
* CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1955
* This should be used internally by the parser
1956
* only to compare to ASCII values otherwise it would break when
1957
* running with UTF-8 encoding.
1958
* RAW same as CUR but in the input buffer, bypass any token
1959
* extraction that may have been done
1960
* NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only
1961
* to compare on ASCII based substring.
1962
* SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1963
* strings without newlines within the parser.
1964
* NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1965
* defined char within the parser.
1966
* Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1968
* NEXT Skip to the next character, this does the proper decoding
1969
* in UTF-8 mode. It also pop-up unfinished entities on the fly.
1970
* NEXTL(l) Skip the current unicode character of l xmlChars long.
1971
* CUR_CHAR(l) returns the current unicode character (int), set l
1972
* to the number of xmlChars used for the encoding [0-5].
1973
* CUR_SCHAR same but operate on a string instead of the context
1974
* COPY_BUF copy the current unicode char to the target buffer, increment
1976
* GROW, SHRINK handling of input buffers
1979
#define RAW (*ctxt->input->cur)
1980
#define CUR (*ctxt->input->cur)
1981
#define NXT(val) ctxt->input->cur[(val)]
1982
#define CUR_PTR ctxt->input->cur
1984
#define CMP4( s, c1, c2, c3, c4 ) \
1985
( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
1986
((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
1987
#define CMP5( s, c1, c2, c3, c4, c5 ) \
1988
( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
1989
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
1990
( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
1991
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
1992
( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
1993
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
1994
( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
1995
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
1996
( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
1997
((unsigned char *) s)[ 8 ] == c9 )
1998
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
1999
( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
2000
((unsigned char *) s)[ 9 ] == c10 )
2002
#define SKIP(val) do { \
2003
ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
2004
if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
2005
if ((*ctxt->input->cur == 0) && \
2006
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
2007
xmlPopInput(ctxt); \
2010
#define SKIPL(val) do { \
2012
for(skipl=0; skipl<val; skipl++) { \
2013
if (*(ctxt->input->cur) == '\n') { \
2014
ctxt->input->line++; ctxt->input->col = 1; \
2015
} else ctxt->input->col++; \
2017
ctxt->input->cur++; \
2019
if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
2020
if ((*ctxt->input->cur == 0) && \
2021
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
2022
xmlPopInput(ctxt); \
2025
#define SHRINK if ((ctxt->progressive == 0) && \
2026
(ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2027
(ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2030
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2031
xmlParserInputShrink(ctxt->input);
2032
if ((*ctxt->input->cur == 0) &&
2033
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2037
#define GROW if ((ctxt->progressive == 0) && \
2038
(ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2041
static void xmlGROW (xmlParserCtxtPtr ctxt) {
2042
if ((((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
2043
((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
2044
((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
2045
((ctxt->options & XML_PARSE_HUGE) == 0)) {
2046
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2047
ctxt->instate = XML_PARSER_EOF;
2049
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2050
if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
2051
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2055
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
2057
#define NEXT xmlNextChar(ctxt)
2060
ctxt->input->col++; \
2061
ctxt->input->cur++; \
2063
if (*ctxt->input->cur == 0) \
2064
xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
2067
#define NEXTL(l) do { \
2068
if (*(ctxt->input->cur) == '\n') { \
2069
ctxt->input->line++; ctxt->input->col = 1; \
2070
} else ctxt->input->col++; \
2071
ctxt->input->cur += l; \
2072
if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
2075
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
2076
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
2078
#define COPY_BUF(l,b,i,v) \
2079
if (l == 1) b[i++] = (xmlChar) v; \
2080
else i += xmlCopyCharMultiByte(&b[i],v)
2083
* xmlSkipBlankChars:
2084
* @ctxt: the XML parser context
2086
* skip all blanks character found at that point in the input streams.
2087
* It pops up finished entities in the process if allowable at that point.
2089
* Returns the number of space chars skipped
2093
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2097
* It's Okay to use CUR/NEXT here since all the blanks are on
2100
if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2103
* if we are in the document content, go really fast
2105
cur = ctxt->input->cur;
2106
while (IS_BLANK_CH(*cur)) {
2108
ctxt->input->line++; ctxt->input->col = 1;
2113
ctxt->input->cur = cur;
2114
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2115
cur = ctxt->input->cur;
2118
ctxt->input->cur = cur;
2123
while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
2128
while ((cur == 0) && (ctxt->inputNr > 1) &&
2129
(ctxt->instate != XML_PARSER_COMMENT)) {
2134
* Need to handle support of entities branching here
2136
if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2137
} while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2142
/************************************************************************
2144
* Commodity functions to handle entities *
2146
************************************************************************/
2150
* @ctxt: an XML parser context
2152
* xmlPopInput: the current input pointed by ctxt->input came to an end
2153
* pop it and return the next char.
2155
* Returns the current xmlChar in the parser context
2158
xmlPopInput(xmlParserCtxtPtr ctxt) {
2159
if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2160
if (xmlParserDebugEntities)
2161
xmlGenericError(xmlGenericErrorContext,
2162
"Popping input %d\n", ctxt->inputNr);
2163
xmlFreeInputStream(inputPop(ctxt));
2164
if ((*ctxt->input->cur == 0) &&
2165
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2166
return(xmlPopInput(ctxt));
2172
* @ctxt: an XML parser context
2173
* @input: an XML parser input fragment (entity, XML fragment ...).
2175
* xmlPushInput: switch to a new input stream which is stacked on top
2176
* of the previous one(s).
2177
* Returns -1 in case of error or the index in the input stack
2180
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2182
if (input == NULL) return(-1);
2184
if (xmlParserDebugEntities) {
2185
if ((ctxt->input != NULL) && (ctxt->input->filename))
2186
xmlGenericError(xmlGenericErrorContext,
2187
"%s(%d): ", ctxt->input->filename,
2189
xmlGenericError(xmlGenericErrorContext,
2190
"Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2192
ret = inputPush(ctxt, input);
2193
if (ctxt->instate == XML_PARSER_EOF)
2201
* @ctxt: an XML parser context
2203
* parse Reference declarations
2205
* [66] CharRef ::= '&#' [0-9]+ ';' |
2206
* '&#x' [0-9a-fA-F]+ ';'
2208
* [ WFC: Legal Character ]
2209
* Characters referred to using character references must match the
2210
* production for Char.
2212
* Returns the value parsed (as an int), 0 in case of error
2215
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2216
unsigned int val = 0;
2218
unsigned int outofrange = 0;
2221
* Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2223
if ((RAW == '&') && (NXT(1) == '#') &&
2227
while (RAW != ';') { /* loop blocked by count */
2231
if (ctxt->instate == XML_PARSER_EOF)
2234
if ((RAW >= '0') && (RAW <= '9'))
2235
val = val * 16 + (CUR - '0');
2236
else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2237
val = val * 16 + (CUR - 'a') + 10;
2238
else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2239
val = val * 16 + (CUR - 'A') + 10;
2241
xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2252
/* on purpose to avoid reentrancy problems with NEXT and SKIP */
2257
} else if ((RAW == '&') && (NXT(1) == '#')) {
2260
while (RAW != ';') { /* loop blocked by count */
2264
if (ctxt->instate == XML_PARSER_EOF)
2267
if ((RAW >= '0') && (RAW <= '9'))
2268
val = val * 10 + (CUR - '0');
2270
xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2281
/* on purpose to avoid reentrancy problems with NEXT and SKIP */
2287
xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2291
* [ WFC: Legal Character ]
2292
* Characters referred to using character references must match the
2293
* production for Char.
2295
if ((IS_CHAR(val) && (outofrange == 0))) {
2298
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2299
"xmlParseCharRef: invalid xmlChar value %d\n",
2306
* xmlParseStringCharRef:
2307
* @ctxt: an XML parser context
2308
* @str: a pointer to an index in the string
2310
* parse Reference declarations, variant parsing from a string rather
2311
* than an an input flow.
2313
* [66] CharRef ::= '&#' [0-9]+ ';' |
2314
* '&#x' [0-9a-fA-F]+ ';'
2316
* [ WFC: Legal Character ]
2317
* Characters referred to using character references must match the
2318
* production for Char.
2320
* Returns the value parsed (as an int), 0 in case of error, str will be
2321
* updated to the current value of the index
2324
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2327
unsigned int val = 0;
2328
unsigned int outofrange = 0;
2330
if ((str == NULL) || (*str == NULL)) return(0);
2333
if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2336
while (cur != ';') { /* Non input consuming loop */
2337
if ((cur >= '0') && (cur <= '9'))
2338
val = val * 16 + (cur - '0');
2339
else if ((cur >= 'a') && (cur <= 'f'))
2340
val = val * 16 + (cur - 'a') + 10;
2341
else if ((cur >= 'A') && (cur <= 'F'))
2342
val = val * 16 + (cur - 'A') + 10;
2344
xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2356
} else if ((cur == '&') && (ptr[1] == '#')){
2359
while (cur != ';') { /* Non input consuming loops */
2360
if ((cur >= '0') && (cur <= '9'))
2361
val = val * 10 + (cur - '0');
2363
xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2376
xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2382
* [ WFC: Legal Character ]
2383
* Characters referred to using character references must match the
2384
* production for Char.
2386
if ((IS_CHAR(val) && (outofrange == 0))) {
2389
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2390
"xmlParseStringCharRef: invalid xmlChar value %d\n",
2397
* xmlNewBlanksWrapperInputStream:
2398
* @ctxt: an XML parser context
2399
* @entity: an Entity pointer
2401
* Create a new input stream for wrapping
2402
* blanks around a PEReference
2404
* Returns the new input stream or NULL
2407
static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2409
static xmlParserInputPtr
2410
xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2411
xmlParserInputPtr input;
2414
if (entity == NULL) {
2415
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2416
"xmlNewBlanksWrapperInputStream entity\n");
2419
if (xmlParserDebugEntities)
2420
xmlGenericError(xmlGenericErrorContext,
2421
"new blanks wrapper for entity: %s\n", entity->name);
2422
input = xmlNewInputStream(ctxt);
2423
if (input == NULL) {
2426
length = xmlStrlen(entity->name) + 5;
2427
buffer = xmlMallocAtomic(length);
2428
if (buffer == NULL) {
2429
xmlErrMemory(ctxt, NULL);
2435
buffer [length-3] = ';';
2436
buffer [length-2] = ' ';
2437
buffer [length-1] = 0;
2438
memcpy(buffer + 2, entity->name, length - 5);
2439
input->free = deallocblankswrapper;
2440
input->base = buffer;
2441
input->cur = buffer;
2442
input->length = length;
2443
input->end = &buffer[length];
2448
* xmlParserHandlePEReference:
2449
* @ctxt: the parser context
2451
* [69] PEReference ::= '%' Name ';'
2453
* [ WFC: No Recursion ]
2454
* A parsed entity must not contain a recursive
2455
* reference to itself, either directly or indirectly.
2457
* [ WFC: Entity Declared ]
2458
* In a document without any DTD, a document with only an internal DTD
2459
* subset which contains no parameter entity references, or a document
2460
* with "standalone='yes'", ... ... The declaration of a parameter
2461
* entity must precede any reference to it...
2463
* [ VC: Entity Declared ]
2464
* In a document with an external subset or external parameter entities
2465
* with "standalone='no'", ... ... The declaration of a parameter entity
2466
* must precede any reference to it...
2469
* Parameter-entity references may only appear in the DTD.
2470
* NOTE: misleading but this is handled.
2472
* A PEReference may have been detected in the current input stream
2473
* the handling is done accordingly to
2474
* http://www.w3.org/TR/REC-xml#entproc
2476
* - Included in literal in entity values
2477
* - Included as Parameter Entity reference within DTDs
2480
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2481
const xmlChar *name;
2482
xmlEntityPtr entity = NULL;
2483
xmlParserInputPtr input;
2485
if (RAW != '%') return;
2486
switch(ctxt->instate) {
2487
case XML_PARSER_CDATA_SECTION:
2489
case XML_PARSER_COMMENT:
2491
case XML_PARSER_START_TAG:
2493
case XML_PARSER_END_TAG:
2495
case XML_PARSER_EOF:
2496
xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2498
case XML_PARSER_PROLOG:
2499
case XML_PARSER_START:
2500
case XML_PARSER_MISC:
2501
xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2503
case XML_PARSER_ENTITY_DECL:
2504
case XML_PARSER_CONTENT:
2505
case XML_PARSER_ATTRIBUTE_VALUE:
2507
case XML_PARSER_SYSTEM_LITERAL:
2508
case XML_PARSER_PUBLIC_LITERAL:
2509
/* we just ignore it there */
2511
case XML_PARSER_EPILOG:
2512
xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2514
case XML_PARSER_ENTITY_VALUE:
2516
* NOTE: in the case of entity values, we don't do the
2517
* substitution here since we need the literal
2518
* entity value to be able to save the internal
2519
* subset of the document.
2520
* This will be handled by xmlStringDecodeEntities
2523
case XML_PARSER_DTD:
2525
* [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2526
* In the internal DTD subset, parameter-entity references
2527
* can occur only where markup declarations can occur, not
2528
* within markup declarations.
2529
* In that case this is handled in xmlParseMarkupDecl
2531
if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2533
if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2536
case XML_PARSER_IGNORE:
2541
name = xmlParseName(ctxt);
2542
if (xmlParserDebugEntities)
2543
xmlGenericError(xmlGenericErrorContext,
2544
"PEReference: %s\n", name);
2546
xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2550
if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2551
entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2552
if (ctxt->instate == XML_PARSER_EOF)
2554
if (entity == NULL) {
2557
* [ WFC: Entity Declared ]
2558
* In a document without any DTD, a document with only an
2559
* internal DTD subset which contains no parameter entity
2560
* references, or a document with "standalone='yes'", ...
2561
* ... The declaration of a parameter entity must precede
2562
* any reference to it...
2564
if ((ctxt->standalone == 1) ||
2565
((ctxt->hasExternalSubset == 0) &&
2566
(ctxt->hasPErefs == 0))) {
2567
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2568
"PEReference: %%%s; not found\n", name);
2571
* [ VC: Entity Declared ]
2572
* In a document with an external subset or external
2573
* parameter entities with "standalone='no'", ...
2574
* ... The declaration of a parameter entity must precede
2575
* any reference to it...
2577
if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2578
xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2579
"PEReference: %%%s; not found\n",
2582
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2583
"PEReference: %%%s; not found\n",
2587
} else if (ctxt->input->free != deallocblankswrapper) {
2588
input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2589
if (xmlPushInput(ctxt, input) < 0)
2592
if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2593
(entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2595
xmlCharEncoding enc;
2598
* Note: external parsed entities will not be loaded, it is
2599
* not required for a non-validating parser, unless the
2600
* option of validating, or substituting entities were
2601
* given. Doing so is far more secure as the parser will
2602
* only process data coming from the document entity by
2605
if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2606
((ctxt->options & XML_PARSE_NOENT) == 0) &&
2607
((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
2608
(ctxt->validate == 0))
2612
* handle the extra spaces added before and after
2613
* c.f. http://www.w3.org/TR/REC-xml#as-PE
2614
* this is done independently.
2616
input = xmlNewEntityInputStream(ctxt, entity);
2617
if (xmlPushInput(ctxt, input) < 0)
2621
* Get the 4 first bytes and decode the charset
2622
* if enc != XML_CHAR_ENCODING_NONE
2623
* plug some encoding conversion routines.
2624
* Note that, since we may have some non-UTF8
2625
* encoding (like UTF16, bug 135229), the 'length'
2626
* is not known, but we can calculate based upon
2627
* the amount of data in the buffer.
2630
if (ctxt->instate == XML_PARSER_EOF)
2632
if ((ctxt->input->end - ctxt->input->cur)>=4) {
2637
enc = xmlDetectCharEncoding(start, 4);
2638
if (enc != XML_CHAR_ENCODING_NONE) {
2639
xmlSwitchEncoding(ctxt, enc);
2643
if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2644
(CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2645
(IS_BLANK_CH(NXT(5)))) {
2646
xmlParseTextDecl(ctxt);
2649
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2650
"PEReference: %s is not a parameter entity\n",
2655
xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2661
* Macro used to grow the current buffer.
2662
* buffer##_size is expected to be a size_t
2663
* mem_error: is expected to handle memory allocation failures
2665
#define growBuffer(buffer, n) { \
2667
size_t new_size = buffer##_size * 2 + n; \
2668
if (new_size < buffer##_size) goto mem_error; \
2669
tmp = (xmlChar *) xmlRealloc(buffer, new_size); \
2670
if (tmp == NULL) goto mem_error; \
2672
buffer##_size = new_size; \
2676
* xmlStringLenDecodeEntities:
2677
* @ctxt: the parser context
2678
* @str: the input string
2679
* @len: the string length
2680
* @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2681
* @end: an end marker xmlChar, 0 if none
2682
* @end2: an end marker xmlChar, 0 if none
2683
* @end3: an end marker xmlChar, 0 if none
2685
* Takes a entity string content and process to do the adequate substitutions.
2687
* [67] Reference ::= EntityRef | CharRef
2689
* [69] PEReference ::= '%' Name ';'
2691
* Returns A newly allocated string with the substitution done. The caller
2692
* must deallocate it !
2695
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2696
int what, xmlChar end, xmlChar end2, xmlChar end3) {
2697
xmlChar *buffer = NULL;
2698
size_t buffer_size = 0;
2701
xmlChar *current = NULL;
2702
xmlChar *rep = NULL;
2703
const xmlChar *last;
2707
if ((ctxt == NULL) || (str == NULL) || (len < 0))
2711
if (((ctxt->depth > 40) &&
2712
((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2713
(ctxt->depth > 1024)) {
2714
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2719
* allocate a translation buffer.
2721
buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2722
buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2723
if (buffer == NULL) goto mem_error;
2726
* OK loop until we reach one of the ending char or a size limit.
2727
* we are operating on already parsed values.
2730
c = CUR_SCHAR(str, l);
2733
while ((c != 0) && (c != end) && /* non input consuming loop */
2734
(c != end2) && (c != end3)) {
2737
if ((c == '&') && (str[1] == '#')) {
2738
int val = xmlParseStringCharRef(ctxt, &str);
2740
COPY_BUF(0,buffer,nbchars,val);
2742
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2743
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2745
} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2746
if (xmlParserDebugEntities)
2747
xmlGenericError(xmlGenericErrorContext,
2748
"String decoding Entity Reference: %.30s\n",
2750
ent = xmlParseStringEntityRef(ctxt, &str);
2751
if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2752
(ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2755
ctxt->nbentities += ent->checked / 2;
2756
if ((ent != NULL) &&
2757
(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2758
if (ent->content != NULL) {
2759
COPY_BUF(0,buffer,nbchars,ent->content[0]);
2760
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2761
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2764
xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2765
"predefined entity has no content\n");
2767
} else if ((ent != NULL) && (ent->content != NULL)) {
2769
rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2775
while (*current != 0) { /* non input consuming loop */
2776
buffer[nbchars++] = *current++;
2777
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2778
if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2780
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2786
} else if (ent != NULL) {
2787
int i = xmlStrlen(ent->name);
2788
const xmlChar *cur = ent->name;
2790
buffer[nbchars++] = '&';
2791
if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2792
growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2795
buffer[nbchars++] = *cur++;
2796
buffer[nbchars++] = ';';
2798
} else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2799
if (xmlParserDebugEntities)
2800
xmlGenericError(xmlGenericErrorContext,
2801
"String decoding PE Reference: %.30s\n", str);
2802
ent = xmlParseStringPEReference(ctxt, &str);
2803
if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2806
ctxt->nbentities += ent->checked / 2;
2808
if (ent->content == NULL) {
2809
xmlLoadEntityContent(ctxt, ent);
2812
rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2817
while (*current != 0) { /* non input consuming loop */
2818
buffer[nbchars++] = *current++;
2819
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2820
if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2822
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2830
COPY_BUF(l,buffer,nbchars,c);
2832
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2833
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2837
c = CUR_SCHAR(str, l);
2841
buffer[nbchars] = 0;
2845
xmlErrMemory(ctxt, NULL);
2855
* xmlStringDecodeEntities:
2856
* @ctxt: the parser context
2857
* @str: the input string
2858
* @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2859
* @end: an end marker xmlChar, 0 if none
2860
* @end2: an end marker xmlChar, 0 if none
2861
* @end3: an end marker xmlChar, 0 if none
2863
* Takes a entity string content and process to do the adequate substitutions.
2865
* [67] Reference ::= EntityRef | CharRef
2867
* [69] PEReference ::= '%' Name ';'
2869
* Returns A newly allocated string with the substitution done. The caller
2870
* must deallocate it !
2873
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2874
xmlChar end, xmlChar end2, xmlChar end3) {
2875
if ((ctxt == NULL) || (str == NULL)) return(NULL);
2876
return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2880
/************************************************************************
2882
* Commodity functions, cleanup needed ? *
2884
************************************************************************/
2888
* @ctxt: an XML parser context
2890
* @len: the size of @str
2891
* @blank_chars: we know the chars are blanks
2893
* Is this a sequence of blank chars that one can ignore ?
2895
* Returns 1 if ignorable 0 otherwise.
2898
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2901
xmlNodePtr lastChild;
2904
* Don't spend time trying to differentiate them, the same callback is
2907
if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2911
* Check for xml:space value.
2913
if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2914
(*(ctxt->space) == -2))
2918
* Check that the string is made of blanks
2920
if (blank_chars == 0) {
2921
for (i = 0;i < len;i++)
2922
if (!(IS_BLANK_CH(str[i]))) return(0);
2926
* Look if the element is mixed content in the DTD if available
2928
if (ctxt->node == NULL) return(0);
2929
if (ctxt->myDoc != NULL) {
2930
ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2931
if (ret == 0) return(1);
2932
if (ret == 1) return(0);
2936
* Otherwise, heuristic :-\
2938
if ((RAW != '<') && (RAW != 0xD)) return(0);
2939
if ((ctxt->node->children == NULL) &&
2940
(RAW == '<') && (NXT(1) == '/')) return(0);
2942
lastChild = xmlGetLastChild(ctxt->node);
2943
if (lastChild == NULL) {
2944
if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2945
(ctxt->node->content != NULL)) return(0);
2946
} else if (xmlNodeIsText(lastChild))
2948
else if ((ctxt->node->children != NULL) &&
2949
(xmlNodeIsText(ctxt->node->children)))
2954
/************************************************************************
2956
* Extra stuff for namespace support *
2957
* Relates to http://www.w3.org/TR/WD-xml-names *
2959
************************************************************************/
2963
* @ctxt: an XML parser context
2964
* @name: an XML parser context
2965
* @prefix: a xmlChar **
2967
* parse an UTF8 encoded XML qualified name string
2969
* [NS 5] QName ::= (Prefix ':')? LocalPart
2971
* [NS 6] Prefix ::= NCName
2973
* [NS 7] LocalPart ::= NCName
2975
* Returns the local part, and prefix is updated
2976
* to get the Prefix if any.
2980
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2981
xmlChar buf[XML_MAX_NAMELEN + 5];
2982
xmlChar *buffer = NULL;
2984
int max = XML_MAX_NAMELEN;
2985
xmlChar *ret = NULL;
2986
const xmlChar *cur = name;
2989
if (prefix == NULL) return(NULL);
2992
if (cur == NULL) return(NULL);
2994
#ifndef XML_XML_NAMESPACE
2995
/* xml: prefix is not really a namespace */
2996
if ((cur[0] == 'x') && (cur[1] == 'm') &&
2997
(cur[2] == 'l') && (cur[3] == ':'))
2998
return(xmlStrdup(name));
3001
/* nasty but well=formed */
3003
return(xmlStrdup(name));
3006
while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3012
* Okay someone managed to make a huge name, so he's ready to pay
3013
* for the processing speed.
3017
buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3018
if (buffer == NULL) {
3019
xmlErrMemory(ctxt, NULL);
3022
memcpy(buffer, buf, len);
3023
while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3024
if (len + 10 > max) {
3028
tmp = (xmlChar *) xmlRealloc(buffer,
3029
max * sizeof(xmlChar));
3032
xmlErrMemory(ctxt, NULL);
3043
if ((c == ':') && (*cur == 0)) {
3047
return(xmlStrdup(name));
3051
ret = xmlStrndup(buf, len);
3055
max = XML_MAX_NAMELEN;
3063
return(xmlStrndup(BAD_CAST "", 0));
3068
* Check that the first character is proper to start
3071
if (!(((c >= 0x61) && (c <= 0x7A)) ||
3072
((c >= 0x41) && (c <= 0x5A)) ||
3073
(c == '_') || (c == ':'))) {
3075
int first = CUR_SCHAR(cur, l);
3077
if (!IS_LETTER(first) && (first != '_')) {
3078
xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3079
"Name %s is not XML Namespace compliant\n",
3085
while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3091
* Okay someone managed to make a huge name, so he's ready to pay
3092
* for the processing speed.
3096
buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3097
if (buffer == NULL) {
3098
xmlErrMemory(ctxt, NULL);
3101
memcpy(buffer, buf, len);
3102
while (c != 0) { /* tested bigname2.xml */
3103
if (len + 10 > max) {
3107
tmp = (xmlChar *) xmlRealloc(buffer,
3108
max * sizeof(xmlChar));
3110
xmlErrMemory(ctxt, NULL);
3123
ret = xmlStrndup(buf, len);
3132
/************************************************************************
3134
* The parser itself *
3135
* Relates to http://www.w3.org/TR/REC-xml *
3137
************************************************************************/
3139
/************************************************************************
3141
* Routines to parse Name, NCName and NmToken *
3143
************************************************************************/
3145
static unsigned long nbParseName = 0;
3146
static unsigned long nbParseNmToken = 0;
3147
static unsigned long nbParseNCName = 0;
3148
static unsigned long nbParseNCNameComplex = 0;
3149
static unsigned long nbParseNameComplex = 0;
3150
static unsigned long nbParseStringName = 0;
3154
* The two following functions are related to the change of accepted
3155
* characters for Name and NmToken in the Revision 5 of XML-1.0
3156
* They correspond to the modified production [4] and the new production [4a]
3157
* changes in that revision. Also note that the macros used for the
3158
* productions Letter, Digit, CombiningChar and Extender are not needed
3160
* We still keep compatibility to pre-revision5 parsing semantic if the
3161
* new XML_PARSE_OLD10 option is given to the parser.
3164
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3165
if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3167
* Use the new checks of production [4] [4a] amd [5] of the
3168
* Update 5 of XML-1.0
3170
if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3171
(((c >= 'a') && (c <= 'z')) ||
3172
((c >= 'A') && (c <= 'Z')) ||
3173
(c == '_') || (c == ':') ||
3174
((c >= 0xC0) && (c <= 0xD6)) ||
3175
((c >= 0xD8) && (c <= 0xF6)) ||
3176
((c >= 0xF8) && (c <= 0x2FF)) ||
3177
((c >= 0x370) && (c <= 0x37D)) ||
3178
((c >= 0x37F) && (c <= 0x1FFF)) ||
3179
((c >= 0x200C) && (c <= 0x200D)) ||
3180
((c >= 0x2070) && (c <= 0x218F)) ||
3181
((c >= 0x2C00) && (c <= 0x2FEF)) ||
3182
((c >= 0x3001) && (c <= 0xD7FF)) ||
3183
((c >= 0xF900) && (c <= 0xFDCF)) ||
3184
((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3185
((c >= 0x10000) && (c <= 0xEFFFF))))
3188
if (IS_LETTER(c) || (c == '_') || (c == ':'))
3195
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3196
if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3198
* Use the new checks of production [4] [4a] amd [5] of the
3199
* Update 5 of XML-1.0
3201
if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3202
(((c >= 'a') && (c <= 'z')) ||
3203
((c >= 'A') && (c <= 'Z')) ||
3204
((c >= '0') && (c <= '9')) || /* !start */
3205
(c == '_') || (c == ':') ||
3206
(c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3207
((c >= 0xC0) && (c <= 0xD6)) ||
3208
((c >= 0xD8) && (c <= 0xF6)) ||
3209
((c >= 0xF8) && (c <= 0x2FF)) ||
3210
((c >= 0x300) && (c <= 0x36F)) || /* !start */
3211
((c >= 0x370) && (c <= 0x37D)) ||
3212
((c >= 0x37F) && (c <= 0x1FFF)) ||
3213
((c >= 0x200C) && (c <= 0x200D)) ||
3214
((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3215
((c >= 0x2070) && (c <= 0x218F)) ||
3216
((c >= 0x2C00) && (c <= 0x2FEF)) ||
3217
((c >= 0x3001) && (c <= 0xD7FF)) ||
3218
((c >= 0xF900) && (c <= 0xFDCF)) ||
3219
((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3220
((c >= 0x10000) && (c <= 0xEFFFF))))
3223
if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3224
(c == '.') || (c == '-') ||
3225
(c == '_') || (c == ':') ||
3226
(IS_COMBINING(c)) ||
3233
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3234
int *len, int *alloc, int normalize);
3236
static const xmlChar *
3237
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3243
nbParseNameComplex++;
3247
* Handler for more complex cases
3250
if (ctxt->instate == XML_PARSER_EOF)
3253
if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3255
* Use the new checks of production [4] [4a] amd [5] of the
3256
* Update 5 of XML-1.0
3258
if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3259
(!(((c >= 'a') && (c <= 'z')) ||
3260
((c >= 'A') && (c <= 'Z')) ||
3261
(c == '_') || (c == ':') ||
3262
((c >= 0xC0) && (c <= 0xD6)) ||
3263
((c >= 0xD8) && (c <= 0xF6)) ||
3264
((c >= 0xF8) && (c <= 0x2FF)) ||
3265
((c >= 0x370) && (c <= 0x37D)) ||
3266
((c >= 0x37F) && (c <= 0x1FFF)) ||
3267
((c >= 0x200C) && (c <= 0x200D)) ||
3268
((c >= 0x2070) && (c <= 0x218F)) ||
3269
((c >= 0x2C00) && (c <= 0x2FEF)) ||
3270
((c >= 0x3001) && (c <= 0xD7FF)) ||
3271
((c >= 0xF900) && (c <= 0xFDCF)) ||
3272
((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3273
((c >= 0x10000) && (c <= 0xEFFFF))))) {
3279
while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3280
(((c >= 'a') && (c <= 'z')) ||
3281
((c >= 'A') && (c <= 'Z')) ||
3282
((c >= '0') && (c <= '9')) || /* !start */
3283
(c == '_') || (c == ':') ||
3284
(c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3285
((c >= 0xC0) && (c <= 0xD6)) ||
3286
((c >= 0xD8) && (c <= 0xF6)) ||
3287
((c >= 0xF8) && (c <= 0x2FF)) ||
3288
((c >= 0x300) && (c <= 0x36F)) || /* !start */
3289
((c >= 0x370) && (c <= 0x37D)) ||
3290
((c >= 0x37F) && (c <= 0x1FFF)) ||
3291
((c >= 0x200C) && (c <= 0x200D)) ||
3292
((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3293
((c >= 0x2070) && (c <= 0x218F)) ||
3294
((c >= 0x2C00) && (c <= 0x2FEF)) ||
3295
((c >= 0x3001) && (c <= 0xD7FF)) ||
3296
((c >= 0xF900) && (c <= 0xFDCF)) ||
3297
((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3298
((c >= 0x10000) && (c <= 0xEFFFF))
3300
if (count++ > XML_PARSER_CHUNK_SIZE) {
3303
if (ctxt->instate == XML_PARSER_EOF)
3311
if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3312
(!IS_LETTER(c) && (c != '_') &&
3320
while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3321
((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3322
(c == '.') || (c == '-') ||
3323
(c == '_') || (c == ':') ||
3324
(IS_COMBINING(c)) ||
3325
(IS_EXTENDER(c)))) {
3326
if (count++ > XML_PARSER_CHUNK_SIZE) {
3329
if (ctxt->instate == XML_PARSER_EOF)
3338
if (ctxt->instate == XML_PARSER_EOF)
3344
if ((len > XML_MAX_NAME_LENGTH) &&
3345
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3346
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3349
if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3350
return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3351
return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3356
* @ctxt: an XML parser context
3358
* parse an XML name.
3360
* [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3361
* CombiningChar | Extender
3363
* [5] Name ::= (Letter | '_' | ':') (NameChar)*
3365
* [6] Names ::= Name (#x20 Name)*
3367
* Returns the Name parsed or NULL
3371
xmlParseName(xmlParserCtxtPtr ctxt) {
3383
* Accelerator for simple ASCII names
3385
in = ctxt->input->cur;
3386
if (((*in >= 0x61) && (*in <= 0x7A)) ||
3387
((*in >= 0x41) && (*in <= 0x5A)) ||
3388
(*in == '_') || (*in == ':')) {
3390
while (((*in >= 0x61) && (*in <= 0x7A)) ||
3391
((*in >= 0x41) && (*in <= 0x5A)) ||
3392
((*in >= 0x30) && (*in <= 0x39)) ||
3393
(*in == '_') || (*in == '-') ||
3394
(*in == ':') || (*in == '.'))
3396
if ((*in > 0) && (*in < 0x80)) {
3397
count = in - ctxt->input->cur;
3398
if ((count > XML_MAX_NAME_LENGTH) &&
3399
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3400
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3403
ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3404
ctxt->input->cur = in;
3405
ctxt->nbChars += count;
3406
ctxt->input->col += count;
3408
xmlErrMemory(ctxt, NULL);
3412
/* accelerator for special cases */
3413
return(xmlParseNameComplex(ctxt));
3416
static const xmlChar *
3417
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3421
const xmlChar *end; /* needed because CUR_CHAR() can move cur on \r\n */
3424
nbParseNCNameComplex++;
3428
* Handler for more complex cases
3431
end = ctxt->input->cur;
3433
if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3434
(!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3438
while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3439
(xmlIsNameChar(ctxt, c) && (c != ':'))) {
3440
if (count++ > XML_PARSER_CHUNK_SIZE) {
3441
if ((len > XML_MAX_NAME_LENGTH) &&
3442
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3443
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3448
if (ctxt->instate == XML_PARSER_EOF)
3453
end = ctxt->input->cur;
3458
if (ctxt->instate == XML_PARSER_EOF)
3460
end = ctxt->input->cur;
3464
if ((len > XML_MAX_NAME_LENGTH) &&
3465
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3466
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3469
return(xmlDictLookup(ctxt->dict, end - len, len));
3474
* @ctxt: an XML parser context
3475
* @len: length of the string parsed
3477
* parse an XML name.
3479
* [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3480
* CombiningChar | Extender
3482
* [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3484
* Returns the Name parsed or NULL
3487
static const xmlChar *
3488
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3498
* Accelerator for simple ASCII names
3500
in = ctxt->input->cur;
3501
if (((*in >= 0x61) && (*in <= 0x7A)) ||
3502
((*in >= 0x41) && (*in <= 0x5A)) ||
3505
while (((*in >= 0x61) && (*in <= 0x7A)) ||
3506
((*in >= 0x41) && (*in <= 0x5A)) ||
3507
((*in >= 0x30) && (*in <= 0x39)) ||
3508
(*in == '_') || (*in == '-') ||
3511
if ((*in > 0) && (*in < 0x80)) {
3512
count = in - ctxt->input->cur;
3513
if ((count > XML_MAX_NAME_LENGTH) &&
3514
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3515
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3518
ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3519
ctxt->input->cur = in;
3520
ctxt->nbChars += count;
3521
ctxt->input->col += count;
3523
xmlErrMemory(ctxt, NULL);
3528
return(xmlParseNCNameComplex(ctxt));
3532
* xmlParseNameAndCompare:
3533
* @ctxt: an XML parser context
3535
* parse an XML name and compares for match
3536
* (specialized for endtag parsing)
3538
* Returns NULL for an illegal name, (xmlChar*) 1 for success
3539
* and the name for mismatch
3542
static const xmlChar *
3543
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3544
register const xmlChar *cmp = other;
3545
register const xmlChar *in;
3549
if (ctxt->instate == XML_PARSER_EOF)
3552
in = ctxt->input->cur;
3553
while (*in != 0 && *in == *cmp) {
3558
if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3560
ctxt->input->cur = in;
3561
return (const xmlChar*) 1;
3563
/* failure (or end of input buffer), check with full function */
3564
ret = xmlParseName (ctxt);
3565
/* strings coming from the dictionnary direct compare possible */
3567
return (const xmlChar*) 1;
3573
* xmlParseStringName:
3574
* @ctxt: an XML parser context
3575
* @str: a pointer to the string pointer (IN/OUT)
3577
* parse an XML name.
3579
* [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3580
* CombiningChar | Extender
3582
* [5] Name ::= (Letter | '_' | ':') (NameChar)*
3584
* [6] Names ::= Name (#x20 Name)*
3586
* Returns the Name parsed or NULL. The @str pointer
3587
* is updated to the current location in the string.
3591
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3592
xmlChar buf[XML_MAX_NAMELEN + 5];
3593
const xmlChar *cur = *str;
3598
nbParseStringName++;
3601
c = CUR_SCHAR(cur, l);
3602
if (!xmlIsNameStartChar(ctxt, c)) {
3606
COPY_BUF(l,buf,len,c);
3608
c = CUR_SCHAR(cur, l);
3609
while (xmlIsNameChar(ctxt, c)) {
3610
COPY_BUF(l,buf,len,c);
3612
c = CUR_SCHAR(cur, l);
3613
if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3615
* Okay someone managed to make a huge name, so he's ready to pay
3616
* for the processing speed.
3621
buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3622
if (buffer == NULL) {
3623
xmlErrMemory(ctxt, NULL);
3626
memcpy(buffer, buf, len);
3627
while (xmlIsNameChar(ctxt, c)) {
3628
if (len + 10 > max) {
3631
if ((len > XML_MAX_NAME_LENGTH) &&
3632
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3633
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3638
tmp = (xmlChar *) xmlRealloc(buffer,
3639
max * sizeof(xmlChar));
3641
xmlErrMemory(ctxt, NULL);
3647
COPY_BUF(l,buffer,len,c);
3649
c = CUR_SCHAR(cur, l);
3656
if ((len > XML_MAX_NAME_LENGTH) &&
3657
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3658
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3662
return(xmlStrndup(buf, len));
3667
* @ctxt: an XML parser context
3669
* parse an XML Nmtoken.
3671
* [7] Nmtoken ::= (NameChar)+
3673
* [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3675
* Returns the Nmtoken parsed or NULL
3679
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3680
xmlChar buf[XML_MAX_NAMELEN + 5];
3690
if (ctxt->instate == XML_PARSER_EOF)
3694
while (xmlIsNameChar(ctxt, c)) {
3695
if (count++ > XML_PARSER_CHUNK_SIZE) {
3699
COPY_BUF(l,buf,len,c);
3705
if (ctxt->instate == XML_PARSER_EOF)
3709
if (len >= XML_MAX_NAMELEN) {
3711
* Okay someone managed to make a huge token, so he's ready to pay
3712
* for the processing speed.
3717
buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3718
if (buffer == NULL) {
3719
xmlErrMemory(ctxt, NULL);
3722
memcpy(buffer, buf, len);
3723
while (xmlIsNameChar(ctxt, c)) {
3724
if (count++ > XML_PARSER_CHUNK_SIZE) {
3727
if (ctxt->instate == XML_PARSER_EOF) {
3732
if (len + 10 > max) {
3735
if ((max > XML_MAX_NAME_LENGTH) &&
3736
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3737
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3742
tmp = (xmlChar *) xmlRealloc(buffer,
3743
max * sizeof(xmlChar));
3745
xmlErrMemory(ctxt, NULL);
3751
COPY_BUF(l,buffer,len,c);
3761
if ((len > XML_MAX_NAME_LENGTH) &&
3762
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3763
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3766
return(xmlStrndup(buf, len));
3770
* xmlParseEntityValue:
3771
* @ctxt: an XML parser context
3772
* @orig: if non-NULL store a copy of the original entity value
3774
* parse a value for ENTITY declarations
3776
* [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3777
* "'" ([^%&'] | PEReference | Reference)* "'"
3779
* Returns the EntityValue parsed with reference substituted or NULL
3783
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3784
xmlChar *buf = NULL;
3786
int size = XML_PARSER_BUFFER_SIZE;
3789
xmlChar *ret = NULL;
3790
const xmlChar *cur = NULL;
3791
xmlParserInputPtr input;
3793
if (RAW == '"') stop = '"';
3794
else if (RAW == '\'') stop = '\'';
3796
xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3799
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3801
xmlErrMemory(ctxt, NULL);
3806
* The content of the entity definition is copied in a buffer.
3809
ctxt->instate = XML_PARSER_ENTITY_VALUE;
3810
input = ctxt->input;
3812
if (ctxt->instate == XML_PARSER_EOF) {
3819
* NOTE: 4.4.5 Included in Literal
3820
* When a parameter entity reference appears in a literal entity
3821
* value, ... a single or double quote character in the replacement
3822
* text is always treated as a normal data character and will not
3823
* terminate the literal.
3824
* In practice it means we stop the loop only when back at parsing
3825
* the initial entity and the quote is found
3827
while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3828
(ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3829
if (len + 5 >= size) {
3833
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3835
xmlErrMemory(ctxt, NULL);
3841
COPY_BUF(l,buf,len,c);
3844
* Pop-up of finished entities.
3846
while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3857
if (ctxt->instate == XML_PARSER_EOF) {
3863
* Raise problem w.r.t. '&' and '%' being used in non-entities
3864
* reference constructs. Note Charref will be handled in
3865
* xmlStringDecodeEntities()
3868
while (*cur != 0) { /* non input consuming */
3869
if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3874
name = xmlParseStringName(ctxt, &cur);
3875
if ((name == NULL) || (*cur != ';')) {
3876
xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3877
"EntityValue: '%c' forbidden except for entities references\n",
3880
if ((tmp == '%') && (ctxt->inSubset == 1) &&
3881
(ctxt->inputNr == 1)) {
3882
xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3893
* Then PEReference entities are substituted.
3896
xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3901
* NOTE: 4.4.7 Bypassed
3902
* When a general entity reference appears in the EntityValue in
3903
* an entity declaration, it is bypassed and left as is.
3904
* so XML_SUBSTITUTE_REF is not set here.
3906
ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3918
* xmlParseAttValueComplex:
3919
* @ctxt: an XML parser context
3920
* @len: the resulting attribute len
3921
* @normalize: wether to apply the inner normalization
3923
* parse a value for an attribute, this is the fallback function
3924
* of xmlParseAttValue() when the attribute parsing requires handling
3925
* of non-ASCII characters, or normalization compaction.
3927
* Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3930
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3932
xmlChar *buf = NULL;
3933
xmlChar *rep = NULL;
3935
size_t buf_size = 0;
3936
int c, l, in_space = 0;
3937
xmlChar *current = NULL;
3940
if (NXT(0) == '"') {
3941
ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3944
} else if (NXT(0) == '\'') {
3946
ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3949
xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3954
* allocate a translation buffer.
3956
buf_size = XML_PARSER_BUFFER_SIZE;
3957
buf = (xmlChar *) xmlMallocAtomic(buf_size);
3958
if (buf == NULL) goto mem_error;
3961
* OK loop until we reach one of the ending char or a size limit.
3964
while (((NXT(0) != limit) && /* checked */
3965
(IS_CHAR(c)) && (c != '<')) &&
3966
(ctxt->instate != XML_PARSER_EOF)) {
3968
* Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3969
* special option is given
3971
if ((len > XML_MAX_TEXT_LENGTH) &&
3972
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3973
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3974
"AttValue length too long\n");
3980
if (NXT(1) == '#') {
3981
int val = xmlParseCharRef(ctxt);
3984
if (ctxt->replaceEntities) {
3985
if (len + 10 > buf_size) {
3986
growBuffer(buf, 10);
3991
* The reparsing will be done in xmlStringGetNodeList()
3992
* called by the attribute() function in SAX.c
3994
if (len + 10 > buf_size) {
3995
growBuffer(buf, 10);
4003
} else if (val != 0) {
4004
if (len + 10 > buf_size) {
4005
growBuffer(buf, 10);
4007
len += xmlCopyChar(0, &buf[len], val);
4010
ent = xmlParseEntityRef(ctxt);
4013
ctxt->nbentities += ent->owner;
4014
if ((ent != NULL) &&
4015
(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4016
if (len + 10 > buf_size) {
4017
growBuffer(buf, 10);
4019
if ((ctxt->replaceEntities == 0) &&
4020
(ent->content[0] == '&')) {
4027
buf[len++] = ent->content[0];
4029
} else if ((ent != NULL) &&
4030
(ctxt->replaceEntities != 0)) {
4031
if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4032
rep = xmlStringDecodeEntities(ctxt, ent->content,
4037
while (*current != 0) { /* non input consuming */
4038
if ((*current == 0xD) || (*current == 0xA) ||
4039
(*current == 0x9)) {
4043
buf[len++] = *current++;
4044
if (len + 10 > buf_size) {
4045
growBuffer(buf, 10);
4052
if (len + 10 > buf_size) {
4053
growBuffer(buf, 10);
4055
if (ent->content != NULL)
4056
buf[len++] = ent->content[0];
4058
} else if (ent != NULL) {
4059
int i = xmlStrlen(ent->name);
4060
const xmlChar *cur = ent->name;
4063
* This may look absurd but is needed to detect
4066
if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4067
(ent->content != NULL) && (ent->checked == 0)) {
4068
unsigned long oldnbent = ctxt->nbentities;
4070
rep = xmlStringDecodeEntities(ctxt, ent->content,
4071
XML_SUBSTITUTE_REF, 0, 0, 0);
4073
ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
4075
if (xmlStrchr(rep, '<'))
4083
* Just output the reference
4086
while (len + i + 10 > buf_size) {
4087
growBuffer(buf, i + 10);
4090
buf[len++] = *cur++;
4095
if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4096
if ((len != 0) || (!normalize)) {
4097
if ((!normalize) || (!in_space)) {
4098
COPY_BUF(l,buf,len,0x20);
4099
while (len + 10 > buf_size) {
4100
growBuffer(buf, 10);
4107
COPY_BUF(l,buf,len,c);
4108
if (len + 10 > buf_size) {
4109
growBuffer(buf, 10);
4117
if (ctxt->instate == XML_PARSER_EOF)
4120
if ((in_space) && (normalize)) {
4121
while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4125
xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4126
} else if (RAW != limit) {
4127
if ((c != 0) && (!IS_CHAR(c))) {
4128
xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4129
"invalid character in attribute value\n");
4131
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4132
"AttValue: ' expected\n");
4138
* There we potentially risk an overflow, don't allow attribute value of
4139
* length more than INT_MAX it is a very reasonnable assumption !
4141
if (len >= INT_MAX) {
4142
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4143
"AttValue length too long\n");
4147
if (attlen != NULL) *attlen = (int) len;
4151
xmlErrMemory(ctxt, NULL);
4162
* @ctxt: an XML parser context
4164
* parse a value for an attribute
4165
* Note: the parser won't do substitution of entities here, this
4166
* will be handled later in xmlStringGetNodeList
4168
* [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4169
* "'" ([^<&'] | Reference)* "'"
4171
* 3.3.3 Attribute-Value Normalization:
4172
* Before the value of an attribute is passed to the application or
4173
* checked for validity, the XML processor must normalize it as follows:
4174
* - a character reference is processed by appending the referenced
4175
* character to the attribute value
4176
* - an entity reference is processed by recursively processing the
4177
* replacement text of the entity
4178
* - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4179
* appending #x20 to the normalized value, except that only a single
4180
* #x20 is appended for a "#xD#xA" sequence that is part of an external
4181
* parsed entity or the literal entity value of an internal parsed entity
4182
* - other characters are processed by appending them to the normalized value
4183
* If the declared value is not CDATA, then the XML processor must further
4184
* process the normalized attribute value by discarding any leading and
4185
* trailing space (#x20) characters, and by replacing sequences of space
4186
* (#x20) characters by a single space (#x20) character.
4187
* All attributes for which no declaration has been read should be treated
4188
* by a non-validating parser as if declared CDATA.
4190
* Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4195
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4196
if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4197
return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4201
* xmlParseSystemLiteral:
4202
* @ctxt: an XML parser context
4204
* parse an XML Literal
4206
* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4208
* Returns the SystemLiteral parsed or NULL
4212
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4213
xmlChar *buf = NULL;
4215
int size = XML_PARSER_BUFFER_SIZE;
4218
int state = ctxt->instate;
4225
} else if (RAW == '\'') {
4229
xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4233
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4235
xmlErrMemory(ctxt, NULL);
4238
ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4240
while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4241
if (len + 5 >= size) {
4244
if ((size > XML_MAX_NAME_LENGTH) &&
4245
((ctxt->options & XML_PARSE_HUGE) == 0)) {
4246
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4248
ctxt->instate = (xmlParserInputState) state;
4252
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4255
xmlErrMemory(ctxt, NULL);
4256
ctxt->instate = (xmlParserInputState) state;
4265
if (ctxt->instate == XML_PARSER_EOF) {
4270
COPY_BUF(l,buf,len,cur);
4280
ctxt->instate = (xmlParserInputState) state;
4281
if (!IS_CHAR(cur)) {
4282
xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4290
* xmlParsePubidLiteral:
4291
* @ctxt: an XML parser context
4293
* parse an XML public literal
4295
* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4297
* Returns the PubidLiteral parsed or NULL.
4301
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4302
xmlChar *buf = NULL;
4304
int size = XML_PARSER_BUFFER_SIZE;
4308
xmlParserInputState oldstate = ctxt->instate;
4314
} else if (RAW == '\'') {
4318
xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4321
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4323
xmlErrMemory(ctxt, NULL);
4326
ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4328
while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4329
if (len + 1 >= size) {
4332
if ((size > XML_MAX_NAME_LENGTH) &&
4333
((ctxt->options & XML_PARSE_HUGE) == 0)) {
4334
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4339
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4341
xmlErrMemory(ctxt, NULL);
4352
if (ctxt->instate == XML_PARSER_EOF) {
4367
xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4371
ctxt->instate = oldstate;
4375
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4378
* used for the test in the inner loop of the char data testing
4380
static const unsigned char test_char_data[256] = {
4381
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4382
0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4383
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4384
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4385
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4386
0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4387
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4388
0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4389
0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4390
0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4391
0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4392
0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4393
0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4394
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4395
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4396
0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4397
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4398
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4399
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4400
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4401
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4402
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4403
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4404
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4405
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4406
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4407
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4408
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4409
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4410
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4411
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4412
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4417
* @ctxt: an XML parser context
4418
* @cdata: int indicating whether we are within a CDATA section
4420
* parse a CharData section.
4421
* if we are within a CDATA section ']]>' marks an end of section.
4423
* The right angle bracket (>) may be represented using the string ">",
4424
* and must, for compatibility, be escaped using ">" or a character
4425
* reference when it appears in the string "]]>" in content, when that
4426
* string is not marking the end of a CDATA section.
4428
* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4432
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4435
int line = ctxt->input->line;
4436
int col = ctxt->input->col;
4442
* Accelerated common case where input don't need to be
4443
* modified before passing it to the handler.
4446
in = ctxt->input->cur;
4449
while (*in == 0x20) { in++; ctxt->input->col++; }
4452
ctxt->input->line++; ctxt->input->col = 1;
4454
} while (*in == 0xA);
4455
goto get_more_space;
4458
nbchar = in - ctxt->input->cur;
4460
const xmlChar *tmp = ctxt->input->cur;
4461
ctxt->input->cur = in;
4463
if ((ctxt->sax != NULL) &&
4464
(ctxt->sax->ignorableWhitespace !=
4465
ctxt->sax->characters)) {
4466
if (areBlanks(ctxt, tmp, nbchar, 1)) {
4467
if (ctxt->sax->ignorableWhitespace != NULL)
4468
ctxt->sax->ignorableWhitespace(ctxt->userData,
4471
if (ctxt->sax->characters != NULL)
4472
ctxt->sax->characters(ctxt->userData,
4474
if (*ctxt->space == -1)
4477
} else if ((ctxt->sax != NULL) &&
4478
(ctxt->sax->characters != NULL)) {
4479
ctxt->sax->characters(ctxt->userData,
4487
ccol = ctxt->input->col;
4488
while (test_char_data[*in]) {
4492
ctxt->input->col = ccol;
4495
ctxt->input->line++; ctxt->input->col = 1;
4497
} while (*in == 0xA);
4501
if ((in[1] == ']') && (in[2] == '>')) {
4502
xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4503
ctxt->input->cur = in;
4510
nbchar = in - ctxt->input->cur;
4512
if ((ctxt->sax != NULL) &&
4513
(ctxt->sax->ignorableWhitespace !=
4514
ctxt->sax->characters) &&
4515
(IS_BLANK_CH(*ctxt->input->cur))) {
4516
const xmlChar *tmp = ctxt->input->cur;
4517
ctxt->input->cur = in;
4519
if (areBlanks(ctxt, tmp, nbchar, 0)) {
4520
if (ctxt->sax->ignorableWhitespace != NULL)
4521
ctxt->sax->ignorableWhitespace(ctxt->userData,
4524
if (ctxt->sax->characters != NULL)
4525
ctxt->sax->characters(ctxt->userData,
4527
if (*ctxt->space == -1)
4530
line = ctxt->input->line;
4531
col = ctxt->input->col;
4532
} else if (ctxt->sax != NULL) {
4533
if (ctxt->sax->characters != NULL)
4534
ctxt->sax->characters(ctxt->userData,
4535
ctxt->input->cur, nbchar);
4536
line = ctxt->input->line;
4537
col = ctxt->input->col;
4539
/* something really bad happened in the SAX callback */
4540
if (ctxt->instate != XML_PARSER_CONTENT)
4543
ctxt->input->cur = in;
4547
ctxt->input->cur = in;
4549
ctxt->input->line++; ctxt->input->col = 1;
4550
continue; /* while */
4562
if (ctxt->instate == XML_PARSER_EOF)
4564
in = ctxt->input->cur;
4565
} while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4568
ctxt->input->line = line;
4569
ctxt->input->col = col;
4570
xmlParseCharDataComplex(ctxt, cdata);
4574
* xmlParseCharDataComplex:
4575
* @ctxt: an XML parser context
4576
* @cdata: int indicating whether we are within a CDATA section
4578
* parse a CharData section.this is the fallback function
4579
* of xmlParseCharData() when the parsing requires handling
4580
* of non-ASCII characters.
4583
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4584
xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4592
while ((cur != '<') && /* checked */
4594
(IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4595
if ((cur == ']') && (NXT(1) == ']') &&
4599
xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4602
COPY_BUF(l,buf,nbchar,cur);
4603
if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4607
* OK the segment is to be consumed as chars.
4609
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4610
if (areBlanks(ctxt, buf, nbchar, 0)) {
4611
if (ctxt->sax->ignorableWhitespace != NULL)
4612
ctxt->sax->ignorableWhitespace(ctxt->userData,
4615
if (ctxt->sax->characters != NULL)
4616
ctxt->sax->characters(ctxt->userData, buf, nbchar);
4617
if ((ctxt->sax->characters !=
4618
ctxt->sax->ignorableWhitespace) &&
4619
(*ctxt->space == -1))
4624
/* something really bad happened in the SAX callback */
4625
if (ctxt->instate != XML_PARSER_CONTENT)
4632
if (ctxt->instate == XML_PARSER_EOF)
4641
* OK the segment is to be consumed as chars.
4643
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4644
if (areBlanks(ctxt, buf, nbchar, 0)) {
4645
if (ctxt->sax->ignorableWhitespace != NULL)
4646
ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4648
if (ctxt->sax->characters != NULL)
4649
ctxt->sax->characters(ctxt->userData, buf, nbchar);
4650
if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4651
(*ctxt->space == -1))
4656
if ((cur != 0) && (!IS_CHAR(cur))) {
4657
/* Generate the error and skip the offending character */
4658
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4659
"PCDATA invalid Char value %d\n",
4666
* xmlParseExternalID:
4667
* @ctxt: an XML parser context
4668
* @publicID: a xmlChar** receiving PubidLiteral
4669
* @strict: indicate whether we should restrict parsing to only
4670
* production [75], see NOTE below
4672
* Parse an External ID or a Public ID
4674
* NOTE: Productions [75] and [83] interact badly since [75] can generate
4675
* 'PUBLIC' S PubidLiteral S SystemLiteral
4677
* [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4678
* | 'PUBLIC' S PubidLiteral S SystemLiteral
4680
* [83] PublicID ::= 'PUBLIC' S PubidLiteral
4682
* Returns the function returns SystemLiteral and in the second
4683
* case publicID receives PubidLiteral, is strict is off
4684
* it is possible to return NULL and have publicID set.
4688
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4689
xmlChar *URI = NULL;
4694
if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4696
if (!IS_BLANK_CH(CUR)) {
4697
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4698
"Space required after 'SYSTEM'\n");
4701
URI = xmlParseSystemLiteral(ctxt);
4703
xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4705
} else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4707
if (!IS_BLANK_CH(CUR)) {
4708
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4709
"Space required after 'PUBLIC'\n");
4712
*publicID = xmlParsePubidLiteral(ctxt);
4713
if (*publicID == NULL) {
4714
xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4718
* We don't handle [83] so "S SystemLiteral" is required.
4720
if (!IS_BLANK_CH(CUR)) {
4721
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4722
"Space required after the Public Identifier\n");
4726
* We handle [83] so we return immediately, if
4727
* "S SystemLiteral" is not detected. From a purely parsing
4728
* point of view that's a nice mess.
4734
if (!IS_BLANK_CH(*ptr)) return(NULL);
4736
while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
4737
if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4740
URI = xmlParseSystemLiteral(ctxt);
4742
xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4749
* xmlParseCommentComplex:
4750
* @ctxt: an XML parser context
4751
* @buf: the already parsed part of the buffer
4752
* @len: number of bytes filles in the buffer
4753
* @size: allocated size of the buffer
4755
* Skip an XML (SGML) comment <!-- .... -->
4756
* The spec says that "For compatibility, the string "--" (double-hyphen)
4757
* must not occur within comments. "
4758
* This is the slow routine in case the accelerator for ascii didn't work
4760
* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4763
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4764
size_t len, size_t size) {
4771
inputid = ctxt->input->id;
4775
size = XML_PARSER_BUFFER_SIZE;
4776
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4778
xmlErrMemory(ctxt, NULL);
4782
GROW; /* Assure there's enough input data */
4785
goto not_terminated;
4787
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4788
"xmlParseComment: invalid xmlChar value %d\n",
4796
goto not_terminated;
4798
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4799
"xmlParseComment: invalid xmlChar value %d\n",
4807
goto not_terminated;
4808
while (IS_CHAR(cur) && /* checked */
4810
(r != '-') || (q != '-'))) {
4811
if ((r == '-') && (q == '-')) {
4812
xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4814
if ((len > XML_MAX_TEXT_LENGTH) &&
4815
((ctxt->options & XML_PARSE_HUGE) == 0)) {
4816
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4817
"Comment too big found", NULL);
4821
if (len + 5 >= size) {
4825
new_size = size * 2;
4826
new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4827
if (new_buf == NULL) {
4829
xmlErrMemory(ctxt, NULL);
4835
COPY_BUF(ql,buf,len,q);
4845
if (ctxt->instate == XML_PARSER_EOF) {
4860
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4861
"Comment not terminated \n<!--%.50s\n", buf);
4862
} else if (!IS_CHAR(cur)) {
4863
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4864
"xmlParseComment: invalid xmlChar value %d\n",
4867
if (inputid != ctxt->input->id) {
4868
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4869
"Comment doesn't start and stop in the same entity\n");
4872
if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4873
(!ctxt->disableSAX))
4874
ctxt->sax->comment(ctxt->userData, buf);
4879
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4880
"Comment not terminated\n", NULL);
4887
* @ctxt: an XML parser context
4889
* Skip an XML (SGML) comment <!-- .... -->
4890
* The spec says that "For compatibility, the string "--" (double-hyphen)
4891
* must not occur within comments. "
4893
* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4896
xmlParseComment(xmlParserCtxtPtr ctxt) {
4897
xmlChar *buf = NULL;
4898
size_t size = XML_PARSER_BUFFER_SIZE;
4900
xmlParserInputState state;
4907
* Check that there is a comment right here.
4909
if ((RAW != '<') || (NXT(1) != '!') ||
4910
(NXT(2) != '-') || (NXT(3) != '-')) return;
4911
state = ctxt->instate;
4912
ctxt->instate = XML_PARSER_COMMENT;
4913
inputid = ctxt->input->id;
4919
* Accelerated common case where input don't need to be
4920
* modified before passing it to the handler.
4922
in = ctxt->input->cur;
4926
ctxt->input->line++; ctxt->input->col = 1;
4928
} while (*in == 0xA);
4931
ccol = ctxt->input->col;
4932
while (((*in > '-') && (*in <= 0x7F)) ||
4933
((*in >= 0x20) && (*in < '-')) ||
4938
ctxt->input->col = ccol;
4941
ctxt->input->line++; ctxt->input->col = 1;
4943
} while (*in == 0xA);
4946
nbchar = in - ctxt->input->cur;
4948
* save current set of data
4951
if ((ctxt->sax != NULL) &&
4952
(ctxt->sax->comment != NULL)) {
4954
if ((*in == '-') && (in[1] == '-'))
4957
size = XML_PARSER_BUFFER_SIZE + nbchar;
4958
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4960
xmlErrMemory(ctxt, NULL);
4961
ctxt->instate = state;
4965
} else if (len + nbchar + 1 >= size) {
4967
size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4968
new_buf = (xmlChar *) xmlRealloc(buf,
4969
size * sizeof(xmlChar));
4970
if (new_buf == NULL) {
4972
xmlErrMemory(ctxt, NULL);
4973
ctxt->instate = state;
4978
memcpy(&buf[len], ctxt->input->cur, nbchar);
4983
if ((len > XML_MAX_TEXT_LENGTH) &&
4984
((ctxt->options & XML_PARSE_HUGE) == 0)) {
4985
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4986
"Comment too big found", NULL);
4990
ctxt->input->cur = in;
4993
ctxt->input->line++; ctxt->input->col = 1;
4998
ctxt->input->cur = in;
5000
ctxt->input->line++; ctxt->input->col = 1;
5001
continue; /* while */
5007
if (ctxt->instate == XML_PARSER_EOF) {
5011
in = ctxt->input->cur;
5015
if (ctxt->input->id != inputid) {
5016
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5017
"comment doesn't start and stop in the same entity\n");
5020
if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5021
(!ctxt->disableSAX)) {
5023
ctxt->sax->comment(ctxt->userData, buf);
5025
ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5029
if (ctxt->instate != XML_PARSER_EOF)
5030
ctxt->instate = state;
5034
xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5035
"Double hyphen within comment: "
5039
xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5040
"Double hyphen within comment\n", NULL);
5048
} while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
5049
xmlParseCommentComplex(ctxt, buf, len, size);
5050
ctxt->instate = state;
5057
* @ctxt: an XML parser context
5059
* parse the name of a PI
5061
* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5063
* Returns the PITarget name or NULL
5067
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5068
const xmlChar *name;
5070
name = xmlParseName(ctxt);
5071
if ((name != NULL) &&
5072
((name[0] == 'x') || (name[0] == 'X')) &&
5073
((name[1] == 'm') || (name[1] == 'M')) &&
5074
((name[2] == 'l') || (name[2] == 'L'))) {
5076
if ((name[0] == 'x') && (name[1] == 'm') &&
5077
(name[2] == 'l') && (name[3] == 0)) {
5078
xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5079
"XML declaration allowed only at the start of the document\n");
5081
} else if (name[3] == 0) {
5082
xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5086
if (xmlW3CPIs[i] == NULL) break;
5087
if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5090
xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5091
"xmlParsePITarget: invalid name prefix 'xml'\n",
5094
if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5095
xmlNsErr(ctxt, XML_NS_ERR_COLON,
5096
"colon are forbidden from PI names '%s'\n", name, NULL, NULL);
5101
#ifdef LIBXML_CATALOG_ENABLED
5103
* xmlParseCatalogPI:
5104
* @ctxt: an XML parser context
5105
* @catalog: the PI value string
5107
* parse an XML Catalog Processing Instruction.
5109
* <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5111
* Occurs only if allowed by the user and if happening in the Misc
5112
* part of the document before any doctype informations
5113
* This will add the given catalog to the parsing context in order
5114
* to be used if there is a resolution need further down in the document
5118
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5119
xmlChar *URL = NULL;
5120
const xmlChar *tmp, *base;
5124
while (IS_BLANK_CH(*tmp)) tmp++;
5125
if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5128
while (IS_BLANK_CH(*tmp)) tmp++;
5133
while (IS_BLANK_CH(*tmp)) tmp++;
5135
if ((marker != '\'') && (marker != '"'))
5139
while ((*tmp != 0) && (*tmp != marker)) tmp++;
5142
URL = xmlStrndup(base, tmp - base);
5144
while (IS_BLANK_CH(*tmp)) tmp++;
5149
ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5155
xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5156
"Catalog PI syntax error: %s\n",
5165
* @ctxt: an XML parser context
5167
* parse an XML Processing Instruction.
5169
* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5171
* The processing is transfered to SAX once parsed.
5175
xmlParsePI(xmlParserCtxtPtr ctxt) {
5176
xmlChar *buf = NULL;
5178
size_t size = XML_PARSER_BUFFER_SIZE;
5180
const xmlChar *target;
5181
xmlParserInputState state;
5184
if ((RAW == '<') && (NXT(1) == '?')) {
5185
xmlParserInputPtr input = ctxt->input;
5186
state = ctxt->instate;
5187
ctxt->instate = XML_PARSER_PI;
5189
* this is a Processing Instruction.
5195
* Parse the target name and check for special support like
5198
target = xmlParsePITarget(ctxt);
5199
if (target != NULL) {
5200
if ((RAW == '?') && (NXT(1) == '>')) {
5201
if (input != ctxt->input) {
5202
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5203
"PI declaration doesn't start and stop in the same entity\n");
5210
if ((ctxt->sax) && (!ctxt->disableSAX) &&
5211
(ctxt->sax->processingInstruction != NULL))
5212
ctxt->sax->processingInstruction(ctxt->userData,
5214
if (ctxt->instate != XML_PARSER_EOF)
5215
ctxt->instate = state;
5218
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5220
xmlErrMemory(ctxt, NULL);
5221
ctxt->instate = state;
5225
if (!IS_BLANK(cur)) {
5226
xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5227
"ParsePI: PI %s space expected\n", target);
5231
while (IS_CHAR(cur) && /* checked */
5232
((cur != '?') || (NXT(1) != '>'))) {
5233
if (len + 5 >= size) {
5235
size_t new_size = size * 2;
5236
tmp = (xmlChar *) xmlRealloc(buf, new_size);
5238
xmlErrMemory(ctxt, NULL);
5240
ctxt->instate = state;
5249
if (ctxt->instate == XML_PARSER_EOF) {
5254
if ((len > XML_MAX_TEXT_LENGTH) &&
5255
((ctxt->options & XML_PARSE_HUGE) == 0)) {
5256
xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5257
"PI %s too big found", target);
5259
ctxt->instate = state;
5263
COPY_BUF(l,buf,len,cur);
5272
if ((len > XML_MAX_TEXT_LENGTH) &&
5273
((ctxt->options & XML_PARSE_HUGE) == 0)) {
5274
xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5275
"PI %s too big found", target);
5277
ctxt->instate = state;
5282
xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5283
"ParsePI: PI %s never end ...\n", target);
5285
if (input != ctxt->input) {
5286
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5287
"PI declaration doesn't start and stop in the same entity\n");
5291
#ifdef LIBXML_CATALOG_ENABLED
5292
if (((state == XML_PARSER_MISC) ||
5293
(state == XML_PARSER_START)) &&
5294
(xmlStrEqual(target, XML_CATALOG_PI))) {
5295
xmlCatalogAllow allow = xmlCatalogGetDefaults();
5296
if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5297
(allow == XML_CATA_ALLOW_ALL))
5298
xmlParseCatalogPI(ctxt, buf);
5306
if ((ctxt->sax) && (!ctxt->disableSAX) &&
5307
(ctxt->sax->processingInstruction != NULL))
5308
ctxt->sax->processingInstruction(ctxt->userData,
5313
xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5315
if (ctxt->instate != XML_PARSER_EOF)
5316
ctxt->instate = state;
5321
* xmlParseNotationDecl:
5322
* @ctxt: an XML parser context
5324
* parse a notation declaration
5326
* [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5328
* Hence there is actually 3 choices:
5329
* 'PUBLIC' S PubidLiteral
5330
* 'PUBLIC' S PubidLiteral S SystemLiteral
5331
* and 'SYSTEM' S SystemLiteral
5333
* See the NOTE on xmlParseExternalID().
5337
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5338
const xmlChar *name;
5342
if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5343
xmlParserInputPtr input = ctxt->input;
5346
if (!IS_BLANK_CH(CUR)) {
5347
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5348
"Space required after '<!NOTATION'\n");
5353
name = xmlParseName(ctxt);
5355
xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5358
if (!IS_BLANK_CH(CUR)) {
5359
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5360
"Space required after the NOTATION name'\n");
5363
if (xmlStrchr(name, ':') != NULL) {
5364
xmlNsErr(ctxt, XML_NS_ERR_COLON,
5365
"colon are forbidden from notation names '%s'\n",
5373
Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5377
if (input != ctxt->input) {
5378
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5379
"Notation declaration doesn't start and stop in the same entity\n");
5382
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5383
(ctxt->sax->notationDecl != NULL))
5384
ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5386
xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5388
if (Systemid != NULL) xmlFree(Systemid);
5389
if (Pubid != NULL) xmlFree(Pubid);
5394
* xmlParseEntityDecl:
5395
* @ctxt: an XML parser context
5397
* parse <!ENTITY declarations
5399
* [70] EntityDecl ::= GEDecl | PEDecl
5401
* [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5403
* [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5405
* [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5407
* [74] PEDef ::= EntityValue | ExternalID
5409
* [76] NDataDecl ::= S 'NDATA' S Name
5411
* [ VC: Notation Declared ]
5412
* The Name must match the declared name of a notation.
5416
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5417
const xmlChar *name = NULL;
5418
xmlChar *value = NULL;
5419
xmlChar *URI = NULL, *literal = NULL;
5420
const xmlChar *ndata = NULL;
5421
int isParameter = 0;
5422
xmlChar *orig = NULL;
5425
/* GROW; done in the caller */
5426
if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5427
xmlParserInputPtr input = ctxt->input;
5430
skipped = SKIP_BLANKS;
5432
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5433
"Space required after '<!ENTITY'\n");
5438
skipped = SKIP_BLANKS;
5440
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5441
"Space required after '%'\n");
5446
name = xmlParseName(ctxt);
5448
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5449
"xmlParseEntityDecl: no name\n");
5452
if (xmlStrchr(name, ':') != NULL) {
5453
xmlNsErr(ctxt, XML_NS_ERR_COLON,
5454
"colon are forbidden from entities names '%s'\n",
5457
skipped = SKIP_BLANKS;
5459
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5460
"Space required after the entity name\n");
5463
ctxt->instate = XML_PARSER_ENTITY_DECL;
5465
* handle the various case of definitions...
5468
if ((RAW == '"') || (RAW == '\'')) {
5469
value = xmlParseEntityValue(ctxt, &orig);
5471
if ((ctxt->sax != NULL) &&
5472
(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5473
ctxt->sax->entityDecl(ctxt->userData, name,
5474
XML_INTERNAL_PARAMETER_ENTITY,
5478
URI = xmlParseExternalID(ctxt, &literal, 1);
5479
if ((URI == NULL) && (literal == NULL)) {
5480
xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5485
uri = xmlParseURI((const char *) URI);
5487
xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5488
"Invalid URI: %s\n", URI);
5490
* This really ought to be a well formedness error
5491
* but the XML Core WG decided otherwise c.f. issue
5492
* E26 of the XML erratas.
5495
if (uri->fragment != NULL) {
5497
* Okay this is foolish to block those but not
5500
xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5502
if ((ctxt->sax != NULL) &&
5503
(!ctxt->disableSAX) &&
5504
(ctxt->sax->entityDecl != NULL))
5505
ctxt->sax->entityDecl(ctxt->userData, name,
5506
XML_EXTERNAL_PARAMETER_ENTITY,
5507
literal, URI, NULL);
5514
if ((RAW == '"') || (RAW == '\'')) {
5515
value = xmlParseEntityValue(ctxt, &orig);
5516
if ((ctxt->sax != NULL) &&
5517
(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5518
ctxt->sax->entityDecl(ctxt->userData, name,
5519
XML_INTERNAL_GENERAL_ENTITY,
5522
* For expat compatibility in SAX mode.
5524
if ((ctxt->myDoc == NULL) ||
5525
(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5526
if (ctxt->myDoc == NULL) {
5527
ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5528
if (ctxt->myDoc == NULL) {
5529
xmlErrMemory(ctxt, "New Doc failed");
5532
ctxt->myDoc->properties = XML_DOC_INTERNAL;
5534
if (ctxt->myDoc->intSubset == NULL)
5535
ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5536
BAD_CAST "fake", NULL, NULL);
5538
xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5542
URI = xmlParseExternalID(ctxt, &literal, 1);
5543
if ((URI == NULL) && (literal == NULL)) {
5544
xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5549
uri = xmlParseURI((const char *)URI);
5551
xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5552
"Invalid URI: %s\n", URI);
5554
* This really ought to be a well formedness error
5555
* but the XML Core WG decided otherwise c.f. issue
5556
* E26 of the XML erratas.
5559
if (uri->fragment != NULL) {
5561
* Okay this is foolish to block those but not
5564
xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5569
if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
5570
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5571
"Space required before 'NDATA'\n");
5574
if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5576
if (!IS_BLANK_CH(CUR)) {
5577
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5578
"Space required after 'NDATA'\n");
5581
ndata = xmlParseName(ctxt);
5582
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5583
(ctxt->sax->unparsedEntityDecl != NULL))
5584
ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5585
literal, URI, ndata);
5587
if ((ctxt->sax != NULL) &&
5588
(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5589
ctxt->sax->entityDecl(ctxt->userData, name,
5590
XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5591
literal, URI, NULL);
5593
* For expat compatibility in SAX mode.
5594
* assuming the entity repalcement was asked for
5596
if ((ctxt->replaceEntities != 0) &&
5597
((ctxt->myDoc == NULL) ||
5598
(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5599
if (ctxt->myDoc == NULL) {
5600
ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5601
if (ctxt->myDoc == NULL) {
5602
xmlErrMemory(ctxt, "New Doc failed");
5605
ctxt->myDoc->properties = XML_DOC_INTERNAL;
5608
if (ctxt->myDoc->intSubset == NULL)
5609
ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5610
BAD_CAST "fake", NULL, NULL);
5611
xmlSAX2EntityDecl(ctxt, name,
5612
XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5613
literal, URI, NULL);
5618
if (ctxt->instate == XML_PARSER_EOF)
5622
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5623
"xmlParseEntityDecl: entity %s not terminated\n", name);
5625
if (input != ctxt->input) {
5626
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5627
"Entity declaration doesn't start and stop in the same entity\n");
5633
* Ugly mechanism to save the raw entity value.
5635
xmlEntityPtr cur = NULL;
5638
if ((ctxt->sax != NULL) &&
5639
(ctxt->sax->getParameterEntity != NULL))
5640
cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5642
if ((ctxt->sax != NULL) &&
5643
(ctxt->sax->getEntity != NULL))
5644
cur = ctxt->sax->getEntity(ctxt->userData, name);
5645
if ((cur == NULL) && (ctxt->userData==ctxt)) {
5646
cur = xmlSAX2GetEntity(ctxt, name);
5650
if (cur->orig != NULL)
5657
if (value != NULL) xmlFree(value);
5658
if (URI != NULL) xmlFree(URI);
5659
if (literal != NULL) xmlFree(literal);
5664
* xmlParseDefaultDecl:
5665
* @ctxt: an XML parser context
5666
* @value: Receive a possible fixed default value for the attribute
5668
* Parse an attribute default declaration
5670
* [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5672
* [ VC: Required Attribute ]
5673
* if the default declaration is the keyword #REQUIRED, then the
5674
* attribute must be specified for all elements of the type in the
5675
* attribute-list declaration.
5677
* [ VC: Attribute Default Legal ]
5678
* The declared default value must meet the lexical constraints of
5679
* the declared attribute type c.f. xmlValidateAttributeDecl()
5681
* [ VC: Fixed Attribute Default ]
5682
* if an attribute has a default value declared with the #FIXED
5683
* keyword, instances of that attribute must match the default value.
5685
* [ WFC: No < in Attribute Values ]
5686
* handled in xmlParseAttValue()
5688
* returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5689
* or XML_ATTRIBUTE_FIXED.
5693
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5698
if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5700
return(XML_ATTRIBUTE_REQUIRED);
5702
if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5704
return(XML_ATTRIBUTE_IMPLIED);
5706
val = XML_ATTRIBUTE_NONE;
5707
if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5709
val = XML_ATTRIBUTE_FIXED;
5710
if (!IS_BLANK_CH(CUR)) {
5711
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5712
"Space required after '#FIXED'\n");
5716
ret = xmlParseAttValue(ctxt);
5717
ctxt->instate = XML_PARSER_DTD;
5719
xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5720
"Attribute default value declaration error\n");
5727
* xmlParseNotationType:
5728
* @ctxt: an XML parser context
5730
* parse an Notation attribute type.
5732
* Note: the leading 'NOTATION' S part has already being parsed...
5734
* [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5736
* [ VC: Notation Attributes ]
5737
* Values of this type must match one of the notation names included
5738
* in the declaration; all notation names in the declaration must be declared.
5740
* Returns: the notation attribute tree built while parsing
5744
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5745
const xmlChar *name;
5746
xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5749
xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5756
name = xmlParseName(ctxt);
5758
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5759
"Name expected in NOTATION declaration\n");
5760
xmlFreeEnumeration(ret);
5764
while (tmp != NULL) {
5765
if (xmlStrEqual(name, tmp->name)) {
5766
xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5767
"standalone: attribute notation value token %s duplicated\n",
5769
if (!xmlDictOwns(ctxt->dict, name))
5770
xmlFree((xmlChar *) name);
5776
cur = xmlCreateEnumeration(name);
5778
xmlFreeEnumeration(ret);
5781
if (last == NULL) ret = last = cur;
5788
} while (RAW == '|');
5790
xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5791
xmlFreeEnumeration(ret);
5799
* xmlParseEnumerationType:
5800
* @ctxt: an XML parser context
5802
* parse an Enumeration attribute type.
5804
* [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5806
* [ VC: Enumeration ]
5807
* Values of this type must match one of the Nmtoken tokens in
5810
* Returns: the enumeration attribute tree built while parsing
5814
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5816
xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5819
xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5826
name = xmlParseNmtoken(ctxt);
5828
xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5832
while (tmp != NULL) {
5833
if (xmlStrEqual(name, tmp->name)) {
5834
xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5835
"standalone: attribute enumeration value token %s duplicated\n",
5837
if (!xmlDictOwns(ctxt->dict, name))
5844
cur = xmlCreateEnumeration(name);
5845
if (!xmlDictOwns(ctxt->dict, name))
5848
xmlFreeEnumeration(ret);
5851
if (last == NULL) ret = last = cur;
5858
} while (RAW == '|');
5860
xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5868
* xmlParseEnumeratedType:
5869
* @ctxt: an XML parser context
5870
* @tree: the enumeration tree built while parsing
5872
* parse an Enumerated attribute type.
5874
* [57] EnumeratedType ::= NotationType | Enumeration
5876
* [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5879
* Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5883
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5884
if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5886
if (!IS_BLANK_CH(CUR)) {
5887
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5888
"Space required after 'NOTATION'\n");
5892
*tree = xmlParseNotationType(ctxt);
5893
if (*tree == NULL) return(0);
5894
return(XML_ATTRIBUTE_NOTATION);
5896
*tree = xmlParseEnumerationType(ctxt);
5897
if (*tree == NULL) return(0);
5898
return(XML_ATTRIBUTE_ENUMERATION);
5902
* xmlParseAttributeType:
5903
* @ctxt: an XML parser context
5904
* @tree: the enumeration tree built while parsing
5906
* parse the Attribute list def for an element
5908
* [54] AttType ::= StringType | TokenizedType | EnumeratedType
5910
* [55] StringType ::= 'CDATA'
5912
* [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5913
* 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5915
* Validity constraints for attribute values syntax are checked in
5916
* xmlValidateAttributeValue()
5919
* Values of type ID must match the Name production. A name must not
5920
* appear more than once in an XML document as a value of this type;
5921
* i.e., ID values must uniquely identify the elements which bear them.
5923
* [ VC: One ID per Element Type ]
5924
* No element type may have more than one ID attribute specified.
5926
* [ VC: ID Attribute Default ]
5927
* An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5930
* Values of type IDREF must match the Name production, and values
5931
* of type IDREFS must match Names; each IDREF Name must match the value
5932
* of an ID attribute on some element in the XML document; i.e. IDREF
5933
* values must match the value of some ID attribute.
5935
* [ VC: Entity Name ]
5936
* Values of type ENTITY must match the Name production, values
5937
* of type ENTITIES must match Names; each Entity Name must match the
5938
* name of an unparsed entity declared in the DTD.
5940
* [ VC: Name Token ]
5941
* Values of type NMTOKEN must match the Nmtoken production; values
5942
* of type NMTOKENS must match Nmtokens.
5944
* Returns the attribute type
5947
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5949
if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5951
return(XML_ATTRIBUTE_CDATA);
5952
} else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5954
return(XML_ATTRIBUTE_IDREFS);
5955
} else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5957
return(XML_ATTRIBUTE_IDREF);
5958
} else if ((RAW == 'I') && (NXT(1) == 'D')) {
5960
return(XML_ATTRIBUTE_ID);
5961
} else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5963
return(XML_ATTRIBUTE_ENTITY);
5964
} else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5966
return(XML_ATTRIBUTE_ENTITIES);
5967
} else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5969
return(XML_ATTRIBUTE_NMTOKENS);
5970
} else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5972
return(XML_ATTRIBUTE_NMTOKEN);
5974
return(xmlParseEnumeratedType(ctxt, tree));
5978
* xmlParseAttributeListDecl:
5979
* @ctxt: an XML parser context
5981
* : parse the Attribute list def for an element
5983
* [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5985
* [53] AttDef ::= S Name S AttType S DefaultDecl
5989
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5990
const xmlChar *elemName;
5991
const xmlChar *attrName;
5992
xmlEnumerationPtr tree;
5994
if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5995
xmlParserInputPtr input = ctxt->input;
5998
if (!IS_BLANK_CH(CUR)) {
5999
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6000
"Space required after '<!ATTLIST'\n");
6003
elemName = xmlParseName(ctxt);
6004
if (elemName == NULL) {
6005
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6006
"ATTLIST: no name for Element\n");
6011
while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6012
const xmlChar *check = CUR_PTR;
6015
xmlChar *defaultValue = NULL;
6019
attrName = xmlParseName(ctxt);
6020
if (attrName == NULL) {
6021
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6022
"ATTLIST: no name for Attribute\n");
6026
if (!IS_BLANK_CH(CUR)) {
6027
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6028
"Space required after the attribute name\n");
6033
type = xmlParseAttributeType(ctxt, &tree);
6039
if (!IS_BLANK_CH(CUR)) {
6040
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6041
"Space required after the attribute type\n");
6043
xmlFreeEnumeration(tree);
6048
def = xmlParseDefaultDecl(ctxt, &defaultValue);
6050
if (defaultValue != NULL)
6051
xmlFree(defaultValue);
6053
xmlFreeEnumeration(tree);
6056
if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6057
xmlAttrNormalizeSpace(defaultValue, defaultValue);
6061
if (!IS_BLANK_CH(CUR)) {
6062
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6063
"Space required after the attribute default value\n");
6064
if (defaultValue != NULL)
6065
xmlFree(defaultValue);
6067
xmlFreeEnumeration(tree);
6072
if (check == CUR_PTR) {
6073
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6074
"in xmlParseAttributeListDecl\n");
6075
if (defaultValue != NULL)
6076
xmlFree(defaultValue);
6078
xmlFreeEnumeration(tree);
6081
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6082
(ctxt->sax->attributeDecl != NULL))
6083
ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6084
type, def, defaultValue, tree);
6085
else if (tree != NULL)
6086
xmlFreeEnumeration(tree);
6088
if ((ctxt->sax2) && (defaultValue != NULL) &&
6089
(def != XML_ATTRIBUTE_IMPLIED) &&
6090
(def != XML_ATTRIBUTE_REQUIRED)) {
6091
xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6094
xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6096
if (defaultValue != NULL)
6097
xmlFree(defaultValue);
6101
if (input != ctxt->input) {
6102
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6103
"Attribute list declaration doesn't start and stop in the same entity\n",
6112
* xmlParseElementMixedContentDecl:
6113
* @ctxt: an XML parser context
6114
* @inputchk: the input used for the current entity, needed for boundary checks
6116
* parse the declaration for a Mixed Element content
6117
* The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6119
* [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6120
* '(' S? '#PCDATA' S? ')'
6122
* [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6124
* [ VC: No Duplicate Types ]
6125
* The same name must not appear more than once in a single
6126
* mixed-content declaration.
6128
* returns: the list of the xmlElementContentPtr describing the element choices
6130
xmlElementContentPtr
6131
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6132
xmlElementContentPtr ret = NULL, cur = NULL, n;
6133
const xmlChar *elem = NULL;
6136
if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6141
if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6142
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6143
"Element content declaration doesn't start and stop in the same entity\n",
6147
ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6151
ret->ocur = XML_ELEMENT_CONTENT_MULT;
6156
if ((RAW == '(') || (RAW == '|')) {
6157
ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6158
if (ret == NULL) return(NULL);
6160
while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6163
ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6164
if (ret == NULL) return(NULL);
6170
n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6171
if (n == NULL) return(NULL);
6172
n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6181
elem = xmlParseName(ctxt);
6183
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6184
"xmlParseElementMixedContentDecl : Name expected\n");
6185
xmlFreeDocElementContent(ctxt->myDoc, cur);
6191
if ((RAW == ')') && (NXT(1) == '*')) {
6193
cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6194
XML_ELEMENT_CONTENT_ELEMENT);
6195
if (cur->c2 != NULL)
6196
cur->c2->parent = cur;
6199
ret->ocur = XML_ELEMENT_CONTENT_MULT;
6200
if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6201
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6202
"Element content declaration doesn't start and stop in the same entity\n",
6207
xmlFreeDocElementContent(ctxt->myDoc, ret);
6208
xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6213
xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6219
* xmlParseElementChildrenContentDeclPriv:
6220
* @ctxt: an XML parser context
6221
* @inputchk: the input used for the current entity, needed for boundary checks
6222
* @depth: the level of recursion
6224
* parse the declaration for a Mixed Element content
6225
* The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6228
* [47] children ::= (choice | seq) ('?' | '*' | '+')?
6230
* [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6232
* [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6234
* [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6236
* [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6237
* TODO Parameter-entity replacement text must be properly nested
6238
* with parenthesized groups. That is to say, if either of the
6239
* opening or closing parentheses in a choice, seq, or Mixed
6240
* construct is contained in the replacement text for a parameter
6241
* entity, both must be contained in the same replacement text. For
6242
* interoperability, if a parameter-entity reference appears in a
6243
* choice, seq, or Mixed construct, its replacement text should not
6244
* be empty, and neither the first nor last non-blank character of
6245
* the replacement text should be a connector (| or ,).
6247
* Returns the tree of xmlElementContentPtr describing the element
6250
static xmlElementContentPtr
6251
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6253
xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6254
const xmlChar *elem;
6257
if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6259
xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6260
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6267
int inputid = ctxt->input->id;
6269
/* Recurse on first child */
6272
cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6277
elem = xmlParseName(ctxt);
6279
xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6282
cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6284
xmlErrMemory(ctxt, NULL);
6289
cur->ocur = XML_ELEMENT_CONTENT_OPT;
6291
} else if (RAW == '*') {
6292
cur->ocur = XML_ELEMENT_CONTENT_MULT;
6294
} else if (RAW == '+') {
6295
cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6298
cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6304
while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6306
* Each loop we parse one separator and one element.
6309
if (type == 0) type = CUR;
6312
* Detect "Name | Name , Name" error
6314
else if (type != CUR) {
6315
xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6316
"xmlParseElementChildrenContentDecl : '%c' expected\n",
6318
if ((last != NULL) && (last != ret))
6319
xmlFreeDocElementContent(ctxt->myDoc, last);
6321
xmlFreeDocElementContent(ctxt->myDoc, ret);
6326
op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6328
if ((last != NULL) && (last != ret))
6329
xmlFreeDocElementContent(ctxt->myDoc, last);
6330
xmlFreeDocElementContent(ctxt->myDoc, ret);
6348
} else if (RAW == '|') {
6349
if (type == 0) type = CUR;
6352
* Detect "Name , Name | Name" error
6354
else if (type != CUR) {
6355
xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6356
"xmlParseElementChildrenContentDecl : '%c' expected\n",
6358
if ((last != NULL) && (last != ret))
6359
xmlFreeDocElementContent(ctxt->myDoc, last);
6361
xmlFreeDocElementContent(ctxt->myDoc, ret);
6366
op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6368
if ((last != NULL) && (last != ret))
6369
xmlFreeDocElementContent(ctxt->myDoc, last);
6371
xmlFreeDocElementContent(ctxt->myDoc, ret);
6390
xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6391
if ((last != NULL) && (last != ret))
6392
xmlFreeDocElementContent(ctxt->myDoc, last);
6394
xmlFreeDocElementContent(ctxt->myDoc, ret);
6401
int inputid = ctxt->input->id;
6402
/* Recurse on second child */
6405
last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6409
elem = xmlParseName(ctxt);
6411
xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6413
xmlFreeDocElementContent(ctxt->myDoc, ret);
6416
last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6419
xmlFreeDocElementContent(ctxt->myDoc, ret);
6423
last->ocur = XML_ELEMENT_CONTENT_OPT;
6425
} else if (RAW == '*') {
6426
last->ocur = XML_ELEMENT_CONTENT_MULT;
6428
} else if (RAW == '+') {
6429
last->ocur = XML_ELEMENT_CONTENT_PLUS;
6432
last->ocur = XML_ELEMENT_CONTENT_ONCE;
6438
if ((cur != NULL) && (last != NULL)) {
6443
if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6444
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6445
"Element content declaration doesn't start and stop in the same entity\n",
6451
if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6452
(ret->ocur == XML_ELEMENT_CONTENT_MULT))
6453
ret->ocur = XML_ELEMENT_CONTENT_MULT;
6455
ret->ocur = XML_ELEMENT_CONTENT_OPT;
6458
} else if (RAW == '*') {
6460
ret->ocur = XML_ELEMENT_CONTENT_MULT;
6463
* Some normalization:
6464
* (a | b* | c?)* == (a | b | c)*
6466
while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6467
if ((cur->c1 != NULL) &&
6468
((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6469
(cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6470
cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6471
if ((cur->c2 != NULL) &&
6472
((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6473
(cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6474
cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6479
} else if (RAW == '+') {
6483
if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6484
(ret->ocur == XML_ELEMENT_CONTENT_MULT))
6485
ret->ocur = XML_ELEMENT_CONTENT_MULT;
6487
ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6489
* Some normalization:
6490
* (a | b*)+ == (a | b)*
6491
* (a | b?)+ == (a | b)*
6493
while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6494
if ((cur->c1 != NULL) &&
6495
((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6496
(cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6497
cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6500
if ((cur->c2 != NULL) &&
6501
((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6502
(cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6503
cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6509
ret->ocur = XML_ELEMENT_CONTENT_MULT;
6517
* xmlParseElementChildrenContentDecl:
6518
* @ctxt: an XML parser context
6519
* @inputchk: the input used for the current entity, needed for boundary checks
6521
* parse the declaration for a Mixed Element content
6522
* The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6524
* [47] children ::= (choice | seq) ('?' | '*' | '+')?
6526
* [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6528
* [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6530
* [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6532
* [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6533
* TODO Parameter-entity replacement text must be properly nested
6534
* with parenthesized groups. That is to say, if either of the
6535
* opening or closing parentheses in a choice, seq, or Mixed
6536
* construct is contained in the replacement text for a parameter
6537
* entity, both must be contained in the same replacement text. For
6538
* interoperability, if a parameter-entity reference appears in a
6539
* choice, seq, or Mixed construct, its replacement text should not
6540
* be empty, and neither the first nor last non-blank character of
6541
* the replacement text should be a connector (| or ,).
6543
* Returns the tree of xmlElementContentPtr describing the element
6546
xmlElementContentPtr
6547
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6548
/* stub left for API/ABI compat */
6549
return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6553
* xmlParseElementContentDecl:
6554
* @ctxt: an XML parser context
6555
* @name: the name of the element being defined.
6556
* @result: the Element Content pointer will be stored here if any
6558
* parse the declaration for an Element content either Mixed or Children,
6559
* the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6561
* [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6563
* returns: the type of element content XML_ELEMENT_TYPE_xxx
6567
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6568
xmlElementContentPtr *result) {
6570
xmlElementContentPtr tree = NULL;
6571
int inputid = ctxt->input->id;
6577
xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6578
"xmlParseElementContentDecl : %s '(' expected\n", name);
6583
if (ctxt->instate == XML_PARSER_EOF)
6586
if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6587
tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6588
res = XML_ELEMENT_TYPE_MIXED;
6590
tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6591
res = XML_ELEMENT_TYPE_ELEMENT;
6599
* xmlParseElementDecl:
6600
* @ctxt: an XML parser context
6602
* parse an Element declaration.
6604
* [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6606
* [ VC: Unique Element Type Declaration ]
6607
* No element type may be declared more than once
6609
* Returns the type of the element, or -1 in case of error
6612
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6613
const xmlChar *name;
6615
xmlElementContentPtr content = NULL;
6617
/* GROW; done in the caller */
6618
if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6619
xmlParserInputPtr input = ctxt->input;
6622
if (!IS_BLANK_CH(CUR)) {
6623
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6624
"Space required after 'ELEMENT'\n");
6627
name = xmlParseName(ctxt);
6629
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6630
"xmlParseElementDecl: no name for Element\n");
6633
while ((RAW == 0) && (ctxt->inputNr > 1))
6635
if (!IS_BLANK_CH(CUR)) {
6636
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6637
"Space required after the element name\n");
6640
if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6643
* Element must always be empty.
6645
ret = XML_ELEMENT_TYPE_EMPTY;
6646
} else if ((RAW == 'A') && (NXT(1) == 'N') &&
6650
* Element is a generic container.
6652
ret = XML_ELEMENT_TYPE_ANY;
6653
} else if (RAW == '(') {
6654
ret = xmlParseElementContentDecl(ctxt, name, &content);
6657
* [ WFC: PEs in Internal Subset ] error handling.
6659
if ((RAW == '%') && (ctxt->external == 0) &&
6660
(ctxt->inputNr == 1)) {
6661
xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6662
"PEReference: forbidden within markup decl in internal subset\n");
6664
xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6665
"xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6672
* Pop-up of finished entities.
6674
while ((RAW == 0) && (ctxt->inputNr > 1))
6679
xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6680
if (content != NULL) {
6681
xmlFreeDocElementContent(ctxt->myDoc, content);
6684
if (input != ctxt->input) {
6685
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6686
"Element declaration doesn't start and stop in the same entity\n");
6690
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6691
(ctxt->sax->elementDecl != NULL)) {
6692
if (content != NULL)
6693
content->parent = NULL;
6694
ctxt->sax->elementDecl(ctxt->userData, name, ret,
6696
if ((content != NULL) && (content->parent == NULL)) {
6698
* this is a trick: if xmlAddElementDecl is called,
6699
* instead of copying the full tree it is plugged directly
6700
* if called from the parser. Avoid duplicating the
6701
* interfaces or change the API/ABI
6703
xmlFreeDocElementContent(ctxt->myDoc, content);
6705
} else if (content != NULL) {
6706
xmlFreeDocElementContent(ctxt->myDoc, content);
6714
* xmlParseConditionalSections
6715
* @ctxt: an XML parser context
6717
* [61] conditionalSect ::= includeSect | ignoreSect
6718
* [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6719
* [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6720
* [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6721
* [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6725
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6726
int id = ctxt->input->id;
6730
if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6734
xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6736
if (ctxt->input->id != id) {
6737
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6738
"All markup of the conditional section is not in the same entity\n",
6743
if (xmlParserDebugEntities) {
6744
if ((ctxt->input != NULL) && (ctxt->input->filename))
6745
xmlGenericError(xmlGenericErrorContext,
6746
"%s(%d): ", ctxt->input->filename,
6748
xmlGenericError(xmlGenericErrorContext,
6749
"Entering INCLUDE Conditional Section\n");
6752
while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6753
(NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
6754
const xmlChar *check = CUR_PTR;
6755
unsigned int cons = ctxt->input->consumed;
6757
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6758
xmlParseConditionalSections(ctxt);
6759
} else if (IS_BLANK_CH(CUR)) {
6761
} else if (RAW == '%') {
6762
xmlParsePEReference(ctxt);
6764
xmlParseMarkupDecl(ctxt);
6767
* Pop-up of finished entities.
6769
while ((RAW == 0) && (ctxt->inputNr > 1))
6772
if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6773
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6777
if (xmlParserDebugEntities) {
6778
if ((ctxt->input != NULL) && (ctxt->input->filename))
6779
xmlGenericError(xmlGenericErrorContext,
6780
"%s(%d): ", ctxt->input->filename,
6782
xmlGenericError(xmlGenericErrorContext,
6783
"Leaving INCLUDE Conditional Section\n");
6786
} else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6788
xmlParserInputState instate;
6794
xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6796
if (ctxt->input->id != id) {
6797
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6798
"All markup of the conditional section is not in the same entity\n",
6803
if (xmlParserDebugEntities) {
6804
if ((ctxt->input != NULL) && (ctxt->input->filename))
6805
xmlGenericError(xmlGenericErrorContext,
6806
"%s(%d): ", ctxt->input->filename,
6808
xmlGenericError(xmlGenericErrorContext,
6809
"Entering IGNORE Conditional Section\n");
6813
* Parse up to the end of the conditional section
6814
* But disable SAX event generating DTD building in the meantime
6816
state = ctxt->disableSAX;
6817
instate = ctxt->instate;
6818
if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6819
ctxt->instate = XML_PARSER_IGNORE;
6821
while (((depth >= 0) && (RAW != 0)) &&
6822
(ctxt->instate != XML_PARSER_EOF)) {
6823
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6828
if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6829
if (--depth >= 0) SKIP(3);
6836
ctxt->disableSAX = state;
6837
ctxt->instate = instate;
6839
if (xmlParserDebugEntities) {
6840
if ((ctxt->input != NULL) && (ctxt->input->filename))
6841
xmlGenericError(xmlGenericErrorContext,
6842
"%s(%d): ", ctxt->input->filename,
6844
xmlGenericError(xmlGenericErrorContext,
6845
"Leaving IGNORE Conditional Section\n");
6849
xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6856
xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6858
if (ctxt->input->id != id) {
6859
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6860
"All markup of the conditional section is not in the same entity\n",
6868
* xmlParseMarkupDecl:
6869
* @ctxt: an XML parser context
6871
* parse Markup declarations
6873
* [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6874
* NotationDecl | PI | Comment
6876
* [ VC: Proper Declaration/PE Nesting ]
6877
* Parameter-entity replacement text must be properly nested with
6878
* markup declarations. That is to say, if either the first character
6879
* or the last character of a markup declaration (markupdecl above) is
6880
* contained in the replacement text for a parameter-entity reference,
6881
* both must be contained in the same replacement text.
6883
* [ WFC: PEs in Internal Subset ]
6884
* In the internal DTD subset, parameter-entity references can occur
6885
* only where markup declarations can occur, not within markup declarations.
6886
* (This does not apply to references that occur in external parameter
6887
* entities or to the external subset.)
6890
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6893
if (NXT(1) == '!') {
6897
xmlParseElementDecl(ctxt);
6898
else if (NXT(3) == 'N')
6899
xmlParseEntityDecl(ctxt);
6902
xmlParseAttributeListDecl(ctxt);
6905
xmlParseNotationDecl(ctxt);
6908
xmlParseComment(ctxt);
6911
/* there is an error but it will be detected later */
6914
} else if (NXT(1) == '?') {
6919
* This is only for internal subset. On external entities,
6920
* the replacement is done before parsing stage
6922
if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6923
xmlParsePEReference(ctxt);
6926
* Conditional sections are allowed from entities included
6927
* by PE References in the internal subset.
6929
if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6930
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6931
xmlParseConditionalSections(ctxt);
6935
ctxt->instate = XML_PARSER_DTD;
6940
* @ctxt: an XML parser context
6942
* parse an XML declaration header for external entities
6944
* [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6948
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6950
const xmlChar *encoding;
6953
* We know that '<?xml' is here.
6955
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6958
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6962
if (!IS_BLANK_CH(CUR)) {
6963
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6964
"Space needed after '<?xml'\n");
6969
* We may have the VersionInfo here.
6971
version = xmlParseVersionInfo(ctxt);
6972
if (version == NULL)
6973
version = xmlCharStrdup(XML_DEFAULT_VERSION);
6975
if (!IS_BLANK_CH(CUR)) {
6976
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6977
"Space needed here\n");
6980
ctxt->input->version = version;
6983
* We must have the encoding declaration
6985
encoding = xmlParseEncodingDecl(ctxt);
6986
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6988
* The XML REC instructs us to stop parsing right here
6992
if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6993
xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6994
"Missing encoding in text declaration\n");
6998
if ((RAW == '?') && (NXT(1) == '>')) {
7000
} else if (RAW == '>') {
7001
/* Deprecated old WD ... */
7002
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7005
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7006
MOVETO_ENDTAG(CUR_PTR);
7012
* xmlParseExternalSubset:
7013
* @ctxt: an XML parser context
7014
* @ExternalID: the external identifier
7015
* @SystemID: the system identifier (or URL)
7017
* parse Markup declarations from an external subset
7019
* [30] extSubset ::= textDecl? extSubsetDecl
7021
* [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7024
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7025
const xmlChar *SystemID) {
7026
xmlDetectSAX2(ctxt);
7029
if ((ctxt->encoding == NULL) &&
7030
(ctxt->input->end - ctxt->input->cur >= 4)) {
7032
xmlCharEncoding enc;
7038
enc = xmlDetectCharEncoding(start, 4);
7039
if (enc != XML_CHAR_ENCODING_NONE)
7040
xmlSwitchEncoding(ctxt, enc);
7043
if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7044
xmlParseTextDecl(ctxt);
7045
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7047
* The XML REC instructs us to stop parsing right here
7049
ctxt->instate = XML_PARSER_EOF;
7053
if (ctxt->myDoc == NULL) {
7054
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7055
if (ctxt->myDoc == NULL) {
7056
xmlErrMemory(ctxt, "New Doc failed");
7059
ctxt->myDoc->properties = XML_DOC_INTERNAL;
7061
if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7062
xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7064
ctxt->instate = XML_PARSER_DTD;
7066
while (((RAW == '<') && (NXT(1) == '?')) ||
7067
((RAW == '<') && (NXT(1) == '!')) ||
7068
(RAW == '%') || IS_BLANK_CH(CUR)) {
7069
const xmlChar *check = CUR_PTR;
7070
unsigned int cons = ctxt->input->consumed;
7073
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7074
xmlParseConditionalSections(ctxt);
7075
} else if (IS_BLANK_CH(CUR)) {
7077
} else if (RAW == '%') {
7078
xmlParsePEReference(ctxt);
7080
xmlParseMarkupDecl(ctxt);
7083
* Pop-up of finished entities.
7085
while ((RAW == 0) && (ctxt->inputNr > 1))
7088
if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7089
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7095
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7101
* xmlParseReference:
7102
* @ctxt: an XML parser context
7104
* parse and handle entity references in content, depending on the SAX
7105
* interface, this may end-up in a call to character() if this is a
7106
* CharRef, a predefined entity, if there is no reference() callback.
7107
* or if the parser was asked to switch to that mode.
7109
* [67] Reference ::= EntityRef | CharRef
7112
xmlParseReference(xmlParserCtxtPtr ctxt) {
7116
xmlNodePtr list = NULL;
7117
xmlParserErrors ret = XML_ERR_OK;
7124
* Simple case of a CharRef
7126
if (NXT(1) == '#') {
7130
int value = xmlParseCharRef(ctxt);
7134
if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7136
* So we are using non-UTF-8 buffers
7137
* Check that the char fit on 8bits, if not
7138
* generate a CharRef.
7140
if (value <= 0xFF) {
7143
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7144
(!ctxt->disableSAX))
7145
ctxt->sax->characters(ctxt->userData, out, 1);
7147
if ((hex == 'x') || (hex == 'X'))
7148
snprintf((char *)out, sizeof(out), "#x%X", value);
7150
snprintf((char *)out, sizeof(out), "#%d", value);
7151
if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7152
(!ctxt->disableSAX))
7153
ctxt->sax->reference(ctxt->userData, out);
7157
* Just encode the value in UTF-8
7159
COPY_BUF(0 ,out, i, value);
7161
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7162
(!ctxt->disableSAX))
7163
ctxt->sax->characters(ctxt->userData, out, i);
7169
* We are seeing an entity reference
7171
ent = xmlParseEntityRef(ctxt);
7172
if (ent == NULL) return;
7173
if (!ctxt->wellFormed)
7175
was_checked = ent->checked;
7177
/* special case of predefined entities */
7178
if ((ent->name == NULL) ||
7179
(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7181
if (val == NULL) return;
7183
* inline the entity.
7185
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7186
(!ctxt->disableSAX))
7187
ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7192
* The first reference to the entity trigger a parsing phase
7193
* where the ent->children is filled with the result from
7195
* Note: external parsed entities will not be loaded, it is not
7196
* required for a non-validating parser, unless the parsing option
7197
* of validating, or substituting entities were given. Doing so is
7198
* far more secure as the parser will only process data coming from
7199
* the document entity by default.
7201
if ((ent->checked == 0) &&
7202
((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7203
(ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7204
unsigned long oldnbent = ctxt->nbentities;
7207
* This is a bit hackish but this seems the best
7208
* way to make sure both SAX and DOM entity support
7212
if (ctxt->userData == ctxt)
7215
user_data = ctxt->userData;
7218
* Check that this entity is well formed
7219
* 4.3.2: An internal general parsed entity is well-formed
7220
* if its replacement text matches the production labeled
7223
if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7225
ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7229
} else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7231
ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7232
user_data, ctxt->depth, ent->URI,
7233
ent->ExternalID, &list);
7236
ret = XML_ERR_ENTITY_PE_INTERNAL;
7237
xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7238
"invalid entity type found\n", NULL);
7242
* Store the number of entities needing parsing for this entity
7243
* content and do checkings
7245
ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7246
if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7248
if (ret == XML_ERR_ENTITY_LOOP) {
7249
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7250
xmlFreeNodeList(list);
7253
if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7254
xmlFreeNodeList(list);
7258
if ((ret == XML_ERR_OK) && (list != NULL)) {
7259
if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7260
(ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7261
(ent->children == NULL)) {
7262
ent->children = list;
7263
if (ctxt->replaceEntities) {
7265
* Prune it directly in the generated document
7266
* except for single text nodes.
7268
if (((list->type == XML_TEXT_NODE) &&
7269
(list->next == NULL)) ||
7270
(ctxt->parseMode == XML_PARSE_READER)) {
7271
list->parent = (xmlNodePtr) ent;
7276
while (list != NULL) {
7277
list->parent = (xmlNodePtr) ctxt->node;
7278
list->doc = ctxt->myDoc;
7279
if (list->next == NULL)
7283
list = ent->children;
7284
#ifdef LIBXML_LEGACY_ENABLED
7285
if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7286
xmlAddEntityReference(ent, list, NULL);
7287
#endif /* LIBXML_LEGACY_ENABLED */
7291
while (list != NULL) {
7292
list->parent = (xmlNodePtr) ent;
7293
xmlSetTreeDoc(list, ent->doc);
7294
if (list->next == NULL)
7300
xmlFreeNodeList(list);
7303
} else if ((ret != XML_ERR_OK) &&
7304
(ret != XML_WAR_UNDECLARED_ENTITY)) {
7305
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7306
"Entity '%s' failed to parse\n", ent->name);
7307
} else if (list != NULL) {
7308
xmlFreeNodeList(list);
7311
if (ent->checked == 0)
7313
} else if (ent->checked != 1) {
7314
ctxt->nbentities += ent->checked / 2;
7318
* Now that the entity content has been gathered
7319
* provide it to the application, this can take different forms based
7320
* on the parsing modes.
7322
if (ent->children == NULL) {
7324
* Probably running in SAX mode and the callbacks don't
7325
* build the entity content. So unless we already went
7326
* though parsing for first checking go though the entity
7327
* content to generate callbacks associated to the entity
7329
if (was_checked != 0) {
7332
* This is a bit hackish but this seems the best
7333
* way to make sure both SAX and DOM entity support
7336
if (ctxt->userData == ctxt)
7339
user_data = ctxt->userData;
7341
if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7343
ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7344
ent->content, user_data, NULL);
7346
} else if (ent->etype ==
7347
XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7349
ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7350
ctxt->sax, user_data, ctxt->depth,
7351
ent->URI, ent->ExternalID, NULL);
7354
ret = XML_ERR_ENTITY_PE_INTERNAL;
7355
xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7356
"invalid entity type found\n", NULL);
7358
if (ret == XML_ERR_ENTITY_LOOP) {
7359
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7363
if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7364
(ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7366
* Entity reference callback comes second, it's somewhat
7367
* superfluous but a compatibility to historical behaviour
7369
ctxt->sax->reference(ctxt->userData, ent->name);
7375
* If we didn't get any children for the entity being built
7377
if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7378
(ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7382
ctxt->sax->reference(ctxt->userData, ent->name);
7386
if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7388
* There is a problem on the handling of _private for entities
7389
* (bug 155816): Should we copy the content of the field from
7390
* the entity (possibly overwriting some value set by the user
7391
* when a copy is created), should we leave it alone, or should
7392
* we try to take care of different situations? The problem
7393
* is exacerbated by the usage of this field by the xmlReader.
7394
* To fix this bug, we look at _private on the created node
7395
* and, if it's NULL, we copy in whatever was in the entity.
7396
* If it's not NULL we leave it alone. This is somewhat of a
7397
* hack - maybe we should have further tests to determine
7400
if ((ctxt->node != NULL) && (ent->children != NULL)) {
7402
* Seems we are generating the DOM content, do
7403
* a simple tree copy for all references except the first
7404
* In the first occurrence list contains the replacement.
7406
if (((list == NULL) && (ent->owner == 0)) ||
7407
(ctxt->parseMode == XML_PARSE_READER)) {
7408
xmlNodePtr nw = NULL, cur, firstChild = NULL;
7411
* We are copying here, make sure there is no abuse
7413
ctxt->sizeentcopy += ent->length;
7414
if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7418
* when operating on a reader, the entities definitions
7419
* are always owning the entities subtree.
7420
if (ctxt->parseMode == XML_PARSE_READER)
7424
cur = ent->children;
7425
while (cur != NULL) {
7426
nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7428
if (nw->_private == NULL)
7429
nw->_private = cur->_private;
7430
if (firstChild == NULL){
7433
nw = xmlAddChild(ctxt->node, nw);
7435
if (cur == ent->last) {
7437
* needed to detect some strange empty
7438
* node cases in the reader tests
7440
if ((ctxt->parseMode == XML_PARSE_READER) &&
7442
(nw->type == XML_ELEMENT_NODE) &&
7443
(nw->children == NULL))
7450
#ifdef LIBXML_LEGACY_ENABLED
7451
if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7452
xmlAddEntityReference(ent, firstChild, nw);
7453
#endif /* LIBXML_LEGACY_ENABLED */
7454
} else if ((list == NULL) || (ctxt->inputNr > 0)) {
7455
xmlNodePtr nw = NULL, cur, next, last,
7459
* We are copying here, make sure there is no abuse
7461
ctxt->sizeentcopy += ent->length;
7462
if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7466
* Copy the entity child list and make it the new
7467
* entity child list. The goal is to make sure any
7468
* ID or REF referenced will be the one from the
7469
* document content and not the entity copy.
7471
cur = ent->children;
7472
ent->children = NULL;
7475
while (cur != NULL) {
7479
nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7481
if (nw->_private == NULL)
7482
nw->_private = cur->_private;
7483
if (firstChild == NULL){
7486
xmlAddChild((xmlNodePtr) ent, nw);
7487
xmlAddChild(ctxt->node, cur);
7493
if (ent->owner == 0)
7495
#ifdef LIBXML_LEGACY_ENABLED
7496
if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7497
xmlAddEntityReference(ent, firstChild, nw);
7498
#endif /* LIBXML_LEGACY_ENABLED */
7500
const xmlChar *nbktext;
7503
* the name change is to avoid coalescing of the
7504
* node with a possible previous text one which
7505
* would make ent->children a dangling pointer
7507
nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7509
if (ent->children->type == XML_TEXT_NODE)
7510
ent->children->name = nbktext;
7511
if ((ent->last != ent->children) &&
7512
(ent->last->type == XML_TEXT_NODE))
7513
ent->last->name = nbktext;
7514
xmlAddChildList(ctxt->node, ent->children);
7518
* This is to avoid a nasty side effect, see
7519
* characters() in SAX.c
7529
* xmlParseEntityRef:
7530
* @ctxt: an XML parser context
7532
* parse ENTITY references declarations
7534
* [68] EntityRef ::= '&' Name ';'
7536
* [ WFC: Entity Declared ]
7537
* In a document without any DTD, a document with only an internal DTD
7538
* subset which contains no parameter entity references, or a document
7539
* with "standalone='yes'", the Name given in the entity reference
7540
* must match that in an entity declaration, except that well-formed
7541
* documents need not declare any of the following entities: amp, lt,
7542
* gt, apos, quot. The declaration of a parameter entity must precede
7543
* any reference to it. Similarly, the declaration of a general entity
7544
* must precede any reference to it which appears in a default value in an
7545
* attribute-list declaration. Note that if entities are declared in the
7546
* external subset or in external parameter entities, a non-validating
7547
* processor is not obligated to read and process their declarations;
7548
* for such documents, the rule that an entity must be declared is a
7549
* well-formedness constraint only if standalone='yes'.
7551
* [ WFC: Parsed Entity ]
7552
* An entity reference must not contain the name of an unparsed entity
7554
* Returns the xmlEntityPtr if found, or NULL otherwise.
7557
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7558
const xmlChar *name;
7559
xmlEntityPtr ent = NULL;
7562
if (ctxt->instate == XML_PARSER_EOF)
7568
name = xmlParseName(ctxt);
7570
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7571
"xmlParseEntityRef: no name\n");
7575
xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7581
* Predefined entities override any extra definition
7583
if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7584
ent = xmlGetPredefinedEntity(name);
7590
* Increase the number of entity references parsed
7595
* Ask first SAX for entity resolution, otherwise try the
7596
* entities which may have stored in the parser context.
7598
if (ctxt->sax != NULL) {
7599
if (ctxt->sax->getEntity != NULL)
7600
ent = ctxt->sax->getEntity(ctxt->userData, name);
7601
if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7602
(ctxt->options & XML_PARSE_OLDSAX))
7603
ent = xmlGetPredefinedEntity(name);
7604
if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7605
(ctxt->userData==ctxt)) {
7606
ent = xmlSAX2GetEntity(ctxt, name);
7609
if (ctxt->instate == XML_PARSER_EOF)
7612
* [ WFC: Entity Declared ]
7613
* In a document without any DTD, a document with only an
7614
* internal DTD subset which contains no parameter entity
7615
* references, or a document with "standalone='yes'", the
7616
* Name given in the entity reference must match that in an
7617
* entity declaration, except that well-formed documents
7618
* need not declare any of the following entities: amp, lt,
7620
* The declaration of a parameter entity must precede any
7622
* Similarly, the declaration of a general entity must
7623
* precede any reference to it which appears in a default
7624
* value in an attribute-list declaration. Note that if
7625
* entities are declared in the external subset or in
7626
* external parameter entities, a non-validating processor
7627
* is not obligated to read and process their declarations;
7628
* for such documents, the rule that an entity must be
7629
* declared is a well-formedness constraint only if
7633
if ((ctxt->standalone == 1) ||
7634
((ctxt->hasExternalSubset == 0) &&
7635
(ctxt->hasPErefs == 0))) {
7636
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7637
"Entity '%s' not defined\n", name);
7639
xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7640
"Entity '%s' not defined\n", name);
7641
if ((ctxt->inSubset == 0) &&
7642
(ctxt->sax != NULL) &&
7643
(ctxt->sax->reference != NULL)) {
7644
ctxt->sax->reference(ctxt->userData, name);
7651
* [ WFC: Parsed Entity ]
7652
* An entity reference must not contain the name of an
7655
else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7656
xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7657
"Entity reference to unparsed entity %s\n", name);
7661
* [ WFC: No External Entity References ]
7662
* Attribute values cannot contain direct or indirect
7663
* entity references to external entities.
7665
else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7666
(ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7667
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7668
"Attribute references external entity '%s'\n", name);
7671
* [ WFC: No < in Attribute Values ]
7672
* The replacement text of any entity referred to directly or
7673
* indirectly in an attribute value (other than "<") must
7676
else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7678
(ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7679
if ((ent->checked & 1) || ((ent->checked == 0) &&
7680
(ent->content != NULL) &&(xmlStrchr(ent->content, '<')))) {
7681
xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7682
"'<' in entity '%s' is not allowed in attributes values\n", name);
7687
* Internal check, no parameter entities here ...
7690
switch (ent->etype) {
7691
case XML_INTERNAL_PARAMETER_ENTITY:
7692
case XML_EXTERNAL_PARAMETER_ENTITY:
7693
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7694
"Attempt to reference the parameter entity '%s'\n",
7703
* [ WFC: No Recursion ]
7704
* A parsed entity must not contain a recursive reference
7705
* to itself, either directly or indirectly.
7706
* Done somewhere else
7712
* xmlParseStringEntityRef:
7713
* @ctxt: an XML parser context
7714
* @str: a pointer to an index in the string
7716
* parse ENTITY references declarations, but this version parses it from
7719
* [68] EntityRef ::= '&' Name ';'
7721
* [ WFC: Entity Declared ]
7722
* In a document without any DTD, a document with only an internal DTD
7723
* subset which contains no parameter entity references, or a document
7724
* with "standalone='yes'", the Name given in the entity reference
7725
* must match that in an entity declaration, except that well-formed
7726
* documents need not declare any of the following entities: amp, lt,
7727
* gt, apos, quot. The declaration of a parameter entity must precede
7728
* any reference to it. Similarly, the declaration of a general entity
7729
* must precede any reference to it which appears in a default value in an
7730
* attribute-list declaration. Note that if entities are declared in the
7731
* external subset or in external parameter entities, a non-validating
7732
* processor is not obligated to read and process their declarations;
7733
* for such documents, the rule that an entity must be declared is a
7734
* well-formedness constraint only if standalone='yes'.
7736
* [ WFC: Parsed Entity ]
7737
* An entity reference must not contain the name of an unparsed entity
7739
* Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7740
* is updated to the current location in the string.
7743
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7747
xmlEntityPtr ent = NULL;
7749
if ((str == NULL) || (*str == NULL))
7757
name = xmlParseStringName(ctxt, &ptr);
7759
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7760
"xmlParseStringEntityRef: no name\n");
7765
xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7774
* Predefined entites override any extra definition
7776
if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7777
ent = xmlGetPredefinedEntity(name);
7786
* Increate the number of entity references parsed
7791
* Ask first SAX for entity resolution, otherwise try the
7792
* entities which may have stored in the parser context.
7794
if (ctxt->sax != NULL) {
7795
if (ctxt->sax->getEntity != NULL)
7796
ent = ctxt->sax->getEntity(ctxt->userData, name);
7797
if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7798
ent = xmlGetPredefinedEntity(name);
7799
if ((ent == NULL) && (ctxt->userData==ctxt)) {
7800
ent = xmlSAX2GetEntity(ctxt, name);
7803
if (ctxt->instate == XML_PARSER_EOF) {
7809
* [ WFC: Entity Declared ]
7810
* In a document without any DTD, a document with only an
7811
* internal DTD subset which contains no parameter entity
7812
* references, or a document with "standalone='yes'", the
7813
* Name given in the entity reference must match that in an
7814
* entity declaration, except that well-formed documents
7815
* need not declare any of the following entities: amp, lt,
7817
* The declaration of a parameter entity must precede any
7819
* Similarly, the declaration of a general entity must
7820
* precede any reference to it which appears in a default
7821
* value in an attribute-list declaration. Note that if
7822
* entities are declared in the external subset or in
7823
* external parameter entities, a non-validating processor
7824
* is not obligated to read and process their declarations;
7825
* for such documents, the rule that an entity must be
7826
* declared is a well-formedness constraint only if
7830
if ((ctxt->standalone == 1) ||
7831
((ctxt->hasExternalSubset == 0) &&
7832
(ctxt->hasPErefs == 0))) {
7833
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7834
"Entity '%s' not defined\n", name);
7836
xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7837
"Entity '%s' not defined\n",
7840
/* TODO ? check regressions ctxt->valid = 0; */
7844
* [ WFC: Parsed Entity ]
7845
* An entity reference must not contain the name of an
7848
else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7849
xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7850
"Entity reference to unparsed entity %s\n", name);
7854
* [ WFC: No External Entity References ]
7855
* Attribute values cannot contain direct or indirect
7856
* entity references to external entities.
7858
else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7859
(ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7860
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7861
"Attribute references external entity '%s'\n", name);
7864
* [ WFC: No < in Attribute Values ]
7865
* The replacement text of any entity referred to directly or
7866
* indirectly in an attribute value (other than "<") must
7869
else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7870
(ent != NULL) && (ent->content != NULL) &&
7871
(ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7872
(xmlStrchr(ent->content, '<'))) {
7873
xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7874
"'<' in entity '%s' is not allowed in attributes values\n",
7879
* Internal check, no parameter entities here ...
7882
switch (ent->etype) {
7883
case XML_INTERNAL_PARAMETER_ENTITY:
7884
case XML_EXTERNAL_PARAMETER_ENTITY:
7885
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7886
"Attempt to reference the parameter entity '%s'\n",
7895
* [ WFC: No Recursion ]
7896
* A parsed entity must not contain a recursive reference
7897
* to itself, either directly or indirectly.
7898
* Done somewhere else
7907
* xmlParsePEReference:
7908
* @ctxt: an XML parser context
7910
* parse PEReference declarations
7911
* The entity content is handled directly by pushing it's content as
7912
* a new input stream.
7914
* [69] PEReference ::= '%' Name ';'
7916
* [ WFC: No Recursion ]
7917
* A parsed entity must not contain a recursive
7918
* reference to itself, either directly or indirectly.
7920
* [ WFC: Entity Declared ]
7921
* In a document without any DTD, a document with only an internal DTD
7922
* subset which contains no parameter entity references, or a document
7923
* with "standalone='yes'", ... ... The declaration of a parameter
7924
* entity must precede any reference to it...
7926
* [ VC: Entity Declared ]
7927
* In a document with an external subset or external parameter entities
7928
* with "standalone='no'", ... ... The declaration of a parameter entity
7929
* must precede any reference to it...
7932
* Parameter-entity references may only appear in the DTD.
7933
* NOTE: misleading but this is handled.
7936
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7938
const xmlChar *name;
7939
xmlEntityPtr entity = NULL;
7940
xmlParserInputPtr input;
7945
name = xmlParseName(ctxt);
7947
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7948
"xmlParsePEReference: no name\n");
7952
xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7959
* Increate the number of entity references parsed
7964
* Request the entity from SAX
7966
if ((ctxt->sax != NULL) &&
7967
(ctxt->sax->getParameterEntity != NULL))
7968
entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7969
if (ctxt->instate == XML_PARSER_EOF)
7971
if (entity == NULL) {
7973
* [ WFC: Entity Declared ]
7974
* In a document without any DTD, a document with only an
7975
* internal DTD subset which contains no parameter entity
7976
* references, or a document with "standalone='yes'", ...
7977
* ... The declaration of a parameter entity must precede
7978
* any reference to it...
7980
if ((ctxt->standalone == 1) ||
7981
((ctxt->hasExternalSubset == 0) &&
7982
(ctxt->hasPErefs == 0))) {
7983
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7984
"PEReference: %%%s; not found\n",
7988
* [ VC: Entity Declared ]
7989
* In a document with an external subset or external
7990
* parameter entities with "standalone='no'", ...
7991
* ... The declaration of a parameter entity must
7992
* precede any reference to it...
7994
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7995
"PEReference: %%%s; not found\n",
8001
* Internal checking in case the entity quest barfed
8003
if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8004
(entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8005
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8006
"Internal: %%%s; is not a parameter entity\n",
8008
} else if (ctxt->input->free != deallocblankswrapper) {
8009
input = xmlNewBlanksWrapperInputStream(ctxt, entity);
8010
if (xmlPushInput(ctxt, input) < 0)
8015
* handle the extra spaces added before and after
8016
* c.f. http://www.w3.org/TR/REC-xml#as-PE
8018
input = xmlNewEntityInputStream(ctxt, entity);
8019
if (xmlPushInput(ctxt, input) < 0)
8021
if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8022
(CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8023
(IS_BLANK_CH(NXT(5)))) {
8024
xmlParseTextDecl(ctxt);
8026
XML_ERR_UNSUPPORTED_ENCODING) {
8028
* The XML REC instructs us to stop parsing
8031
ctxt->instate = XML_PARSER_EOF;
8037
ctxt->hasPErefs = 1;
8041
* xmlLoadEntityContent:
8042
* @ctxt: an XML parser context
8043
* @entity: an unloaded system entity
8045
* Load the original content of the given system entity from the
8046
* ExternalID/SystemID given. This is to be used for Included in Literal
8047
* http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8049
* Returns 0 in case of success and -1 in case of failure
8052
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8053
xmlParserInputPtr input;
8058
if ((ctxt == NULL) || (entity == NULL) ||
8059
((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8060
(entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8061
(entity->content != NULL)) {
8062
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8063
"xmlLoadEntityContent parameter error");
8067
if (xmlParserDebugEntities)
8068
xmlGenericError(xmlGenericErrorContext,
8069
"Reading %s entity content input\n", entity->name);
8071
buf = xmlBufferCreate();
8073
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8074
"xmlLoadEntityContent parameter error");
8078
input = xmlNewEntityInputStream(ctxt, entity);
8079
if (input == NULL) {
8080
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8081
"xmlLoadEntityContent input error");
8087
* Push the entity as the current input, read char by char
8088
* saving to the buffer until the end of the entity or an error
8090
if (xmlPushInput(ctxt, input) < 0) {
8097
while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8099
xmlBufferAdd(buf, ctxt->input->cur, l);
8100
if (count++ > XML_PARSER_CHUNK_SIZE) {
8103
if (ctxt->instate == XML_PARSER_EOF) {
8113
if (ctxt->instate == XML_PARSER_EOF) {
8121
if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8123
} else if (!IS_CHAR(c)) {
8124
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8125
"xmlLoadEntityContent: invalid char value %d\n",
8130
entity->content = buf->content;
8131
buf->content = NULL;
8138
* xmlParseStringPEReference:
8139
* @ctxt: an XML parser context
8140
* @str: a pointer to an index in the string
8142
* parse PEReference declarations
8144
* [69] PEReference ::= '%' Name ';'
8146
* [ WFC: No Recursion ]
8147
* A parsed entity must not contain a recursive
8148
* reference to itself, either directly or indirectly.
8150
* [ WFC: Entity Declared ]
8151
* In a document without any DTD, a document with only an internal DTD
8152
* subset which contains no parameter entity references, or a document
8153
* with "standalone='yes'", ... ... The declaration of a parameter
8154
* entity must precede any reference to it...
8156
* [ VC: Entity Declared ]
8157
* In a document with an external subset or external parameter entities
8158
* with "standalone='no'", ... ... The declaration of a parameter entity
8159
* must precede any reference to it...
8162
* Parameter-entity references may only appear in the DTD.
8163
* NOTE: misleading but this is handled.
8165
* Returns the string of the entity content.
8166
* str is updated to the current value of the index
8169
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8173
xmlEntityPtr entity = NULL;
8175
if ((str == NULL) || (*str == NULL)) return(NULL);
8181
name = xmlParseStringName(ctxt, &ptr);
8183
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8184
"xmlParseStringPEReference: no name\n");
8190
xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8198
* Increate the number of entity references parsed
8203
* Request the entity from SAX
8205
if ((ctxt->sax != NULL) &&
8206
(ctxt->sax->getParameterEntity != NULL))
8207
entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8208
if (ctxt->instate == XML_PARSER_EOF) {
8212
if (entity == NULL) {
8214
* [ WFC: Entity Declared ]
8215
* In a document without any DTD, a document with only an
8216
* internal DTD subset which contains no parameter entity
8217
* references, or a document with "standalone='yes'", ...
8218
* ... The declaration of a parameter entity must precede
8219
* any reference to it...
8221
if ((ctxt->standalone == 1) ||
8222
((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8223
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8224
"PEReference: %%%s; not found\n", name);
8227
* [ VC: Entity Declared ]
8228
* In a document with an external subset or external
8229
* parameter entities with "standalone='no'", ...
8230
* ... The declaration of a parameter entity must
8231
* precede any reference to it...
8233
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8234
"PEReference: %%%s; not found\n",
8240
* Internal checking in case the entity quest barfed
8242
if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8243
(entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8244
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8245
"%%%s; is not a parameter entity\n",
8249
ctxt->hasPErefs = 1;
8256
* xmlParseDocTypeDecl:
8257
* @ctxt: an XML parser context
8259
* parse a DOCTYPE declaration
8261
* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8262
* ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8264
* [ VC: Root Element Type ]
8265
* The Name in the document type declaration must match the element
8266
* type of the root element.
8270
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8271
const xmlChar *name = NULL;
8272
xmlChar *ExternalID = NULL;
8273
xmlChar *URI = NULL;
8276
* We know that '<!DOCTYPE' has been detected.
8283
* Parse the DOCTYPE name.
8285
name = xmlParseName(ctxt);
8287
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8288
"xmlParseDocTypeDecl : no DOCTYPE name !\n");
8290
ctxt->intSubName = name;
8295
* Check for SystemID and ExternalID
8297
URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8299
if ((URI != NULL) || (ExternalID != NULL)) {
8300
ctxt->hasExternalSubset = 1;
8302
ctxt->extSubURI = URI;
8303
ctxt->extSubSystem = ExternalID;
8308
* Create and update the internal subset.
8310
if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8311
(!ctxt->disableSAX))
8312
ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8313
if (ctxt->instate == XML_PARSER_EOF)
8317
* Is there any internal subset declarations ?
8318
* they are handled separately in xmlParseInternalSubset()
8324
* We should be at the end of the DOCTYPE declaration.
8327
xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8333
* xmlParseInternalSubset:
8334
* @ctxt: an XML parser context
8336
* parse the internal subset declaration
8338
* [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8342
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8344
* Is there any DTD definition ?
8347
ctxt->instate = XML_PARSER_DTD;
8350
* Parse the succession of Markup declarations and
8352
* Subsequence (markupdecl | PEReference | S)*
8354
while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
8355
const xmlChar *check = CUR_PTR;
8356
unsigned int cons = ctxt->input->consumed;
8359
xmlParseMarkupDecl(ctxt);
8360
xmlParsePEReference(ctxt);
8363
* Pop-up of finished entities.
8365
while ((RAW == 0) && (ctxt->inputNr > 1))
8368
if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8369
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8370
"xmlParseInternalSubset: error detected in Markup declaration\n");
8381
* We should be at the end of the DOCTYPE declaration.
8384
xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8389
#ifdef LIBXML_SAX1_ENABLED
8391
* xmlParseAttribute:
8392
* @ctxt: an XML parser context
8393
* @value: a xmlChar ** used to store the value of the attribute
8395
* parse an attribute
8397
* [41] Attribute ::= Name Eq AttValue
8399
* [ WFC: No External Entity References ]
8400
* Attribute values cannot contain direct or indirect entity references
8401
* to external entities.
8403
* [ WFC: No < in Attribute Values ]
8404
* The replacement text of any entity referred to directly or indirectly in
8405
* an attribute value (other than "<") must not contain a <.
8407
* [ VC: Attribute Value Type ]
8408
* The attribute must have been declared; the value must be of the type
8411
* [25] Eq ::= S? '=' S?
8415
* [NS 11] Attribute ::= QName Eq AttValue
8417
* Also the case QName == xmlns:??? is handled independently as a namespace
8420
* Returns the attribute name, and the value in *value.
8424
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8425
const xmlChar *name;
8430
name = xmlParseName(ctxt);
8432
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8433
"error parsing attribute name\n");
8444
val = xmlParseAttValue(ctxt);
8445
ctxt->instate = XML_PARSER_CONTENT;
8447
xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8448
"Specification mandate value for attribute %s\n", name);
8453
* Check that xml:lang conforms to the specification
8454
* No more registered as an error, just generate a warning now
8455
* since this was deprecated in XML second edition
8457
if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8458
if (!xmlCheckLanguageID(val)) {
8459
xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8460
"Malformed value for xml:lang : %s\n",
8466
* Check that xml:space conforms to the specification
8468
if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8469
if (xmlStrEqual(val, BAD_CAST "default"))
8471
else if (xmlStrEqual(val, BAD_CAST "preserve"))
8474
xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8475
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8486
* @ctxt: an XML parser context
8488
* parse a start of tag either for rule element or
8489
* EmptyElement. In both case we don't parse the tag closing chars.
8491
* [40] STag ::= '<' Name (S Attribute)* S? '>'
8493
* [ WFC: Unique Att Spec ]
8494
* No attribute name may appear more than once in the same start-tag or
8495
* empty-element tag.
8497
* [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8499
* [ WFC: Unique Att Spec ]
8500
* No attribute name may appear more than once in the same start-tag or
8501
* empty-element tag.
8505
* [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8507
* [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8509
* Returns the element name parsed
8513
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8514
const xmlChar *name;
8515
const xmlChar *attname;
8517
const xmlChar **atts = ctxt->atts;
8519
int maxatts = ctxt->maxatts;
8522
if (RAW != '<') return(NULL);
8525
name = xmlParseName(ctxt);
8527
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8528
"xmlParseStartTag: invalid element name\n");
8533
* Now parse the attributes, it ends up with the ending
8540
while (((RAW != '>') &&
8541
((RAW != '/') || (NXT(1) != '>')) &&
8542
(IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8543
const xmlChar *q = CUR_PTR;
8544
unsigned int cons = ctxt->input->consumed;
8546
attname = xmlParseAttribute(ctxt, &attvalue);
8547
if ((attname != NULL) && (attvalue != NULL)) {
8549
* [ WFC: Unique Att Spec ]
8550
* No attribute name may appear more than once in the same
8551
* start-tag or empty-element tag.
8553
for (i = 0; i < nbatts;i += 2) {
8554
if (xmlStrEqual(atts[i], attname)) {
8555
xmlErrAttributeDup(ctxt, NULL, attname);
8561
* Add the pair to atts
8564
maxatts = 22; /* allow for 10 attrs by default */
8565
atts = (const xmlChar **)
8566
xmlMalloc(maxatts * sizeof(xmlChar *));
8568
xmlErrMemory(ctxt, NULL);
8569
if (attvalue != NULL)
8574
ctxt->maxatts = maxatts;
8575
} else if (nbatts + 4 > maxatts) {
8579
n = (const xmlChar **) xmlRealloc((void *) atts,
8580
maxatts * sizeof(const xmlChar *));
8582
xmlErrMemory(ctxt, NULL);
8583
if (attvalue != NULL)
8589
ctxt->maxatts = maxatts;
8591
atts[nbatts++] = attname;
8592
atts[nbatts++] = attvalue;
8593
atts[nbatts] = NULL;
8594
atts[nbatts + 1] = NULL;
8596
if (attvalue != NULL)
8603
if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8605
if (!IS_BLANK_CH(RAW)) {
8606
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8607
"attributes construct error\n");
8610
if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8611
(attname == NULL) && (attvalue == NULL)) {
8612
xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8613
"xmlParseStartTag: problem parsing attributes\n");
8621
* SAX: Start of Element !
8623
if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8624
(!ctxt->disableSAX)) {
8626
ctxt->sax->startElement(ctxt->userData, name, atts);
8628
ctxt->sax->startElement(ctxt->userData, name, NULL);
8632
/* Free only the content strings */
8633
for (i = 1;i < nbatts;i+=2)
8634
if (atts[i] != NULL)
8635
xmlFree((xmlChar *) atts[i]);
8642
* @ctxt: an XML parser context
8643
* @line: line of the start tag
8644
* @nsNr: number of namespaces on the start tag
8646
* parse an end of tag
8648
* [42] ETag ::= '</' Name S? '>'
8652
* [NS 9] ETag ::= '</' QName S? '>'
8656
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8657
const xmlChar *name;
8660
if ((RAW != '<') || (NXT(1) != '/')) {
8661
xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8662
"xmlParseEndTag: '</' not found\n");
8667
name = xmlParseNameAndCompare(ctxt,ctxt->name);
8670
* We should definitely be at the ending "S? '>'" part
8674
if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8675
xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8680
* [ WFC: Element Type Match ]
8681
* The Name in an element's end-tag must match the element type in the
8685
if (name != (xmlChar*)1) {
8686
if (name == NULL) name = BAD_CAST "unparseable";
8687
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8688
"Opening and ending tag mismatch: %s line %d and %s\n",
8689
ctxt->name, line, name);
8695
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8696
(!ctxt->disableSAX))
8697
ctxt->sax->endElement(ctxt->userData, ctxt->name);
8706
* @ctxt: an XML parser context
8708
* parse an end of tag
8710
* [42] ETag ::= '</' Name S? '>'
8714
* [NS 9] ETag ::= '</' QName S? '>'
8718
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8719
xmlParseEndTag1(ctxt, 0);
8721
#endif /* LIBXML_SAX1_ENABLED */
8723
/************************************************************************
8725
* SAX 2 specific operations *
8727
************************************************************************/
8731
* @ctxt: an XML parser context
8732
* @prefix: the prefix to lookup
8734
* Lookup the namespace name for the @prefix (which ca be NULL)
8735
* The prefix must come from the @ctxt->dict dictionnary
8737
* Returns the namespace name or NULL if not bound
8739
static const xmlChar *
8740
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8743
if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8744
for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8745
if (ctxt->nsTab[i] == prefix) {
8746
if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8748
return(ctxt->nsTab[i + 1]);
8755
* @ctxt: an XML parser context
8756
* @prefix: pointer to store the prefix part
8758
* parse an XML Namespace QName
8760
* [6] QName ::= (Prefix ':')? LocalPart
8761
* [7] Prefix ::= NCName
8762
* [8] LocalPart ::= NCName
8764
* Returns the Name parsed or NULL
8767
static const xmlChar *
8768
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8769
const xmlChar *l, *p;
8773
l = xmlParseNCName(ctxt);
8776
l = xmlParseName(ctxt);
8778
xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8779
"Failed to parse QName '%s'\n", l, NULL, NULL);
8789
l = xmlParseNCName(ctxt);
8793
xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8794
"Failed to parse QName '%s:'\n", p, NULL, NULL);
8795
l = xmlParseNmtoken(ctxt);
8797
tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8799
tmp = xmlBuildQName(l, p, NULL, 0);
8802
p = xmlDictLookup(ctxt->dict, tmp, -1);
8803
if (tmp != NULL) xmlFree(tmp);
8810
xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8811
"Failed to parse QName '%s:%s:'\n", p, l, NULL);
8813
tmp = (xmlChar *) xmlParseName(ctxt);
8815
tmp = xmlBuildQName(tmp, l, NULL, 0);
8816
l = xmlDictLookup(ctxt->dict, tmp, -1);
8817
if (tmp != NULL) xmlFree(tmp);
8821
tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8822
l = xmlDictLookup(ctxt->dict, tmp, -1);
8823
if (tmp != NULL) xmlFree(tmp);
8834
* xmlParseQNameAndCompare:
8835
* @ctxt: an XML parser context
8836
* @name: the localname
8837
* @prefix: the prefix, if any.
8839
* parse an XML name and compares for match
8840
* (specialized for endtag parsing)
8842
* Returns NULL for an illegal name, (xmlChar*) 1 for success
8843
* and the name for mismatch
8846
static const xmlChar *
8847
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8848
xmlChar const *prefix) {
8852
const xmlChar *prefix2;
8854
if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8857
in = ctxt->input->cur;
8860
while (*in != 0 && *in == *cmp) {
8864
if ((*cmp == 0) && (*in == ':')) {
8867
while (*in != 0 && *in == *cmp) {
8871
if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8873
ctxt->input->cur = in;
8874
return((const xmlChar*) 1);
8878
* all strings coms from the dictionary, equality can be done directly
8880
ret = xmlParseQName (ctxt, &prefix2);
8881
if ((ret == name) && (prefix == prefix2))
8882
return((const xmlChar*) 1);
8887
* xmlParseAttValueInternal:
8888
* @ctxt: an XML parser context
8889
* @len: attribute len result
8890
* @alloc: whether the attribute was reallocated as a new string
8891
* @normalize: if 1 then further non-CDATA normalization must be done
8893
* parse a value for an attribute.
8894
* NOTE: if no normalization is needed, the routine will return pointers
8895
* directly from the data buffer.
8897
* 3.3.3 Attribute-Value Normalization:
8898
* Before the value of an attribute is passed to the application or
8899
* checked for validity, the XML processor must normalize it as follows:
8900
* - a character reference is processed by appending the referenced
8901
* character to the attribute value
8902
* - an entity reference is processed by recursively processing the
8903
* replacement text of the entity
8904
* - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8905
* appending #x20 to the normalized value, except that only a single
8906
* #x20 is appended for a "#xD#xA" sequence that is part of an external
8907
* parsed entity or the literal entity value of an internal parsed entity
8908
* - other characters are processed by appending them to the normalized value
8909
* If the declared value is not CDATA, then the XML processor must further
8910
* process the normalized attribute value by discarding any leading and
8911
* trailing space (#x20) characters, and by replacing sequences of space
8912
* (#x20) characters by a single space (#x20) character.
8913
* All attributes for which no declaration has been read should be treated
8914
* by a non-validating parser as if declared CDATA.
8916
* Returns the AttValue parsed or NULL. The value has to be freed by the
8917
* caller if it was copied, this can be detected by val[*len] == 0.
8921
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8925
const xmlChar *in = NULL, *start, *end, *last;
8926
xmlChar *ret = NULL;
8929
in = (xmlChar *) CUR_PTR;
8930
if (*in != '"' && *in != '\'') {
8931
xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8934
ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8937
* try to handle in this routine the most common case where no
8938
* allocation of a new string is required and where content is
8942
end = ctxt->input->end;
8945
const xmlChar *oldbase = ctxt->input->base;
8947
if (oldbase != ctxt->input->base) {
8948
long delta = ctxt->input->base - oldbase;
8949
start = start + delta;
8952
end = ctxt->input->end;
8956
* Skip any leading spaces
8958
while ((in < end) && (*in != limit) &&
8959
((*in == 0x20) || (*in == 0x9) ||
8960
(*in == 0xA) || (*in == 0xD))) {
8964
const xmlChar *oldbase = ctxt->input->base;
8966
if (ctxt->instate == XML_PARSER_EOF)
8968
if (oldbase != ctxt->input->base) {
8969
long delta = ctxt->input->base - oldbase;
8970
start = start + delta;
8973
end = ctxt->input->end;
8974
if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8975
((ctxt->options & XML_PARSE_HUGE) == 0)) {
8976
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8977
"AttValue length too long\n");
8982
while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8983
(*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8984
if ((*in++ == 0x20) && (*in == 0x20)) break;
8986
const xmlChar *oldbase = ctxt->input->base;
8988
if (ctxt->instate == XML_PARSER_EOF)
8990
if (oldbase != ctxt->input->base) {
8991
long delta = ctxt->input->base - oldbase;
8992
start = start + delta;
8995
end = ctxt->input->end;
8996
if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8997
((ctxt->options & XML_PARSE_HUGE) == 0)) {
8998
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8999
"AttValue length too long\n");
9006
* skip the trailing blanks
9008
while ((last[-1] == 0x20) && (last > start)) last--;
9009
while ((in < end) && (*in != limit) &&
9010
((*in == 0x20) || (*in == 0x9) ||
9011
(*in == 0xA) || (*in == 0xD))) {
9014
const xmlChar *oldbase = ctxt->input->base;
9016
if (ctxt->instate == XML_PARSER_EOF)
9018
if (oldbase != ctxt->input->base) {
9019
long delta = ctxt->input->base - oldbase;
9020
start = start + delta;
9022
last = last + delta;
9024
end = ctxt->input->end;
9025
if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9026
((ctxt->options & XML_PARSE_HUGE) == 0)) {
9027
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9028
"AttValue length too long\n");
9033
if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9034
((ctxt->options & XML_PARSE_HUGE) == 0)) {
9035
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9036
"AttValue length too long\n");
9039
if (*in != limit) goto need_complex;
9041
while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9042
(*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9045
const xmlChar *oldbase = ctxt->input->base;
9047
if (ctxt->instate == XML_PARSER_EOF)
9049
if (oldbase != ctxt->input->base) {
9050
long delta = ctxt->input->base - oldbase;
9051
start = start + delta;
9054
end = ctxt->input->end;
9055
if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9056
((ctxt->options & XML_PARSE_HUGE) == 0)) {
9057
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9058
"AttValue length too long\n");
9064
if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9065
((ctxt->options & XML_PARSE_HUGE) == 0)) {
9066
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9067
"AttValue length too long\n");
9070
if (*in != limit) goto need_complex;
9074
*len = last - start;
9075
ret = (xmlChar *) start;
9077
if (alloc) *alloc = 1;
9078
ret = xmlStrndup(start, last - start);
9081
if (alloc) *alloc = 0;
9084
if (alloc) *alloc = 1;
9085
return xmlParseAttValueComplex(ctxt, len, normalize);
9089
* xmlParseAttribute2:
9090
* @ctxt: an XML parser context
9091
* @pref: the element prefix
9092
* @elem: the element name
9093
* @prefix: a xmlChar ** used to store the value of the attribute prefix
9094
* @value: a xmlChar ** used to store the value of the attribute
9095
* @len: an int * to save the length of the attribute
9096
* @alloc: an int * to indicate if the attribute was allocated
9098
* parse an attribute in the new SAX2 framework.
9100
* Returns the attribute name, and the value in *value, .
9103
static const xmlChar *
9104
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9105
const xmlChar * pref, const xmlChar * elem,
9106
const xmlChar ** prefix, xmlChar ** value,
9107
int *len, int *alloc)
9109
const xmlChar *name;
9110
xmlChar *val, *internal_val = NULL;
9115
name = xmlParseQName(ctxt, prefix);
9117
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9118
"error parsing attribute name\n");
9123
* get the type if needed
9125
if (ctxt->attsSpecial != NULL) {
9128
type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
9129
pref, elem, *prefix, name);
9141
val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9144
* Sometimes a second normalisation pass for spaces is needed
9145
* but that only happens if charrefs or entities refernces
9146
* have been used in the attribute value, i.e. the attribute
9147
* value have been extracted in an allocated string already.
9150
const xmlChar *val2;
9152
val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9153
if ((val2 != NULL) && (val2 != val)) {
9155
val = (xmlChar *) val2;
9159
ctxt->instate = XML_PARSER_CONTENT;
9161
xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9162
"Specification mandate value for attribute %s\n",
9167
if (*prefix == ctxt->str_xml) {
9169
* Check that xml:lang conforms to the specification
9170
* No more registered as an error, just generate a warning now
9171
* since this was deprecated in XML second edition
9173
if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9174
internal_val = xmlStrndup(val, *len);
9175
if (!xmlCheckLanguageID(internal_val)) {
9176
xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9177
"Malformed value for xml:lang : %s\n",
9178
internal_val, NULL);
9183
* Check that xml:space conforms to the specification
9185
if (xmlStrEqual(name, BAD_CAST "space")) {
9186
internal_val = xmlStrndup(val, *len);
9187
if (xmlStrEqual(internal_val, BAD_CAST "default"))
9189
else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9192
xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9193
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9194
internal_val, NULL);
9198
xmlFree(internal_val);
9206
* xmlParseStartTag2:
9207
* @ctxt: an XML parser context
9209
* parse a start of tag either for rule element or
9210
* EmptyElement. In both case we don't parse the tag closing chars.
9211
* This routine is called when running SAX2 parsing
9213
* [40] STag ::= '<' Name (S Attribute)* S? '>'
9215
* [ WFC: Unique Att Spec ]
9216
* No attribute name may appear more than once in the same start-tag or
9217
* empty-element tag.
9219
* [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9221
* [ WFC: Unique Att Spec ]
9222
* No attribute name may appear more than once in the same start-tag or
9223
* empty-element tag.
9227
* [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9229
* [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9231
* Returns the element name parsed
9234
static const xmlChar *
9235
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9236
const xmlChar **URI, int *tlen) {
9237
const xmlChar *localname;
9238
const xmlChar *prefix;
9239
const xmlChar *attname;
9240
const xmlChar *aprefix;
9241
const xmlChar *nsname;
9243
const xmlChar **atts = ctxt->atts;
9244
int maxatts = ctxt->maxatts;
9245
int nratts, nbatts, nbdef;
9246
int i, j, nbNs, attval, oldline, oldcol;
9247
const xmlChar *base;
9249
int nsNr = ctxt->nsNr;
9251
if (RAW != '<') return(NULL);
9255
* NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9256
* point since the attribute values may be stored as pointers to
9257
* the buffer and calling SHRINK would destroy them !
9258
* The Shrinking is only possible once the full set of attribute
9259
* callbacks have been done.
9263
base = ctxt->input->base;
9264
cur = ctxt->input->cur - ctxt->input->base;
9265
oldline = ctxt->input->line;
9266
oldcol = ctxt->input->col;
9272
/* Forget any namespaces added during an earlier parse of this element. */
9275
localname = xmlParseQName(ctxt, &prefix);
9276
if (localname == NULL) {
9277
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9278
"StartTag: invalid element name\n");
9281
*tlen = ctxt->input->cur - ctxt->input->base - cur;
9284
* Now parse the attributes, it ends up with the ending
9290
if (ctxt->input->base != base) goto base_changed;
9292
while (((RAW != '>') &&
9293
((RAW != '/') || (NXT(1) != '>')) &&
9294
(IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9295
const xmlChar *q = CUR_PTR;
9296
unsigned int cons = ctxt->input->consumed;
9297
int len = -1, alloc = 0;
9299
attname = xmlParseAttribute2(ctxt, prefix, localname,
9300
&aprefix, &attvalue, &len, &alloc);
9301
if (ctxt->input->base != base) {
9302
if ((attvalue != NULL) && (alloc != 0))
9307
if ((attname != NULL) && (attvalue != NULL)) {
9308
if (len < 0) len = xmlStrlen(attvalue);
9309
if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9310
const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9314
uri = xmlParseURI((const char *) URL);
9316
xmlNsErr(ctxt, XML_WAR_NS_URI,
9317
"xmlns: '%s' is not a valid URI\n",
9320
if (uri->scheme == NULL) {
9321
xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9322
"xmlns: URI %s is not absolute\n",
9327
if (URL == ctxt->str_xml_ns) {
9328
if (attname != ctxt->str_xml) {
9329
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9330
"xml namespace URI cannot be the default namespace\n",
9333
goto skip_default_ns;
9337
BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9338
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9339
"reuse of the xmlns namespace name is forbidden\n",
9341
goto skip_default_ns;
9345
* check that it's not a defined namespace
9347
for (j = 1;j <= nbNs;j++)
9348
if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9351
xmlErrAttributeDup(ctxt, NULL, attname);
9353
if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9355
if (alloc != 0) xmlFree(attvalue);
9359
if (aprefix == ctxt->str_xmlns) {
9360
const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9363
if (attname == ctxt->str_xml) {
9364
if (URL != ctxt->str_xml_ns) {
9365
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9366
"xml namespace prefix mapped to wrong URI\n",
9370
* Do not keep a namespace definition node
9374
if (URL == ctxt->str_xml_ns) {
9375
if (attname != ctxt->str_xml) {
9376
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9377
"xml namespace URI mapped to wrong prefix\n",
9382
if (attname == ctxt->str_xmlns) {
9383
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9384
"redefinition of the xmlns prefix is forbidden\n",
9390
BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9391
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9392
"reuse of the xmlns namespace name is forbidden\n",
9396
if ((URL == NULL) || (URL[0] == 0)) {
9397
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9398
"xmlns:%s: Empty XML namespace is not allowed\n",
9399
attname, NULL, NULL);
9402
uri = xmlParseURI((const char *) URL);
9404
xmlNsErr(ctxt, XML_WAR_NS_URI,
9405
"xmlns:%s: '%s' is not a valid URI\n",
9406
attname, URL, NULL);
9408
if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9409
xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9410
"xmlns:%s: URI %s is not absolute\n",
9411
attname, URL, NULL);
9418
* check that it's not a defined namespace
9420
for (j = 1;j <= nbNs;j++)
9421
if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9424
xmlErrAttributeDup(ctxt, aprefix, attname);
9426
if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9428
if (alloc != 0) xmlFree(attvalue);
9430
if (ctxt->input->base != base) goto base_changed;
9435
* Add the pair to atts
9437
if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9438
if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9439
if (attvalue[len] == 0)
9443
maxatts = ctxt->maxatts;
9446
ctxt->attallocs[nratts++] = alloc;
9447
atts[nbatts++] = attname;
9448
atts[nbatts++] = aprefix;
9449
atts[nbatts++] = NULL; /* the URI will be fetched later */
9450
atts[nbatts++] = attvalue;
9452
atts[nbatts++] = attvalue;
9454
* tag if some deallocation is needed
9456
if (alloc != 0) attval = 1;
9458
if ((attvalue != NULL) && (attvalue[len] == 0))
9465
if (ctxt->instate == XML_PARSER_EOF)
9467
if (ctxt->input->base != base) goto base_changed;
9468
if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9470
if (!IS_BLANK_CH(RAW)) {
9471
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9472
"attributes construct error\n");
9476
if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9477
(attname == NULL) && (attvalue == NULL)) {
9478
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9479
"xmlParseStartTag: problem parsing attributes\n");
9483
if (ctxt->input->base != base) goto base_changed;
9487
* The attributes defaulting
9489
if (ctxt->attsDefault != NULL) {
9490
xmlDefAttrsPtr defaults;
9492
defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9493
if (defaults != NULL) {
9494
for (i = 0;i < defaults->nbAttrs;i++) {
9495
attname = defaults->values[5 * i];
9496
aprefix = defaults->values[5 * i + 1];
9499
* special work for namespaces defaulted defs
9501
if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9503
* check that it's not a defined namespace
9505
for (j = 1;j <= nbNs;j++)
9506
if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9508
if (j <= nbNs) continue;
9510
nsname = xmlGetNamespace(ctxt, NULL);
9511
if (nsname != defaults->values[5 * i + 2]) {
9512
if (nsPush(ctxt, NULL,
9513
defaults->values[5 * i + 2]) > 0)
9516
} else if (aprefix == ctxt->str_xmlns) {
9518
* check that it's not a defined namespace
9520
for (j = 1;j <= nbNs;j++)
9521
if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9523
if (j <= nbNs) continue;
9525
nsname = xmlGetNamespace(ctxt, attname);
9526
if (nsname != defaults->values[2]) {
9527
if (nsPush(ctxt, attname,
9528
defaults->values[5 * i + 2]) > 0)
9533
* check that it's not a defined attribute
9535
for (j = 0;j < nbatts;j+=5) {
9536
if ((attname == atts[j]) && (aprefix == atts[j+1]))
9539
if (j < nbatts) continue;
9541
if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9542
if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9545
maxatts = ctxt->maxatts;
9548
atts[nbatts++] = attname;
9549
atts[nbatts++] = aprefix;
9550
if (aprefix == NULL)
9551
atts[nbatts++] = NULL;
9553
atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9554
atts[nbatts++] = defaults->values[5 * i + 2];
9555
atts[nbatts++] = defaults->values[5 * i + 3];
9556
if ((ctxt->standalone == 1) &&
9557
(defaults->values[5 * i + 4] != NULL)) {
9558
xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9559
"standalone: attribute %s on %s defaulted from external subset\n",
9560
attname, localname);
9569
* The attributes checkings
9571
for (i = 0; i < nbatts;i += 5) {
9573
* The default namespace does not apply to attribute names.
9575
if (atts[i + 1] != NULL) {
9576
nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9577
if (nsname == NULL) {
9578
xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9579
"Namespace prefix %s for %s on %s is not defined\n",
9580
atts[i + 1], atts[i], localname);
9582
atts[i + 2] = nsname;
9586
* [ WFC: Unique Att Spec ]
9587
* No attribute name may appear more than once in the same
9588
* start-tag or empty-element tag.
9589
* As extended by the Namespace in XML REC.
9591
for (j = 0; j < i;j += 5) {
9592
if (atts[i] == atts[j]) {
9593
if (atts[i+1] == atts[j+1]) {
9594
xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9597
if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9598
xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9599
"Namespaced Attribute %s in '%s' redefined\n",
9600
atts[i], nsname, NULL);
9607
nsname = xmlGetNamespace(ctxt, prefix);
9608
if ((prefix != NULL) && (nsname == NULL)) {
9609
xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9610
"Namespace prefix %s on %s is not defined\n",
9611
prefix, localname, NULL);
9617
* SAX: Start of Element !
9619
if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9620
(!ctxt->disableSAX)) {
9622
ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9623
nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9624
nbatts / 5, nbdef, atts);
9626
ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9627
nsname, 0, NULL, nbatts / 5, nbdef, atts);
9631
* Free up attribute allocated strings if needed
9634
for (i = 3,j = 0; j < nratts;i += 5,j++)
9635
if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9636
xmlFree((xmlChar *) atts[i]);
9643
* the attribute strings are valid iif the base didn't changed
9646
for (i = 3,j = 0; j < nratts;i += 5,j++)
9647
if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9648
xmlFree((xmlChar *) atts[i]);
9650
ctxt->input->cur = ctxt->input->base + cur;
9651
ctxt->input->line = oldline;
9652
ctxt->input->col = oldcol;
9653
if (ctxt->wellFormed == 1) {
9661
* @ctxt: an XML parser context
9662
* @line: line of the start tag
9663
* @nsNr: number of namespaces on the start tag
9665
* parse an end of tag
9667
* [42] ETag ::= '</' Name S? '>'
9671
* [NS 9] ETag ::= '</' QName S? '>'
9675
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9676
const xmlChar *URI, int line, int nsNr, int tlen) {
9677
const xmlChar *name;
9680
if ((RAW != '<') || (NXT(1) != '/')) {
9681
xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9686
if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9687
if (ctxt->input->cur[tlen] == '>') {
9688
ctxt->input->cur += tlen + 1;
9691
ctxt->input->cur += tlen;
9695
name = xmlParseNameAndCompare(ctxt, ctxt->name);
9697
name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9701
* We should definitely be at the ending "S? '>'" part
9704
if (ctxt->instate == XML_PARSER_EOF)
9707
if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9708
xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9713
* [ WFC: Element Type Match ]
9714
* The Name in an element's end-tag must match the element type in the
9718
if (name != (xmlChar*)1) {
9719
if (name == NULL) name = BAD_CAST "unparseable";
9720
if ((line == 0) && (ctxt->node != NULL))
9721
line = ctxt->node->line;
9722
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9723
"Opening and ending tag mismatch: %s line %d and %s\n",
9724
ctxt->name, line, name);
9731
if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9732
(!ctxt->disableSAX))
9733
ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9743
* @ctxt: an XML parser context
9745
* Parse escaped pure raw content.
9747
* [18] CDSect ::= CDStart CData CDEnd
9749
* [19] CDStart ::= '<![CDATA['
9751
* [20] Data ::= (Char* - (Char* ']]>' Char*))
9753
* [21] CDEnd ::= ']]>'
9756
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9757
xmlChar *buf = NULL;
9759
int size = XML_PARSER_BUFFER_SIZE;
9765
/* Check 2.6.0 was NXT(0) not RAW */
9766
if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9771
ctxt->instate = XML_PARSER_CDATA_SECTION;
9774
xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9775
ctxt->instate = XML_PARSER_CONTENT;
9781
xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9782
ctxt->instate = XML_PARSER_CONTENT;
9787
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9789
xmlErrMemory(ctxt, NULL);
9792
while (IS_CHAR(cur) &&
9793
((r != ']') || (s != ']') || (cur != '>'))) {
9794
if (len + 5 >= size) {
9797
if ((size > XML_MAX_TEXT_LENGTH) &&
9798
((ctxt->options & XML_PARSE_HUGE) == 0)) {
9799
xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9800
"CData section too big found", NULL);
9804
tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9807
xmlErrMemory(ctxt, NULL);
9813
COPY_BUF(rl,buf,len,r);
9821
if (ctxt->instate == XML_PARSER_EOF) {
9831
ctxt->instate = XML_PARSER_CONTENT;
9833
xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9834
"CData section not finished\n%.50s\n", buf);
9841
* OK the buffer is to be consumed as cdata.
9843
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9844
if (ctxt->sax->cdataBlock != NULL)
9845
ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9846
else if (ctxt->sax->characters != NULL)
9847
ctxt->sax->characters(ctxt->userData, buf, len);
9854
* @ctxt: an XML parser context
9858
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9862
xmlParseContent(xmlParserCtxtPtr ctxt) {
9864
while ((RAW != 0) &&
9865
((RAW != '<') || (NXT(1) != '/')) &&
9866
(ctxt->instate != XML_PARSER_EOF)) {
9867
const xmlChar *test = CUR_PTR;
9868
unsigned int cons = ctxt->input->consumed;
9869
const xmlChar *cur = ctxt->input->cur;
9872
* First case : a Processing Instruction.
9874
if ((*cur == '<') && (cur[1] == '?')) {
9879
* Second case : a CDSection
9881
/* 2.6.0 test was *cur not RAW */
9882
else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9883
xmlParseCDSect(ctxt);
9887
* Third case : a comment
9889
else if ((*cur == '<') && (NXT(1) == '!') &&
9890
(NXT(2) == '-') && (NXT(3) == '-')) {
9891
xmlParseComment(ctxt);
9892
ctxt->instate = XML_PARSER_CONTENT;
9896
* Fourth case : a sub-element.
9898
else if (*cur == '<') {
9899
xmlParseElement(ctxt);
9903
* Fifth case : a reference. If if has not been resolved,
9904
* parsing returns it's Name, create the node
9907
else if (*cur == '&') {
9908
xmlParseReference(ctxt);
9912
* Last case, text. Note that References are handled directly.
9915
xmlParseCharData(ctxt, 0);
9920
* Pop-up of finished entities.
9922
while ((RAW == 0) && (ctxt->inputNr > 1))
9926
if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9927
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9928
"detected an error in element content\n");
9929
ctxt->instate = XML_PARSER_EOF;
9937
* @ctxt: an XML parser context
9939
* parse an XML element, this is highly recursive
9941
* [39] element ::= EmptyElemTag | STag content ETag
9943
* [ WFC: Element Type Match ]
9944
* The Name in an element's end-tag must match the element type in the
9950
xmlParseElement(xmlParserCtxtPtr ctxt) {
9951
const xmlChar *name;
9952
const xmlChar *prefix = NULL;
9953
const xmlChar *URI = NULL;
9954
xmlParserNodeInfo node_info;
9957
int nsNr = ctxt->nsNr;
9959
if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9960
((ctxt->options & XML_PARSE_HUGE) == 0)) {
9961
xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9962
"Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9964
ctxt->instate = XML_PARSER_EOF;
9968
/* Capture start position */
9969
if (ctxt->record_info) {
9970
node_info.begin_pos = ctxt->input->consumed +
9971
(CUR_PTR - ctxt->input->base);
9972
node_info.begin_line = ctxt->input->line;
9975
if (ctxt->spaceNr == 0)
9976
spacePush(ctxt, -1);
9977
else if (*ctxt->space == -2)
9978
spacePush(ctxt, -1);
9980
spacePush(ctxt, *ctxt->space);
9982
line = ctxt->input->line;
9983
#ifdef LIBXML_SAX1_ENABLED
9985
#endif /* LIBXML_SAX1_ENABLED */
9986
name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9987
#ifdef LIBXML_SAX1_ENABLED
9989
name = xmlParseStartTag(ctxt);
9990
#endif /* LIBXML_SAX1_ENABLED */
9991
if (ctxt->instate == XML_PARSER_EOF)
9997
namePush(ctxt, name);
10000
#ifdef LIBXML_VALID_ENABLED
10002
* [ VC: Root Element Type ]
10003
* The Name in the document type declaration must match the element
10004
* type of the root element.
10006
if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10007
ctxt->node && (ctxt->node == ctxt->myDoc->children))
10008
ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10009
#endif /* LIBXML_VALID_ENABLED */
10012
* Check for an Empty Element.
10014
if ((RAW == '/') && (NXT(1) == '>')) {
10017
if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10018
(!ctxt->disableSAX))
10019
ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10020
#ifdef LIBXML_SAX1_ENABLED
10022
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10023
(!ctxt->disableSAX))
10024
ctxt->sax->endElement(ctxt->userData, name);
10025
#endif /* LIBXML_SAX1_ENABLED */
10029
if (nsNr != ctxt->nsNr)
10030
nsPop(ctxt, ctxt->nsNr - nsNr);
10031
if ( ret != NULL && ctxt->record_info ) {
10032
node_info.end_pos = ctxt->input->consumed +
10033
(CUR_PTR - ctxt->input->base);
10034
node_info.end_line = ctxt->input->line;
10035
node_info.node = ret;
10036
xmlParserAddNodeInfo(ctxt, &node_info);
10043
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10044
"Couldn't find end of Start Tag %s line %d\n",
10048
* end of parsing of this node.
10053
if (nsNr != ctxt->nsNr)
10054
nsPop(ctxt, ctxt->nsNr - nsNr);
10057
* Capture end position and add node
10059
if ( ret != NULL && ctxt->record_info ) {
10060
node_info.end_pos = ctxt->input->consumed +
10061
(CUR_PTR - ctxt->input->base);
10062
node_info.end_line = ctxt->input->line;
10063
node_info.node = ret;
10064
xmlParserAddNodeInfo(ctxt, &node_info);
10070
* Parse the content of the element:
10072
xmlParseContent(ctxt);
10073
if (ctxt->instate == XML_PARSER_EOF)
10075
if (!IS_BYTE_CHAR(RAW)) {
10076
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10077
"Premature end of data in tag %s line %d\n",
10081
* end of parsing of this node.
10086
if (nsNr != ctxt->nsNr)
10087
nsPop(ctxt, ctxt->nsNr - nsNr);
10092
* parse the end of tag: '</' should be here.
10095
xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
10098
#ifdef LIBXML_SAX1_ENABLED
10100
xmlParseEndTag1(ctxt, line);
10101
#endif /* LIBXML_SAX1_ENABLED */
10104
* Capture end position and add node
10106
if ( ret != NULL && ctxt->record_info ) {
10107
node_info.end_pos = ctxt->input->consumed +
10108
(CUR_PTR - ctxt->input->base);
10109
node_info.end_line = ctxt->input->line;
10110
node_info.node = ret;
10111
xmlParserAddNodeInfo(ctxt, &node_info);
10116
* xmlParseVersionNum:
10117
* @ctxt: an XML parser context
10119
* parse the XML version value.
10121
* [26] VersionNum ::= '1.' [0-9]+
10123
* In practice allow [0-9].[0-9]+ at that level
10125
* Returns the string giving the XML version number, or NULL
10128
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10129
xmlChar *buf = NULL;
10134
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10136
xmlErrMemory(ctxt, NULL);
10140
if (!((cur >= '0') && (cur <= '9'))) {
10154
while ((cur >= '0') && (cur <= '9')) {
10155
if (len + 1 >= size) {
10159
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10162
xmlErrMemory(ctxt, NULL);
10176
* xmlParseVersionInfo:
10177
* @ctxt: an XML parser context
10179
* parse the XML version.
10181
* [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10183
* [25] Eq ::= S? '=' S?
10185
* Returns the version string, e.g. "1.0"
10189
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10190
xmlChar *version = NULL;
10192
if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10196
xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10203
version = xmlParseVersionNum(ctxt);
10205
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10208
} else if (RAW == '\''){
10210
version = xmlParseVersionNum(ctxt);
10212
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10216
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10224
* @ctxt: an XML parser context
10226
* parse the XML encoding name
10228
* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10230
* Returns the encoding name value or NULL
10233
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10234
xmlChar *buf = NULL;
10240
if (((cur >= 'a') && (cur <= 'z')) ||
10241
((cur >= 'A') && (cur <= 'Z'))) {
10242
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10244
xmlErrMemory(ctxt, NULL);
10251
while (((cur >= 'a') && (cur <= 'z')) ||
10252
((cur >= 'A') && (cur <= 'Z')) ||
10253
((cur >= '0') && (cur <= '9')) ||
10254
(cur == '.') || (cur == '_') ||
10256
if (len + 1 >= size) {
10260
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10262
xmlErrMemory(ctxt, NULL);
10279
xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10285
* xmlParseEncodingDecl:
10286
* @ctxt: an XML parser context
10288
* parse the XML encoding declaration
10290
* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10292
* this setups the conversion filters.
10294
* Returns the encoding value or NULL
10298
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10299
xmlChar *encoding = NULL;
10302
if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10306
xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10313
encoding = xmlParseEncName(ctxt);
10315
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10318
} else if (RAW == '\''){
10320
encoding = xmlParseEncName(ctxt);
10322
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10326
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10330
* Non standard parsing, allowing the user to ignore encoding
10332
if (ctxt->options & XML_PARSE_IGNORE_ENC)
10336
* UTF-16 encoding stwich has already taken place at this stage,
10337
* more over the little-endian/big-endian selection is already done
10339
if ((encoding != NULL) &&
10340
((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10341
(!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10343
* If no encoding was passed to the parser, that we are
10344
* using UTF-16 and no decoder is present i.e. the
10345
* document is apparently UTF-8 compatible, then raise an
10346
* encoding mismatch fatal error
10348
if ((ctxt->encoding == NULL) &&
10349
(ctxt->input->buf != NULL) &&
10350
(ctxt->input->buf->encoder == NULL)) {
10351
xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10352
"Document labelled UTF-16 but has UTF-8 content\n");
10354
if (ctxt->encoding != NULL)
10355
xmlFree((xmlChar *) ctxt->encoding);
10356
ctxt->encoding = encoding;
10359
* UTF-8 encoding is handled natively
10361
else if ((encoding != NULL) &&
10362
((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10363
(!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10364
if (ctxt->encoding != NULL)
10365
xmlFree((xmlChar *) ctxt->encoding);
10366
ctxt->encoding = encoding;
10368
else if (encoding != NULL) {
10369
xmlCharEncodingHandlerPtr handler;
10371
if (ctxt->input->encoding != NULL)
10372
xmlFree((xmlChar *) ctxt->input->encoding);
10373
ctxt->input->encoding = encoding;
10375
handler = xmlFindCharEncodingHandler((const char *) encoding);
10376
if (handler != NULL) {
10377
xmlSwitchToEncoding(ctxt, handler);
10379
xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10380
"Unsupported encoding %s\n", encoding);
10390
* @ctxt: an XML parser context
10392
* parse the XML standalone declaration
10394
* [32] SDDecl ::= S 'standalone' Eq
10395
* (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10397
* [ VC: Standalone Document Declaration ]
10398
* TODO The standalone document declaration must have the value "no"
10399
* if any external markup declarations contain declarations of:
10400
* - attributes with default values, if elements to which these
10401
* attributes apply appear in the document without specifications
10402
* of values for these attributes, or
10403
* - entities (other than amp, lt, gt, apos, quot), if references
10404
* to those entities appear in the document, or
10405
* - attributes with values subject to normalization, where the
10406
* attribute appears in the document with a value which will change
10407
* as a result of normalization, or
10408
* - element types with element content, if white space occurs directly
10409
* within any instance of those types.
10412
* 1 if standalone="yes"
10413
* 0 if standalone="no"
10414
* -2 if standalone attribute is missing or invalid
10415
* (A standalone value of -2 means that the XML declaration was found,
10416
* but no value was specified for the standalone attribute).
10420
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10421
int standalone = -2;
10424
if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10428
xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10429
return(standalone);
10435
if ((RAW == 'n') && (NXT(1) == 'o')) {
10438
} else if ((RAW == 'y') && (NXT(1) == 'e') &&
10443
xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10446
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10449
} else if (RAW == '"'){
10451
if ((RAW == 'n') && (NXT(1) == 'o')) {
10454
} else if ((RAW == 'y') && (NXT(1) == 'e') &&
10459
xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10462
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10466
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10469
return(standalone);
10474
* @ctxt: an XML parser context
10476
* parse an XML declaration header
10478
* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10482
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10486
* This value for standalone indicates that the document has an
10487
* XML declaration but it does not have a standalone attribute.
10488
* It will be overwritten later if a standalone attribute is found.
10490
ctxt->input->standalone = -2;
10493
* We know that '<?xml' is here.
10497
if (!IS_BLANK_CH(RAW)) {
10498
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10499
"Blank needed after '<?xml'\n");
10504
* We must have the VersionInfo here.
10506
version = xmlParseVersionInfo(ctxt);
10507
if (version == NULL) {
10508
xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10510
if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10512
* Changed here for XML-1.0 5th edition
10514
if (ctxt->options & XML_PARSE_OLD10) {
10515
xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10516
"Unsupported version '%s'\n",
10519
if ((version[0] == '1') && ((version[1] == '.'))) {
10520
xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10521
"Unsupported version '%s'\n",
10524
xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10525
"Unsupported version '%s'\n",
10530
if (ctxt->version != NULL)
10531
xmlFree((void *) ctxt->version);
10532
ctxt->version = version;
10536
* We may have the encoding declaration
10538
if (!IS_BLANK_CH(RAW)) {
10539
if ((RAW == '?') && (NXT(1) == '>')) {
10543
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10545
xmlParseEncodingDecl(ctxt);
10546
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10548
* The XML REC instructs us to stop parsing right here
10554
* We may have the standalone status.
10556
if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10557
if ((RAW == '?') && (NXT(1) == '>')) {
10561
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10565
* We can grow the input buffer freely at that point
10570
ctxt->input->standalone = xmlParseSDDecl(ctxt);
10573
if ((RAW == '?') && (NXT(1) == '>')) {
10575
} else if (RAW == '>') {
10576
/* Deprecated old WD ... */
10577
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10580
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10581
MOVETO_ENDTAG(CUR_PTR);
10588
* @ctxt: an XML parser context
10590
* parse an XML Misc* optional field.
10592
* [27] Misc ::= Comment | PI | S
10596
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10597
while ((ctxt->instate != XML_PARSER_EOF) &&
10598
(((RAW == '<') && (NXT(1) == '?')) ||
10599
(CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10600
IS_BLANK_CH(CUR))) {
10601
if ((RAW == '<') && (NXT(1) == '?')) {
10603
} else if (IS_BLANK_CH(CUR)) {
10606
xmlParseComment(ctxt);
10611
* xmlParseDocument:
10612
* @ctxt: an XML parser context
10614
* parse an XML document (and build a tree if using the standard SAX
10617
* [1] document ::= prolog element Misc*
10619
* [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10621
* Returns 0, -1 in case of error. the parser context is augmented
10622
* as a result of the parsing.
10626
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10628
xmlCharEncoding enc;
10632
if ((ctxt == NULL) || (ctxt->input == NULL))
10638
* SAX: detecting the level.
10640
xmlDetectSAX2(ctxt);
10643
* SAX: beginning of the document processing.
10645
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10646
ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10647
if (ctxt->instate == XML_PARSER_EOF)
10650
if ((ctxt->encoding == NULL) &&
10651
((ctxt->input->end - ctxt->input->cur) >= 4)) {
10653
* Get the 4 first bytes and decode the charset
10654
* if enc != XML_CHAR_ENCODING_NONE
10655
* plug some encoding conversion routines.
10661
enc = xmlDetectCharEncoding(&start[0], 4);
10662
if (enc != XML_CHAR_ENCODING_NONE) {
10663
xmlSwitchEncoding(ctxt, enc);
10669
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10673
* Check for the XMLDecl in the Prolog.
10674
* do not GROW here to avoid the detected encoder to decode more
10675
* than just the first line, unless the amount of data is really
10676
* too small to hold "<?xml version="1.0" encoding="foo"
10678
if ((ctxt->input->end - ctxt->input->cur) < 35) {
10681
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10684
* Note that we will switch encoding on the fly.
10686
xmlParseXMLDecl(ctxt);
10687
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10689
* The XML REC instructs us to stop parsing right here
10693
ctxt->standalone = ctxt->input->standalone;
10696
ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10698
if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10699
ctxt->sax->startDocument(ctxt->userData);
10700
if (ctxt->instate == XML_PARSER_EOF)
10704
* The Misc part of the Prolog
10707
xmlParseMisc(ctxt);
10710
* Then possibly doc type declaration(s) and more Misc
10711
* (doctypedecl Misc*)?
10714
if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10716
ctxt->inSubset = 1;
10717
xmlParseDocTypeDecl(ctxt);
10719
ctxt->instate = XML_PARSER_DTD;
10720
xmlParseInternalSubset(ctxt);
10721
if (ctxt->instate == XML_PARSER_EOF)
10726
* Create and update the external subset.
10728
ctxt->inSubset = 2;
10729
if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10730
(!ctxt->disableSAX))
10731
ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10732
ctxt->extSubSystem, ctxt->extSubURI);
10733
if (ctxt->instate == XML_PARSER_EOF)
10735
ctxt->inSubset = 0;
10737
xmlCleanSpecialAttr(ctxt);
10739
ctxt->instate = XML_PARSER_PROLOG;
10740
xmlParseMisc(ctxt);
10744
* Time to start parsing the tree itself
10748
xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10749
"Start tag expected, '<' not found\n");
10751
ctxt->instate = XML_PARSER_CONTENT;
10752
xmlParseElement(ctxt);
10753
ctxt->instate = XML_PARSER_EPILOG;
10757
* The Misc part at the end
10759
xmlParseMisc(ctxt);
10762
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10764
ctxt->instate = XML_PARSER_EOF;
10768
* SAX: end of the document processing.
10770
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10771
ctxt->sax->endDocument(ctxt->userData);
10774
* Remove locally kept entity definitions if the tree was not built
10776
if ((ctxt->myDoc != NULL) &&
10777
(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10778
xmlFreeDoc(ctxt->myDoc);
10779
ctxt->myDoc = NULL;
10782
if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10783
ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10785
ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10786
if (ctxt->nsWellFormed)
10787
ctxt->myDoc->properties |= XML_DOC_NSVALID;
10788
if (ctxt->options & XML_PARSE_OLD10)
10789
ctxt->myDoc->properties |= XML_DOC_OLD10;
10791
if (! ctxt->wellFormed) {
10799
* xmlParseExtParsedEnt:
10800
* @ctxt: an XML parser context
10802
* parse a general parsed entity
10803
* An external general parsed entity is well-formed if it matches the
10804
* production labeled extParsedEnt.
10806
* [78] extParsedEnt ::= TextDecl? content
10808
* Returns 0, -1 in case of error. the parser context is augmented
10809
* as a result of the parsing.
10813
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10815
xmlCharEncoding enc;
10817
if ((ctxt == NULL) || (ctxt->input == NULL))
10820
xmlDefaultSAXHandlerInit();
10822
xmlDetectSAX2(ctxt);
10827
* SAX: beginning of the document processing.
10829
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10830
ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10833
* Get the 4 first bytes and decode the charset
10834
* if enc != XML_CHAR_ENCODING_NONE
10835
* plug some encoding conversion routines.
10837
if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10842
enc = xmlDetectCharEncoding(start, 4);
10843
if (enc != XML_CHAR_ENCODING_NONE) {
10844
xmlSwitchEncoding(ctxt, enc);
10850
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10854
* Check for the XMLDecl in the Prolog.
10857
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10860
* Note that we will switch encoding on the fly.
10862
xmlParseXMLDecl(ctxt);
10863
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10865
* The XML REC instructs us to stop parsing right here
10871
ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10873
if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10874
ctxt->sax->startDocument(ctxt->userData);
10875
if (ctxt->instate == XML_PARSER_EOF)
10879
* Doing validity checking on chunk doesn't make sense
10881
ctxt->instate = XML_PARSER_CONTENT;
10882
ctxt->validate = 0;
10883
ctxt->loadsubset = 0;
10886
xmlParseContent(ctxt);
10887
if (ctxt->instate == XML_PARSER_EOF)
10890
if ((RAW == '<') && (NXT(1) == '/')) {
10891
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10892
} else if (RAW != 0) {
10893
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10897
* SAX: end of the document processing.
10899
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10900
ctxt->sax->endDocument(ctxt->userData);
10902
if (! ctxt->wellFormed) return(-1);
10906
#ifdef LIBXML_PUSH_ENABLED
10907
/************************************************************************
10909
* Progressive parsing interfaces *
10911
************************************************************************/
10914
* xmlParseLookupSequence:
10915
* @ctxt: an XML parser context
10916
* @first: the first char to lookup
10917
* @next: the next char to lookup or zero
10918
* @third: the next char to lookup or zero
10920
* Try to find if a sequence (first, next, third) or just (first next) or
10921
* (first) is available in the input stream.
10922
* This function has a side effect of (possibly) incrementing ctxt->checkIndex
10923
* to avoid rescanning sequences of bytes, it DOES change the state of the
10924
* parser, do not use liberally.
10926
* Returns the index to the current parsing point if the full sequence
10927
* is available, -1 otherwise.
10930
xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10931
xmlChar next, xmlChar third) {
10933
xmlParserInputPtr in;
10934
const xmlChar *buf;
10937
if (in == NULL) return(-1);
10938
base = in->cur - in->base;
10939
if (base < 0) return(-1);
10940
if (ctxt->checkIndex > base)
10941
base = ctxt->checkIndex;
10942
if (in->buf == NULL) {
10946
buf = xmlBufContent(in->buf->buffer);
10947
len = xmlBufUse(in->buf->buffer);
10949
/* take into account the sequence length */
10950
if (third) len -= 2;
10951
else if (next) len --;
10952
for (;base < len;base++) {
10953
if (buf[base] == first) {
10955
if ((buf[base + 1] != next) ||
10956
(buf[base + 2] != third)) continue;
10957
} else if (next != 0) {
10958
if (buf[base + 1] != next) continue;
10960
ctxt->checkIndex = 0;
10963
xmlGenericError(xmlGenericErrorContext,
10964
"PP: lookup '%c' found at %d\n",
10966
else if (third == 0)
10967
xmlGenericError(xmlGenericErrorContext,
10968
"PP: lookup '%c%c' found at %d\n",
10969
first, next, base);
10971
xmlGenericError(xmlGenericErrorContext,
10972
"PP: lookup '%c%c%c' found at %d\n",
10973
first, next, third, base);
10975
return(base - (in->cur - in->base));
10978
ctxt->checkIndex = base;
10981
xmlGenericError(xmlGenericErrorContext,
10982
"PP: lookup '%c' failed\n", first);
10983
else if (third == 0)
10984
xmlGenericError(xmlGenericErrorContext,
10985
"PP: lookup '%c%c' failed\n", first, next);
10987
xmlGenericError(xmlGenericErrorContext,
10988
"PP: lookup '%c%c%c' failed\n", first, next, third);
10994
* xmlParseGetLasts:
10995
* @ctxt: an XML parser context
10996
* @lastlt: pointer to store the last '<' from the input
10997
* @lastgt: pointer to store the last '>' from the input
10999
* Lookup the last < and > in the current chunk
11002
xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11003
const xmlChar **lastgt) {
11004
const xmlChar *tmp;
11006
if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11007
xmlGenericError(xmlGenericErrorContext,
11008
"Internal error: xmlParseGetLasts\n");
11011
if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
11012
tmp = ctxt->input->end;
11014
while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11015
if (tmp < ctxt->input->base) {
11021
while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11022
if (*tmp == '\'') {
11024
while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11025
if (tmp < ctxt->input->end) tmp++;
11026
} else if (*tmp == '"') {
11028
while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11029
if (tmp < ctxt->input->end) tmp++;
11033
if (tmp < ctxt->input->end)
11038
while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11039
if (tmp >= ctxt->input->base)
11051
* xmlCheckCdataPush:
11052
* @cur: pointer to the bock of characters
11053
* @len: length of the block in bytes
11055
* Check that the block of characters is okay as SCdata content [20]
11057
* Returns the number of bytes to pass if okay, a negative index where an
11058
* UTF-8 error occured otherwise
11061
xmlCheckCdataPush(const xmlChar *utf, int len) {
11066
if ((utf == NULL) || (len <= 0))
11069
for (ix = 0; ix < len;) { /* string is 0-terminated */
11071
if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11074
else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11078
} else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11079
if (ix + 2 > len) return(ix);
11080
if ((utf[ix+1] & 0xc0 ) != 0x80)
11082
codepoint = (utf[ix] & 0x1f) << 6;
11083
codepoint |= utf[ix+1] & 0x3f;
11084
if (!xmlIsCharQ(codepoint))
11087
} else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11088
if (ix + 3 > len) return(ix);
11089
if (((utf[ix+1] & 0xc0) != 0x80) ||
11090
((utf[ix+2] & 0xc0) != 0x80))
11092
codepoint = (utf[ix] & 0xf) << 12;
11093
codepoint |= (utf[ix+1] & 0x3f) << 6;
11094
codepoint |= utf[ix+2] & 0x3f;
11095
if (!xmlIsCharQ(codepoint))
11098
} else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11099
if (ix + 4 > len) return(ix);
11100
if (((utf[ix+1] & 0xc0) != 0x80) ||
11101
((utf[ix+2] & 0xc0) != 0x80) ||
11102
((utf[ix+3] & 0xc0) != 0x80))
11104
codepoint = (utf[ix] & 0x7) << 18;
11105
codepoint |= (utf[ix+1] & 0x3f) << 12;
11106
codepoint |= (utf[ix+2] & 0x3f) << 6;
11107
codepoint |= utf[ix+3] & 0x3f;
11108
if (!xmlIsCharQ(codepoint))
11111
} else /* unknown encoding */
11118
* xmlParseTryOrFinish:
11119
* @ctxt: an XML parser context
11120
* @terminate: last chunk indicator
11122
* Try to progress on parsing
11124
* Returns zero if no parsing was possible
11127
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11131
const xmlChar *lastlt, *lastgt;
11133
if (ctxt->input == NULL)
11137
switch (ctxt->instate) {
11138
case XML_PARSER_EOF:
11139
xmlGenericError(xmlGenericErrorContext,
11140
"PP: try EOF\n"); break;
11141
case XML_PARSER_START:
11142
xmlGenericError(xmlGenericErrorContext,
11143
"PP: try START\n"); break;
11144
case XML_PARSER_MISC:
11145
xmlGenericError(xmlGenericErrorContext,
11146
"PP: try MISC\n");break;
11147
case XML_PARSER_COMMENT:
11148
xmlGenericError(xmlGenericErrorContext,
11149
"PP: try COMMENT\n");break;
11150
case XML_PARSER_PROLOG:
11151
xmlGenericError(xmlGenericErrorContext,
11152
"PP: try PROLOG\n");break;
11153
case XML_PARSER_START_TAG:
11154
xmlGenericError(xmlGenericErrorContext,
11155
"PP: try START_TAG\n");break;
11156
case XML_PARSER_CONTENT:
11157
xmlGenericError(xmlGenericErrorContext,
11158
"PP: try CONTENT\n");break;
11159
case XML_PARSER_CDATA_SECTION:
11160
xmlGenericError(xmlGenericErrorContext,
11161
"PP: try CDATA_SECTION\n");break;
11162
case XML_PARSER_END_TAG:
11163
xmlGenericError(xmlGenericErrorContext,
11164
"PP: try END_TAG\n");break;
11165
case XML_PARSER_ENTITY_DECL:
11166
xmlGenericError(xmlGenericErrorContext,
11167
"PP: try ENTITY_DECL\n");break;
11168
case XML_PARSER_ENTITY_VALUE:
11169
xmlGenericError(xmlGenericErrorContext,
11170
"PP: try ENTITY_VALUE\n");break;
11171
case XML_PARSER_ATTRIBUTE_VALUE:
11172
xmlGenericError(xmlGenericErrorContext,
11173
"PP: try ATTRIBUTE_VALUE\n");break;
11174
case XML_PARSER_DTD:
11175
xmlGenericError(xmlGenericErrorContext,
11176
"PP: try DTD\n");break;
11177
case XML_PARSER_EPILOG:
11178
xmlGenericError(xmlGenericErrorContext,
11179
"PP: try EPILOG\n");break;
11180
case XML_PARSER_PI:
11181
xmlGenericError(xmlGenericErrorContext,
11182
"PP: try PI\n");break;
11183
case XML_PARSER_IGNORE:
11184
xmlGenericError(xmlGenericErrorContext,
11185
"PP: try IGNORE\n");break;
11189
if ((ctxt->input != NULL) &&
11190
(ctxt->input->cur - ctxt->input->base > 4096)) {
11192
ctxt->checkIndex = 0;
11194
xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11196
while (ctxt->instate != XML_PARSER_EOF) {
11197
if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11202
* Pop-up of finished entities.
11204
while ((RAW == 0) && (ctxt->inputNr > 1))
11207
if (ctxt->input == NULL) break;
11208
if (ctxt->input->buf == NULL)
11209
avail = ctxt->input->length -
11210
(ctxt->input->cur - ctxt->input->base);
11213
* If we are operating on converted input, try to flush
11214
* remainng chars to avoid them stalling in the non-converted
11215
* buffer. But do not do this in document start where
11216
* encoding="..." may not have been read and we work on a
11217
* guessed encoding.
11219
if ((ctxt->instate != XML_PARSER_START) &&
11220
(ctxt->input->buf->raw != NULL) &&
11221
(xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11222
size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11224
size_t current = ctxt->input->cur - ctxt->input->base;
11226
xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11227
xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11230
avail = xmlBufUse(ctxt->input->buf->buffer) -
11231
(ctxt->input->cur - ctxt->input->base);
11235
switch (ctxt->instate) {
11236
case XML_PARSER_EOF:
11238
* Document parsing is done !
11241
case XML_PARSER_START:
11242
if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11244
xmlCharEncoding enc;
11247
* Very first chars read from the document flow.
11253
* Get the 4 first bytes and decode the charset
11254
* if enc != XML_CHAR_ENCODING_NONE
11255
* plug some encoding conversion routines,
11256
* else xmlSwitchEncoding will set to (default)
11263
enc = xmlDetectCharEncoding(start, 4);
11264
xmlSwitchEncoding(ctxt, enc);
11270
cur = ctxt->input->cur[0];
11271
next = ctxt->input->cur[1];
11273
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11274
ctxt->sax->setDocumentLocator(ctxt->userData,
11275
&xmlDefaultSAXLocator);
11276
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11277
ctxt->instate = XML_PARSER_EOF;
11279
xmlGenericError(xmlGenericErrorContext,
11280
"PP: entering EOF\n");
11282
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11283
ctxt->sax->endDocument(ctxt->userData);
11286
if ((cur == '<') && (next == '?')) {
11287
/* PI or XML decl */
11288
if (avail < 5) return(ret);
11289
if ((!terminate) &&
11290
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11292
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11293
ctxt->sax->setDocumentLocator(ctxt->userData,
11294
&xmlDefaultSAXLocator);
11295
if ((ctxt->input->cur[2] == 'x') &&
11296
(ctxt->input->cur[3] == 'm') &&
11297
(ctxt->input->cur[4] == 'l') &&
11298
(IS_BLANK_CH(ctxt->input->cur[5]))) {
11301
xmlGenericError(xmlGenericErrorContext,
11302
"PP: Parsing XML Decl\n");
11304
xmlParseXMLDecl(ctxt);
11305
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11307
* The XML REC instructs us to stop parsing right
11310
ctxt->instate = XML_PARSER_EOF;
11313
ctxt->standalone = ctxt->input->standalone;
11314
if ((ctxt->encoding == NULL) &&
11315
(ctxt->input->encoding != NULL))
11316
ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11317
if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11318
(!ctxt->disableSAX))
11319
ctxt->sax->startDocument(ctxt->userData);
11320
ctxt->instate = XML_PARSER_MISC;
11322
xmlGenericError(xmlGenericErrorContext,
11323
"PP: entering MISC\n");
11326
ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11327
if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11328
(!ctxt->disableSAX))
11329
ctxt->sax->startDocument(ctxt->userData);
11330
ctxt->instate = XML_PARSER_MISC;
11332
xmlGenericError(xmlGenericErrorContext,
11333
"PP: entering MISC\n");
11337
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11338
ctxt->sax->setDocumentLocator(ctxt->userData,
11339
&xmlDefaultSAXLocator);
11340
ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11341
if (ctxt->version == NULL) {
11342
xmlErrMemory(ctxt, NULL);
11345
if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11346
(!ctxt->disableSAX))
11347
ctxt->sax->startDocument(ctxt->userData);
11348
ctxt->instate = XML_PARSER_MISC;
11350
xmlGenericError(xmlGenericErrorContext,
11351
"PP: entering MISC\n");
11355
case XML_PARSER_START_TAG: {
11356
const xmlChar *name;
11357
const xmlChar *prefix = NULL;
11358
const xmlChar *URI = NULL;
11359
int nsNr = ctxt->nsNr;
11361
if ((avail < 2) && (ctxt->inputNr == 1))
11363
cur = ctxt->input->cur[0];
11365
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11366
ctxt->instate = XML_PARSER_EOF;
11367
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11368
ctxt->sax->endDocument(ctxt->userData);
11372
if (ctxt->progressive) {
11373
/* > can be found unescaped in attribute values */
11374
if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11376
} else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11380
if (ctxt->spaceNr == 0)
11381
spacePush(ctxt, -1);
11382
else if (*ctxt->space == -2)
11383
spacePush(ctxt, -1);
11385
spacePush(ctxt, *ctxt->space);
11386
#ifdef LIBXML_SAX1_ENABLED
11388
#endif /* LIBXML_SAX1_ENABLED */
11389
name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11390
#ifdef LIBXML_SAX1_ENABLED
11392
name = xmlParseStartTag(ctxt);
11393
#endif /* LIBXML_SAX1_ENABLED */
11394
if (ctxt->instate == XML_PARSER_EOF)
11396
if (name == NULL) {
11398
ctxt->instate = XML_PARSER_EOF;
11399
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11400
ctxt->sax->endDocument(ctxt->userData);
11403
#ifdef LIBXML_VALID_ENABLED
11405
* [ VC: Root Element Type ]
11406
* The Name in the document type declaration must match
11407
* the element type of the root element.
11409
if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11410
ctxt->node && (ctxt->node == ctxt->myDoc->children))
11411
ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11412
#endif /* LIBXML_VALID_ENABLED */
11415
* Check for an Empty Element.
11417
if ((RAW == '/') && (NXT(1) == '>')) {
11421
if ((ctxt->sax != NULL) &&
11422
(ctxt->sax->endElementNs != NULL) &&
11423
(!ctxt->disableSAX))
11424
ctxt->sax->endElementNs(ctxt->userData, name,
11426
if (ctxt->nsNr - nsNr > 0)
11427
nsPop(ctxt, ctxt->nsNr - nsNr);
11428
#ifdef LIBXML_SAX1_ENABLED
11430
if ((ctxt->sax != NULL) &&
11431
(ctxt->sax->endElement != NULL) &&
11432
(!ctxt->disableSAX))
11433
ctxt->sax->endElement(ctxt->userData, name);
11434
#endif /* LIBXML_SAX1_ENABLED */
11436
if (ctxt->instate == XML_PARSER_EOF)
11439
if (ctxt->nameNr == 0) {
11440
ctxt->instate = XML_PARSER_EPILOG;
11442
ctxt->instate = XML_PARSER_CONTENT;
11444
ctxt->progressive = 1;
11450
xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11451
"Couldn't find end of Start Tag %s\n",
11457
nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11458
#ifdef LIBXML_SAX1_ENABLED
11460
namePush(ctxt, name);
11461
#endif /* LIBXML_SAX1_ENABLED */
11463
ctxt->instate = XML_PARSER_CONTENT;
11464
ctxt->progressive = 1;
11467
case XML_PARSER_CONTENT: {
11468
const xmlChar *test;
11470
if ((avail < 2) && (ctxt->inputNr == 1))
11472
cur = ctxt->input->cur[0];
11473
next = ctxt->input->cur[1];
11476
cons = ctxt->input->consumed;
11477
if ((cur == '<') && (next == '/')) {
11478
ctxt->instate = XML_PARSER_END_TAG;
11480
} else if ((cur == '<') && (next == '?')) {
11481
if ((!terminate) &&
11482
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11483
ctxt->progressive = XML_PARSER_PI;
11487
ctxt->instate = XML_PARSER_CONTENT;
11488
ctxt->progressive = 1;
11489
} else if ((cur == '<') && (next != '!')) {
11490
ctxt->instate = XML_PARSER_START_TAG;
11492
} else if ((cur == '<') && (next == '!') &&
11493
(ctxt->input->cur[2] == '-') &&
11494
(ctxt->input->cur[3] == '-')) {
11499
ctxt->input->cur += 4;
11500
term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11501
ctxt->input->cur -= 4;
11502
if ((!terminate) && (term < 0)) {
11503
ctxt->progressive = XML_PARSER_COMMENT;
11506
xmlParseComment(ctxt);
11507
ctxt->instate = XML_PARSER_CONTENT;
11508
ctxt->progressive = 1;
11509
} else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11510
(ctxt->input->cur[2] == '[') &&
11511
(ctxt->input->cur[3] == 'C') &&
11512
(ctxt->input->cur[4] == 'D') &&
11513
(ctxt->input->cur[5] == 'A') &&
11514
(ctxt->input->cur[6] == 'T') &&
11515
(ctxt->input->cur[7] == 'A') &&
11516
(ctxt->input->cur[8] == '[')) {
11518
ctxt->instate = XML_PARSER_CDATA_SECTION;
11520
} else if ((cur == '<') && (next == '!') &&
11523
} else if (cur == '&') {
11524
if ((!terminate) &&
11525
(xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11527
xmlParseReference(ctxt);
11529
/* TODO Avoid the extra copy, handle directly !!! */
11531
* Goal of the following test is:
11532
* - minimize calls to the SAX 'character' callback
11533
* when they are mergeable
11534
* - handle an problem for isBlank when we only parse
11535
* a sequence of blank chars and the next one is
11536
* not available to check against '<' presence.
11537
* - tries to homogenize the differences in SAX
11538
* callbacks between the push and pull versions
11541
if ((ctxt->inputNr == 1) &&
11542
(avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11544
if (ctxt->progressive) {
11545
if ((lastlt == NULL) ||
11546
(ctxt->input->cur > lastlt))
11548
} else if (xmlParseLookupSequence(ctxt,
11554
ctxt->checkIndex = 0;
11555
xmlParseCharData(ctxt, 0);
11558
* Pop-up of finished entities.
11560
while ((RAW == 0) && (ctxt->inputNr > 1))
11562
if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11563
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11564
"detected an error in element content\n");
11565
ctxt->instate = XML_PARSER_EOF;
11570
case XML_PARSER_END_TAG:
11574
if (ctxt->progressive) {
11575
/* > can be found unescaped in attribute values */
11576
if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11578
} else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11583
xmlParseEndTag2(ctxt,
11584
(void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11585
(void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11586
(int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11589
#ifdef LIBXML_SAX1_ENABLED
11591
xmlParseEndTag1(ctxt, 0);
11592
#endif /* LIBXML_SAX1_ENABLED */
11593
if (ctxt->instate == XML_PARSER_EOF) {
11595
} else if (ctxt->nameNr == 0) {
11596
ctxt->instate = XML_PARSER_EPILOG;
11598
ctxt->instate = XML_PARSER_CONTENT;
11601
case XML_PARSER_CDATA_SECTION: {
11603
* The Push mode need to have the SAX callback for
11604
* cdataBlock merge back contiguous callbacks.
11608
base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11610
if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11613
tmp = xmlCheckCdataPush(ctxt->input->cur,
11614
XML_PARSER_BIG_BUFFER_SIZE);
11617
ctxt->input->cur += tmp;
11618
goto encoding_error;
11620
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11621
if (ctxt->sax->cdataBlock != NULL)
11622
ctxt->sax->cdataBlock(ctxt->userData,
11623
ctxt->input->cur, tmp);
11624
else if (ctxt->sax->characters != NULL)
11625
ctxt->sax->characters(ctxt->userData,
11626
ctxt->input->cur, tmp);
11628
if (ctxt->instate == XML_PARSER_EOF)
11631
ctxt->checkIndex = 0;
11637
tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11638
if ((tmp < 0) || (tmp != base)) {
11640
ctxt->input->cur += tmp;
11641
goto encoding_error;
11643
if ((ctxt->sax != NULL) && (base == 0) &&
11644
(ctxt->sax->cdataBlock != NULL) &&
11645
(!ctxt->disableSAX)) {
11647
* Special case to provide identical behaviour
11648
* between pull and push parsers on enpty CDATA
11651
if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11652
(!strncmp((const char *)&ctxt->input->cur[-9],
11654
ctxt->sax->cdataBlock(ctxt->userData,
11656
} else if ((ctxt->sax != NULL) && (base > 0) &&
11657
(!ctxt->disableSAX)) {
11658
if (ctxt->sax->cdataBlock != NULL)
11659
ctxt->sax->cdataBlock(ctxt->userData,
11660
ctxt->input->cur, base);
11661
else if (ctxt->sax->characters != NULL)
11662
ctxt->sax->characters(ctxt->userData,
11663
ctxt->input->cur, base);
11665
if (ctxt->instate == XML_PARSER_EOF)
11668
ctxt->checkIndex = 0;
11669
ctxt->instate = XML_PARSER_CONTENT;
11671
xmlGenericError(xmlGenericErrorContext,
11672
"PP: entering CONTENT\n");
11677
case XML_PARSER_MISC:
11679
if (ctxt->input->buf == NULL)
11680
avail = ctxt->input->length -
11681
(ctxt->input->cur - ctxt->input->base);
11683
avail = xmlBufUse(ctxt->input->buf->buffer) -
11684
(ctxt->input->cur - ctxt->input->base);
11687
cur = ctxt->input->cur[0];
11688
next = ctxt->input->cur[1];
11689
if ((cur == '<') && (next == '?')) {
11690
if ((!terminate) &&
11691
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11692
ctxt->progressive = XML_PARSER_PI;
11696
xmlGenericError(xmlGenericErrorContext,
11697
"PP: Parsing PI\n");
11700
if (ctxt->instate == XML_PARSER_EOF)
11702
ctxt->instate = XML_PARSER_MISC;
11703
ctxt->progressive = 1;
11704
ctxt->checkIndex = 0;
11705
} else if ((cur == '<') && (next == '!') &&
11706
(ctxt->input->cur[2] == '-') &&
11707
(ctxt->input->cur[3] == '-')) {
11708
if ((!terminate) &&
11709
(xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11710
ctxt->progressive = XML_PARSER_COMMENT;
11714
xmlGenericError(xmlGenericErrorContext,
11715
"PP: Parsing Comment\n");
11717
xmlParseComment(ctxt);
11718
if (ctxt->instate == XML_PARSER_EOF)
11720
ctxt->instate = XML_PARSER_MISC;
11721
ctxt->progressive = 1;
11722
ctxt->checkIndex = 0;
11723
} else if ((cur == '<') && (next == '!') &&
11724
(ctxt->input->cur[2] == 'D') &&
11725
(ctxt->input->cur[3] == 'O') &&
11726
(ctxt->input->cur[4] == 'C') &&
11727
(ctxt->input->cur[5] == 'T') &&
11728
(ctxt->input->cur[6] == 'Y') &&
11729
(ctxt->input->cur[7] == 'P') &&
11730
(ctxt->input->cur[8] == 'E')) {
11731
if ((!terminate) &&
11732
(xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11733
ctxt->progressive = XML_PARSER_DTD;
11737
xmlGenericError(xmlGenericErrorContext,
11738
"PP: Parsing internal subset\n");
11740
ctxt->inSubset = 1;
11741
ctxt->progressive = 0;
11742
ctxt->checkIndex = 0;
11743
xmlParseDocTypeDecl(ctxt);
11744
if (ctxt->instate == XML_PARSER_EOF)
11747
ctxt->instate = XML_PARSER_DTD;
11749
xmlGenericError(xmlGenericErrorContext,
11750
"PP: entering DTD\n");
11754
* Create and update the external subset.
11756
ctxt->inSubset = 2;
11757
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11758
(ctxt->sax->externalSubset != NULL))
11759
ctxt->sax->externalSubset(ctxt->userData,
11760
ctxt->intSubName, ctxt->extSubSystem,
11762
ctxt->inSubset = 0;
11763
xmlCleanSpecialAttr(ctxt);
11764
ctxt->instate = XML_PARSER_PROLOG;
11766
xmlGenericError(xmlGenericErrorContext,
11767
"PP: entering PROLOG\n");
11770
} else if ((cur == '<') && (next == '!') &&
11774
ctxt->instate = XML_PARSER_START_TAG;
11775
ctxt->progressive = XML_PARSER_START_TAG;
11776
xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11778
xmlGenericError(xmlGenericErrorContext,
11779
"PP: entering START_TAG\n");
11783
case XML_PARSER_PROLOG:
11785
if (ctxt->input->buf == NULL)
11786
avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11788
avail = xmlBufUse(ctxt->input->buf->buffer) -
11789
(ctxt->input->cur - ctxt->input->base);
11792
cur = ctxt->input->cur[0];
11793
next = ctxt->input->cur[1];
11794
if ((cur == '<') && (next == '?')) {
11795
if ((!terminate) &&
11796
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11797
ctxt->progressive = XML_PARSER_PI;
11801
xmlGenericError(xmlGenericErrorContext,
11802
"PP: Parsing PI\n");
11805
if (ctxt->instate == XML_PARSER_EOF)
11807
ctxt->instate = XML_PARSER_PROLOG;
11808
ctxt->progressive = 1;
11809
} else if ((cur == '<') && (next == '!') &&
11810
(ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11811
if ((!terminate) &&
11812
(xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11813
ctxt->progressive = XML_PARSER_COMMENT;
11817
xmlGenericError(xmlGenericErrorContext,
11818
"PP: Parsing Comment\n");
11820
xmlParseComment(ctxt);
11821
if (ctxt->instate == XML_PARSER_EOF)
11823
ctxt->instate = XML_PARSER_PROLOG;
11824
ctxt->progressive = 1;
11825
} else if ((cur == '<') && (next == '!') &&
11829
ctxt->instate = XML_PARSER_START_TAG;
11830
if (ctxt->progressive == 0)
11831
ctxt->progressive = XML_PARSER_START_TAG;
11832
xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11834
xmlGenericError(xmlGenericErrorContext,
11835
"PP: entering START_TAG\n");
11839
case XML_PARSER_EPILOG:
11841
if (ctxt->input->buf == NULL)
11842
avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11844
avail = xmlBufUse(ctxt->input->buf->buffer) -
11845
(ctxt->input->cur - ctxt->input->base);
11848
cur = ctxt->input->cur[0];
11849
next = ctxt->input->cur[1];
11850
if ((cur == '<') && (next == '?')) {
11851
if ((!terminate) &&
11852
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11853
ctxt->progressive = XML_PARSER_PI;
11857
xmlGenericError(xmlGenericErrorContext,
11858
"PP: Parsing PI\n");
11861
if (ctxt->instate == XML_PARSER_EOF)
11863
ctxt->instate = XML_PARSER_EPILOG;
11864
ctxt->progressive = 1;
11865
} else if ((cur == '<') && (next == '!') &&
11866
(ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11867
if ((!terminate) &&
11868
(xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11869
ctxt->progressive = XML_PARSER_COMMENT;
11873
xmlGenericError(xmlGenericErrorContext,
11874
"PP: Parsing Comment\n");
11876
xmlParseComment(ctxt);
11877
if (ctxt->instate == XML_PARSER_EOF)
11879
ctxt->instate = XML_PARSER_EPILOG;
11880
ctxt->progressive = 1;
11881
} else if ((cur == '<') && (next == '!') &&
11885
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11886
ctxt->instate = XML_PARSER_EOF;
11888
xmlGenericError(xmlGenericErrorContext,
11889
"PP: entering EOF\n");
11891
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11892
ctxt->sax->endDocument(ctxt->userData);
11896
case XML_PARSER_DTD: {
11898
* Sorry but progressive parsing of the internal subset
11899
* is not expected to be supported. We first check that
11900
* the full content of the internal subset is available and
11901
* the parsing is launched only at that point.
11902
* Internal subset ends up with "']' S? '>'" in an unescaped
11903
* section and not in a ']]>' sequence which are conditional
11904
* sections (whoever argued to keep that crap in XML deserve
11905
* a place in hell !).
11912
base = ctxt->input->cur - ctxt->input->base;
11913
if (base < 0) return(0);
11914
if (ctxt->checkIndex > base)
11915
base = ctxt->checkIndex;
11916
buf = xmlBufContent(ctxt->input->buf->buffer);
11917
use = xmlBufUse(ctxt->input->buf->buffer);
11918
for (;(unsigned int) base < use; base++) {
11920
if (buf[base] == quote)
11924
if ((quote == 0) && (buf[base] == '<')) {
11926
/* special handling of comments */
11927
if (((unsigned int) base + 4 < use) &&
11928
(buf[base + 1] == '!') &&
11929
(buf[base + 2] == '-') &&
11930
(buf[base + 3] == '-')) {
11931
for (;(unsigned int) base + 3 < use; base++) {
11932
if ((buf[base] == '-') &&
11933
(buf[base + 1] == '-') &&
11934
(buf[base + 2] == '>')) {
11942
fprintf(stderr, "unfinished comment\n");
11949
if (buf[base] == '"') {
11953
if (buf[base] == '\'') {
11957
if (buf[base] == ']') {
11959
fprintf(stderr, "%c%c%c%c: ", buf[base],
11960
buf[base + 1], buf[base + 2], buf[base + 3]);
11962
if ((unsigned int) base +1 >= use)
11964
if (buf[base + 1] == ']') {
11965
/* conditional crap, skip both ']' ! */
11969
for (i = 1; (unsigned int) base + i < use; i++) {
11970
if (buf[base + i] == '>') {
11972
fprintf(stderr, "found\n");
11974
goto found_end_int_subset;
11976
if (!IS_BLANK_CH(buf[base + i])) {
11978
fprintf(stderr, "not found\n");
11980
goto not_end_of_int_subset;
11984
fprintf(stderr, "end of stream\n");
11989
not_end_of_int_subset:
11990
continue; /* for */
11993
* We didn't found the end of the Internal subset
11996
ctxt->checkIndex = base;
11998
ctxt->checkIndex = 0;
12001
xmlGenericError(xmlGenericErrorContext,
12002
"PP: lookup of int subset end filed\n");
12006
found_end_int_subset:
12007
ctxt->checkIndex = 0;
12008
xmlParseInternalSubset(ctxt);
12009
if (ctxt->instate == XML_PARSER_EOF)
12011
ctxt->inSubset = 2;
12012
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12013
(ctxt->sax->externalSubset != NULL))
12014
ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12015
ctxt->extSubSystem, ctxt->extSubURI);
12016
ctxt->inSubset = 0;
12017
xmlCleanSpecialAttr(ctxt);
12018
if (ctxt->instate == XML_PARSER_EOF)
12020
ctxt->instate = XML_PARSER_PROLOG;
12021
ctxt->checkIndex = 0;
12023
xmlGenericError(xmlGenericErrorContext,
12024
"PP: entering PROLOG\n");
12028
case XML_PARSER_COMMENT:
12029
xmlGenericError(xmlGenericErrorContext,
12030
"PP: internal error, state == COMMENT\n");
12031
ctxt->instate = XML_PARSER_CONTENT;
12033
xmlGenericError(xmlGenericErrorContext,
12034
"PP: entering CONTENT\n");
12037
case XML_PARSER_IGNORE:
12038
xmlGenericError(xmlGenericErrorContext,
12039
"PP: internal error, state == IGNORE");
12040
ctxt->instate = XML_PARSER_DTD;
12042
xmlGenericError(xmlGenericErrorContext,
12043
"PP: entering DTD\n");
12046
case XML_PARSER_PI:
12047
xmlGenericError(xmlGenericErrorContext,
12048
"PP: internal error, state == PI\n");
12049
ctxt->instate = XML_PARSER_CONTENT;
12051
xmlGenericError(xmlGenericErrorContext,
12052
"PP: entering CONTENT\n");
12055
case XML_PARSER_ENTITY_DECL:
12056
xmlGenericError(xmlGenericErrorContext,
12057
"PP: internal error, state == ENTITY_DECL\n");
12058
ctxt->instate = XML_PARSER_DTD;
12060
xmlGenericError(xmlGenericErrorContext,
12061
"PP: entering DTD\n");
12064
case XML_PARSER_ENTITY_VALUE:
12065
xmlGenericError(xmlGenericErrorContext,
12066
"PP: internal error, state == ENTITY_VALUE\n");
12067
ctxt->instate = XML_PARSER_CONTENT;
12069
xmlGenericError(xmlGenericErrorContext,
12070
"PP: entering DTD\n");
12073
case XML_PARSER_ATTRIBUTE_VALUE:
12074
xmlGenericError(xmlGenericErrorContext,
12075
"PP: internal error, state == ATTRIBUTE_VALUE\n");
12076
ctxt->instate = XML_PARSER_START_TAG;
12078
xmlGenericError(xmlGenericErrorContext,
12079
"PP: entering START_TAG\n");
12082
case XML_PARSER_SYSTEM_LITERAL:
12083
xmlGenericError(xmlGenericErrorContext,
12084
"PP: internal error, state == SYSTEM_LITERAL\n");
12085
ctxt->instate = XML_PARSER_START_TAG;
12087
xmlGenericError(xmlGenericErrorContext,
12088
"PP: entering START_TAG\n");
12091
case XML_PARSER_PUBLIC_LITERAL:
12092
xmlGenericError(xmlGenericErrorContext,
12093
"PP: internal error, state == PUBLIC_LITERAL\n");
12094
ctxt->instate = XML_PARSER_START_TAG;
12096
xmlGenericError(xmlGenericErrorContext,
12097
"PP: entering START_TAG\n");
12104
xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12111
snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12112
ctxt->input->cur[0], ctxt->input->cur[1],
12113
ctxt->input->cur[2], ctxt->input->cur[3]);
12114
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12115
"Input is not proper UTF-8, indicate encoding !\n%s",
12116
BAD_CAST buffer, NULL);
12122
* xmlParseCheckTransition:
12123
* @ctxt: an XML parser context
12124
* @chunk: a char array
12125
* @size: the size in byte of the chunk
12127
* Check depending on the current parser state if the chunk given must be
12128
* processed immediately or one need more data to advance on parsing.
12130
* Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12133
xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12134
if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12136
if (ctxt->instate == XML_PARSER_START_TAG) {
12137
if (memchr(chunk, '>', size) != NULL)
12141
if (ctxt->progressive == XML_PARSER_COMMENT) {
12142
if (memchr(chunk, '>', size) != NULL)
12146
if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12147
if (memchr(chunk, '>', size) != NULL)
12151
if (ctxt->progressive == XML_PARSER_PI) {
12152
if (memchr(chunk, '>', size) != NULL)
12156
if (ctxt->instate == XML_PARSER_END_TAG) {
12157
if (memchr(chunk, '>', size) != NULL)
12161
if ((ctxt->progressive == XML_PARSER_DTD) ||
12162
(ctxt->instate == XML_PARSER_DTD)) {
12163
if (memchr(chunk, '>', size) != NULL)
12172
* @ctxt: an XML parser context
12173
* @chunk: an char array
12174
* @size: the size in byte of the chunk
12175
* @terminate: last chunk indicator
12177
* Parse a Chunk of memory
12179
* Returns zero if no error, the xmlParserErrors otherwise.
12182
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12186
size_t old_avail = 0;
12190
return(XML_ERR_INTERNAL_ERROR);
12191
if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12192
return(ctxt->errNo);
12193
if (ctxt->instate == XML_PARSER_EOF)
12195
if (ctxt->instate == XML_PARSER_START)
12196
xmlDetectSAX2(ctxt);
12197
if ((size > 0) && (chunk != NULL) && (!terminate) &&
12198
(chunk[size - 1] == '\r')) {
12205
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12206
(ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
12207
size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12208
size_t cur = ctxt->input->cur - ctxt->input->base;
12211
old_avail = xmlBufUse(ctxt->input->buf->buffer);
12213
* Specific handling if we autodetected an encoding, we should not
12214
* push more than the first line ... which depend on the encoding
12215
* And only push the rest once the final encoding was detected
12217
if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12218
(ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12219
unsigned int len = 45;
12221
if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12222
BAD_CAST "UTF-16")) ||
12223
(xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12224
BAD_CAST "UTF16")))
12226
else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12227
BAD_CAST "UCS-4")) ||
12228
(xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12232
if (ctxt->input->buf->rawconsumed < len)
12233
len -= ctxt->input->buf->rawconsumed;
12236
* Change size for reading the initial declaration only
12237
* if size is greater than len. Otherwise, memmove in xmlBufferAdd
12238
* will blindly copy extra bytes from memory.
12240
if ((unsigned int) size > len) {
12241
remain = size - len;
12247
res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12249
ctxt->errNo = XML_PARSER_EOF;
12250
ctxt->disableSAX = 1;
12251
return (XML_PARSER_EOF);
12253
xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12255
xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12258
} else if (ctxt->instate != XML_PARSER_EOF) {
12259
if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12260
xmlParserInputBufferPtr in = ctxt->input->buf;
12261
if ((in->encoder != NULL) && (in->buffer != NULL) &&
12262
(in->raw != NULL)) {
12264
size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12265
size_t current = ctxt->input->cur - ctxt->input->base;
12267
nbchars = xmlCharEncInput(in, terminate);
12270
xmlGenericError(xmlGenericErrorContext,
12271
"xmlParseChunk: encoder error\n");
12272
return(XML_ERR_INVALID_ENCODING);
12274
xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12279
xmlParseTryOrFinish(ctxt, 0);
12281
if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12282
avail = xmlBufUse(ctxt->input->buf->buffer);
12284
* Depending on the current state it may not be such
12285
* a good idea to try parsing if there is nothing in the chunk
12286
* which would be worth doing a parser state transition and we
12287
* need to wait for more data
12289
if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12290
(old_avail == 0) || (avail == 0) ||
12291
(xmlParseCheckTransition(ctxt,
12292
(const char *)&ctxt->input->base[old_avail],
12293
avail - old_avail)))
12294
xmlParseTryOrFinish(ctxt, terminate);
12296
if (ctxt->instate == XML_PARSER_EOF)
12297
return(ctxt->errNo);
12299
if ((ctxt->input != NULL) &&
12300
(((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12301
((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12302
((ctxt->options & XML_PARSE_HUGE) == 0)) {
12303
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12304
ctxt->instate = XML_PARSER_EOF;
12306
if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12307
return(ctxt->errNo);
12315
if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12316
(ctxt->input->buf != NULL)) {
12317
size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12319
size_t current = ctxt->input->cur - ctxt->input->base;
12321
xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12323
xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12328
* Check for termination
12332
if (ctxt->input != NULL) {
12333
if (ctxt->input->buf == NULL)
12334
cur_avail = ctxt->input->length -
12335
(ctxt->input->cur - ctxt->input->base);
12337
cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12338
(ctxt->input->cur - ctxt->input->base);
12341
if ((ctxt->instate != XML_PARSER_EOF) &&
12342
(ctxt->instate != XML_PARSER_EPILOG)) {
12343
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12345
if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12346
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12348
if (ctxt->instate != XML_PARSER_EOF) {
12349
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12350
ctxt->sax->endDocument(ctxt->userData);
12352
ctxt->instate = XML_PARSER_EOF;
12354
if (ctxt->wellFormed == 0)
12355
return((xmlParserErrors) ctxt->errNo);
12360
/************************************************************************
12362
* I/O front end functions to the parser *
12364
************************************************************************/
12367
* xmlCreatePushParserCtxt:
12368
* @sax: a SAX handler
12369
* @user_data: The user data returned on SAX callbacks
12370
* @chunk: a pointer to an array of chars
12371
* @size: number of chars in the array
12372
* @filename: an optional file name or URI
12374
* Create a parser context for using the XML parser in push mode.
12375
* If @buffer and @size are non-NULL, the data is used to detect
12376
* the encoding. The remaining characters will be parsed so they
12377
* don't need to be fed in again through xmlParseChunk.
12378
* To allow content encoding detection, @size should be >= 4
12379
* The value of @filename is used for fetching external entities
12380
* and error/warning reports.
12382
* Returns the new parser context or NULL
12386
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12387
const char *chunk, int size, const char *filename) {
12388
xmlParserCtxtPtr ctxt;
12389
xmlParserInputPtr inputStream;
12390
xmlParserInputBufferPtr buf;
12391
xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12394
* plug some encoding conversion routines
12396
if ((chunk != NULL) && (size >= 4))
12397
enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12399
buf = xmlAllocParserInputBuffer(enc);
12400
if (buf == NULL) return(NULL);
12402
ctxt = xmlNewParserCtxt();
12403
if (ctxt == NULL) {
12404
xmlErrMemory(NULL, "creating parser: out of memory\n");
12405
xmlFreeParserInputBuffer(buf);
12408
ctxt->dictNames = 1;
12409
ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12410
if (ctxt->pushTab == NULL) {
12411
xmlErrMemory(ctxt, NULL);
12412
xmlFreeParserInputBuffer(buf);
12413
xmlFreeParserCtxt(ctxt);
12417
#ifdef LIBXML_SAX1_ENABLED
12418
if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12419
#endif /* LIBXML_SAX1_ENABLED */
12420
xmlFree(ctxt->sax);
12421
ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12422
if (ctxt->sax == NULL) {
12423
xmlErrMemory(ctxt, NULL);
12424
xmlFreeParserInputBuffer(buf);
12425
xmlFreeParserCtxt(ctxt);
12428
memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12429
if (sax->initialized == XML_SAX2_MAGIC)
12430
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12432
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12433
if (user_data != NULL)
12434
ctxt->userData = user_data;
12436
if (filename == NULL) {
12437
ctxt->directory = NULL;
12439
ctxt->directory = xmlParserGetDirectory(filename);
12442
inputStream = xmlNewInputStream(ctxt);
12443
if (inputStream == NULL) {
12444
xmlFreeParserCtxt(ctxt);
12445
xmlFreeParserInputBuffer(buf);
12449
if (filename == NULL)
12450
inputStream->filename = NULL;
12452
inputStream->filename = (char *)
12453
xmlCanonicPath((const xmlChar *) filename);
12454
if (inputStream->filename == NULL) {
12455
xmlFreeParserCtxt(ctxt);
12456
xmlFreeParserInputBuffer(buf);
12460
inputStream->buf = buf;
12461
xmlBufResetInput(inputStream->buf->buffer, inputStream);
12462
inputPush(ctxt, inputStream);
12465
* If the caller didn't provide an initial 'chunk' for determining
12466
* the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12467
* that it can be automatically determined later
12469
if ((size == 0) || (chunk == NULL)) {
12470
ctxt->charset = XML_CHAR_ENCODING_NONE;
12471
} else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12472
size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12473
size_t cur = ctxt->input->cur - ctxt->input->base;
12475
xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12477
xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12479
xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12483
if (enc != XML_CHAR_ENCODING_NONE) {
12484
xmlSwitchEncoding(ctxt, enc);
12489
#endif /* LIBXML_PUSH_ENABLED */
12493
* @ctxt: an XML parser context
12495
* Blocks further parser processing
12498
xmlStopParser(xmlParserCtxtPtr ctxt) {
12501
ctxt->instate = XML_PARSER_EOF;
12502
ctxt->errNo = XML_ERR_USER_STOP;
12503
ctxt->disableSAX = 1;
12504
if (ctxt->input != NULL) {
12505
ctxt->input->cur = BAD_CAST"";
12506
ctxt->input->base = ctxt->input->cur;
12511
* xmlCreateIOParserCtxt:
12512
* @sax: a SAX handler
12513
* @user_data: The user data returned on SAX callbacks
12514
* @ioread: an I/O read function
12515
* @ioclose: an I/O close function
12516
* @ioctx: an I/O handler
12517
* @enc: the charset encoding if known
12519
* Create a parser context for using the XML parser with an existing
12522
* Returns the new parser context or NULL
12525
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12526
xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12527
void *ioctx, xmlCharEncoding enc) {
12528
xmlParserCtxtPtr ctxt;
12529
xmlParserInputPtr inputStream;
12530
xmlParserInputBufferPtr buf;
12532
if (ioread == NULL) return(NULL);
12534
buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12536
if (ioclose != NULL)
12541
ctxt = xmlNewParserCtxt();
12542
if (ctxt == NULL) {
12543
xmlFreeParserInputBuffer(buf);
12547
#ifdef LIBXML_SAX1_ENABLED
12548
if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12549
#endif /* LIBXML_SAX1_ENABLED */
12550
xmlFree(ctxt->sax);
12551
ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12552
if (ctxt->sax == NULL) {
12553
xmlErrMemory(ctxt, NULL);
12554
xmlFreeParserCtxt(ctxt);
12557
memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12558
if (sax->initialized == XML_SAX2_MAGIC)
12559
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12561
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12562
if (user_data != NULL)
12563
ctxt->userData = user_data;
12566
inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12567
if (inputStream == NULL) {
12568
xmlFreeParserCtxt(ctxt);
12571
inputPush(ctxt, inputStream);
12576
#ifdef LIBXML_VALID_ENABLED
12577
/************************************************************************
12579
* Front ends when parsing a DTD *
12581
************************************************************************/
12585
* @sax: the SAX handler block or NULL
12586
* @input: an Input Buffer
12587
* @enc: the charset encoding if known
12589
* Load and parse a DTD
12591
* Returns the resulting xmlDtdPtr or NULL in case of error.
12592
* @input will be freed by the function in any case.
12596
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12597
xmlCharEncoding enc) {
12598
xmlDtdPtr ret = NULL;
12599
xmlParserCtxtPtr ctxt;
12600
xmlParserInputPtr pinput = NULL;
12606
ctxt = xmlNewParserCtxt();
12607
if (ctxt == NULL) {
12608
xmlFreeParserInputBuffer(input);
12613
* Set-up the SAX context
12616
if (ctxt->sax != NULL)
12617
xmlFree(ctxt->sax);
12619
ctxt->userData = ctxt;
12621
xmlDetectSAX2(ctxt);
12624
* generate a parser input from the I/O handler
12627
pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12628
if (pinput == NULL) {
12629
if (sax != NULL) ctxt->sax = NULL;
12630
xmlFreeParserInputBuffer(input);
12631
xmlFreeParserCtxt(ctxt);
12636
* plug some encoding conversion routines here.
12638
if (xmlPushInput(ctxt, pinput) < 0) {
12639
if (sax != NULL) ctxt->sax = NULL;
12640
xmlFreeParserCtxt(ctxt);
12643
if (enc != XML_CHAR_ENCODING_NONE) {
12644
xmlSwitchEncoding(ctxt, enc);
12647
pinput->filename = NULL;
12650
pinput->base = ctxt->input->cur;
12651
pinput->cur = ctxt->input->cur;
12652
pinput->free = NULL;
12655
* let's parse that entity knowing it's an external subset.
12657
ctxt->inSubset = 2;
12658
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12659
if (ctxt->myDoc == NULL) {
12660
xmlErrMemory(ctxt, "New Doc failed");
12663
ctxt->myDoc->properties = XML_DOC_INTERNAL;
12664
ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12665
BAD_CAST "none", BAD_CAST "none");
12667
if ((enc == XML_CHAR_ENCODING_NONE) &&
12668
((ctxt->input->end - ctxt->input->cur) >= 4)) {
12670
* Get the 4 first bytes and decode the charset
12671
* if enc != XML_CHAR_ENCODING_NONE
12672
* plug some encoding conversion routines.
12678
enc = xmlDetectCharEncoding(start, 4);
12679
if (enc != XML_CHAR_ENCODING_NONE) {
12680
xmlSwitchEncoding(ctxt, enc);
12684
xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12686
if (ctxt->myDoc != NULL) {
12687
if (ctxt->wellFormed) {
12688
ret = ctxt->myDoc->extSubset;
12689
ctxt->myDoc->extSubset = NULL;
12694
tmp = ret->children;
12695
while (tmp != NULL) {
12703
xmlFreeDoc(ctxt->myDoc);
12704
ctxt->myDoc = NULL;
12706
if (sax != NULL) ctxt->sax = NULL;
12707
xmlFreeParserCtxt(ctxt);
12714
* @sax: the SAX handler block
12715
* @ExternalID: a NAME* containing the External ID of the DTD
12716
* @SystemID: a NAME* containing the URL to the DTD
12718
* Load and parse an external subset.
12720
* Returns the resulting xmlDtdPtr or NULL in case of error.
12724
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12725
const xmlChar *SystemID) {
12726
xmlDtdPtr ret = NULL;
12727
xmlParserCtxtPtr ctxt;
12728
xmlParserInputPtr input = NULL;
12729
xmlCharEncoding enc;
12730
xmlChar* systemIdCanonic;
12732
if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12734
ctxt = xmlNewParserCtxt();
12735
if (ctxt == NULL) {
12740
* Set-up the SAX context
12743
if (ctxt->sax != NULL)
12744
xmlFree(ctxt->sax);
12746
ctxt->userData = ctxt;
12750
* Canonicalise the system ID
12752
systemIdCanonic = xmlCanonicPath(SystemID);
12753
if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12754
xmlFreeParserCtxt(ctxt);
12759
* Ask the Entity resolver to load the damn thing
12762
if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12763
input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12765
if (input == NULL) {
12766
if (sax != NULL) ctxt->sax = NULL;
12767
xmlFreeParserCtxt(ctxt);
12768
if (systemIdCanonic != NULL)
12769
xmlFree(systemIdCanonic);
12774
* plug some encoding conversion routines here.
12776
if (xmlPushInput(ctxt, input) < 0) {
12777
if (sax != NULL) ctxt->sax = NULL;
12778
xmlFreeParserCtxt(ctxt);
12779
if (systemIdCanonic != NULL)
12780
xmlFree(systemIdCanonic);
12783
if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12784
enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12785
xmlSwitchEncoding(ctxt, enc);
12788
if (input->filename == NULL)
12789
input->filename = (char *) systemIdCanonic;
12791
xmlFree(systemIdCanonic);
12794
input->base = ctxt->input->cur;
12795
input->cur = ctxt->input->cur;
12796
input->free = NULL;
12799
* let's parse that entity knowing it's an external subset.
12801
ctxt->inSubset = 2;
12802
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12803
if (ctxt->myDoc == NULL) {
12804
xmlErrMemory(ctxt, "New Doc failed");
12805
if (sax != NULL) ctxt->sax = NULL;
12806
xmlFreeParserCtxt(ctxt);
12809
ctxt->myDoc->properties = XML_DOC_INTERNAL;
12810
ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12811
ExternalID, SystemID);
12812
xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12814
if (ctxt->myDoc != NULL) {
12815
if (ctxt->wellFormed) {
12816
ret = ctxt->myDoc->extSubset;
12817
ctxt->myDoc->extSubset = NULL;
12822
tmp = ret->children;
12823
while (tmp != NULL) {
12831
xmlFreeDoc(ctxt->myDoc);
12832
ctxt->myDoc = NULL;
12834
if (sax != NULL) ctxt->sax = NULL;
12835
xmlFreeParserCtxt(ctxt);
12843
* @ExternalID: a NAME* containing the External ID of the DTD
12844
* @SystemID: a NAME* containing the URL to the DTD
12846
* Load and parse an external subset.
12848
* Returns the resulting xmlDtdPtr or NULL in case of error.
12852
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12853
return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12855
#endif /* LIBXML_VALID_ENABLED */
12857
/************************************************************************
12859
* Front ends when parsing an Entity *
12861
************************************************************************/
12864
* xmlParseCtxtExternalEntity:
12865
* @ctx: the existing parsing context
12866
* @URL: the URL for the entity to load
12867
* @ID: the System ID for the entity to load
12868
* @lst: the return value for the set of parsed nodes
12870
* Parse an external general entity within an existing parsing context
12871
* An external general parsed entity is well-formed if it matches the
12872
* production labeled extParsedEnt.
12874
* [78] extParsedEnt ::= TextDecl? content
12876
* Returns 0 if the entity is well formed, -1 in case of args problem and
12877
* the parser error code otherwise
12881
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12882
const xmlChar *ID, xmlNodePtr *lst) {
12883
xmlParserCtxtPtr ctxt;
12885
xmlNodePtr newRoot;
12886
xmlSAXHandlerPtr oldsax = NULL;
12889
xmlCharEncoding enc;
12891
if (ctx == NULL) return(-1);
12893
if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12894
(ctx->depth > 1024)) {
12895
return(XML_ERR_ENTITY_LOOP);
12900
if ((URL == NULL) && (ID == NULL))
12902
if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12905
ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
12906
if (ctxt == NULL) {
12910
oldsax = ctxt->sax;
12911
ctxt->sax = ctx->sax;
12912
xmlDetectSAX2(ctxt);
12913
newDoc = xmlNewDoc(BAD_CAST "1.0");
12914
if (newDoc == NULL) {
12915
xmlFreeParserCtxt(ctxt);
12918
newDoc->properties = XML_DOC_INTERNAL;
12919
if (ctx->myDoc->dict) {
12920
newDoc->dict = ctx->myDoc->dict;
12921
xmlDictReference(newDoc->dict);
12923
if (ctx->myDoc != NULL) {
12924
newDoc->intSubset = ctx->myDoc->intSubset;
12925
newDoc->extSubset = ctx->myDoc->extSubset;
12927
if (ctx->myDoc->URL != NULL) {
12928
newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12930
newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12931
if (newRoot == NULL) {
12932
ctxt->sax = oldsax;
12933
xmlFreeParserCtxt(ctxt);
12934
newDoc->intSubset = NULL;
12935
newDoc->extSubset = NULL;
12936
xmlFreeDoc(newDoc);
12939
xmlAddChild((xmlNodePtr) newDoc, newRoot);
12940
nodePush(ctxt, newDoc->children);
12941
if (ctx->myDoc == NULL) {
12942
ctxt->myDoc = newDoc;
12944
ctxt->myDoc = ctx->myDoc;
12945
newDoc->children->doc = ctx->myDoc;
12949
* Get the 4 first bytes and decode the charset
12950
* if enc != XML_CHAR_ENCODING_NONE
12951
* plug some encoding conversion routines.
12954
if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12959
enc = xmlDetectCharEncoding(start, 4);
12960
if (enc != XML_CHAR_ENCODING_NONE) {
12961
xmlSwitchEncoding(ctxt, enc);
12966
* Parse a possible text declaration first
12968
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12969
xmlParseTextDecl(ctxt);
12971
* An XML-1.0 document can't reference an entity not XML-1.0
12973
if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12974
(!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12975
xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12976
"Version mismatch between document and entity\n");
12981
* If the user provided its own SAX callbacks then reuse the
12982
* useData callback field, otherwise the expected setup in a
12983
* DOM builder is to have userData == ctxt
12985
if (ctx->userData == ctx)
12986
ctxt->userData = ctxt;
12988
ctxt->userData = ctx->userData;
12991
* Doing validity checking on chunk doesn't make sense
12993
ctxt->instate = XML_PARSER_CONTENT;
12994
ctxt->validate = ctx->validate;
12995
ctxt->valid = ctx->valid;
12996
ctxt->loadsubset = ctx->loadsubset;
12997
ctxt->depth = ctx->depth + 1;
12998
ctxt->replaceEntities = ctx->replaceEntities;
12999
if (ctxt->validate) {
13000
ctxt->vctxt.error = ctx->vctxt.error;
13001
ctxt->vctxt.warning = ctx->vctxt.warning;
13003
ctxt->vctxt.error = NULL;
13004
ctxt->vctxt.warning = NULL;
13006
ctxt->vctxt.nodeTab = NULL;
13007
ctxt->vctxt.nodeNr = 0;
13008
ctxt->vctxt.nodeMax = 0;
13009
ctxt->vctxt.node = NULL;
13010
if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13011
ctxt->dict = ctx->dict;
13012
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13013
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13014
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13015
ctxt->dictNames = ctx->dictNames;
13016
ctxt->attsDefault = ctx->attsDefault;
13017
ctxt->attsSpecial = ctx->attsSpecial;
13018
ctxt->linenumbers = ctx->linenumbers;
13020
xmlParseContent(ctxt);
13022
ctx->validate = ctxt->validate;
13023
ctx->valid = ctxt->valid;
13024
if ((RAW == '<') && (NXT(1) == '/')) {
13025
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13026
} else if (RAW != 0) {
13027
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13029
if (ctxt->node != newDoc->children) {
13030
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13033
if (!ctxt->wellFormed) {
13034
if (ctxt->errNo == 0)
13043
* Return the newly created nodeset after unlinking it from
13044
* they pseudo parent.
13046
cur = newDoc->children->children;
13048
while (cur != NULL) {
13049
cur->parent = NULL;
13052
newDoc->children->children = NULL;
13056
ctxt->sax = oldsax;
13058
ctxt->attsDefault = NULL;
13059
ctxt->attsSpecial = NULL;
13060
xmlFreeParserCtxt(ctxt);
13061
newDoc->intSubset = NULL;
13062
newDoc->extSubset = NULL;
13063
xmlFreeDoc(newDoc);
13069
* xmlParseExternalEntityPrivate:
13070
* @doc: the document the chunk pertains to
13071
* @oldctxt: the previous parser context if available
13072
* @sax: the SAX handler bloc (possibly NULL)
13073
* @user_data: The user data returned on SAX callbacks (possibly NULL)
13074
* @depth: Used for loop detection, use 0
13075
* @URL: the URL for the entity to load
13076
* @ID: the System ID for the entity to load
13077
* @list: the return value for the set of parsed nodes
13079
* Private version of xmlParseExternalEntity()
13081
* Returns 0 if the entity is well formed, -1 in case of args problem and
13082
* the parser error code otherwise
13085
static xmlParserErrors
13086
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13087
xmlSAXHandlerPtr sax,
13088
void *user_data, int depth, const xmlChar *URL,
13089
const xmlChar *ID, xmlNodePtr *list) {
13090
xmlParserCtxtPtr ctxt;
13092
xmlNodePtr newRoot;
13093
xmlSAXHandlerPtr oldsax = NULL;
13094
xmlParserErrors ret = XML_ERR_OK;
13096
xmlCharEncoding enc;
13098
if (((depth > 40) &&
13099
((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13101
return(XML_ERR_ENTITY_LOOP);
13106
if ((URL == NULL) && (ID == NULL))
13107
return(XML_ERR_INTERNAL_ERROR);
13109
return(XML_ERR_INTERNAL_ERROR);
13112
ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13113
if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13114
ctxt->userData = ctxt;
13115
if (oldctxt != NULL) {
13116
ctxt->_private = oldctxt->_private;
13117
ctxt->loadsubset = oldctxt->loadsubset;
13118
ctxt->validate = oldctxt->validate;
13119
ctxt->external = oldctxt->external;
13120
ctxt->record_info = oldctxt->record_info;
13121
ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13122
ctxt->node_seq.length = oldctxt->node_seq.length;
13123
ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13126
* Doing validity checking on chunk without context
13127
* doesn't make sense
13129
ctxt->_private = NULL;
13130
ctxt->validate = 0;
13131
ctxt->external = 2;
13132
ctxt->loadsubset = 0;
13135
oldsax = ctxt->sax;
13137
if (user_data != NULL)
13138
ctxt->userData = user_data;
13140
xmlDetectSAX2(ctxt);
13141
newDoc = xmlNewDoc(BAD_CAST "1.0");
13142
if (newDoc == NULL) {
13143
ctxt->node_seq.maximum = 0;
13144
ctxt->node_seq.length = 0;
13145
ctxt->node_seq.buffer = NULL;
13146
xmlFreeParserCtxt(ctxt);
13147
return(XML_ERR_INTERNAL_ERROR);
13149
newDoc->properties = XML_DOC_INTERNAL;
13150
newDoc->intSubset = doc->intSubset;
13151
newDoc->extSubset = doc->extSubset;
13152
newDoc->dict = doc->dict;
13153
xmlDictReference(newDoc->dict);
13155
if (doc->URL != NULL) {
13156
newDoc->URL = xmlStrdup(doc->URL);
13158
newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13159
if (newRoot == NULL) {
13161
ctxt->sax = oldsax;
13162
ctxt->node_seq.maximum = 0;
13163
ctxt->node_seq.length = 0;
13164
ctxt->node_seq.buffer = NULL;
13165
xmlFreeParserCtxt(ctxt);
13166
newDoc->intSubset = NULL;
13167
newDoc->extSubset = NULL;
13168
xmlFreeDoc(newDoc);
13169
return(XML_ERR_INTERNAL_ERROR);
13171
xmlAddChild((xmlNodePtr) newDoc, newRoot);
13172
nodePush(ctxt, newDoc->children);
13174
newRoot->doc = doc;
13177
* Get the 4 first bytes and decode the charset
13178
* if enc != XML_CHAR_ENCODING_NONE
13179
* plug some encoding conversion routines.
13182
if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13187
enc = xmlDetectCharEncoding(start, 4);
13188
if (enc != XML_CHAR_ENCODING_NONE) {
13189
xmlSwitchEncoding(ctxt, enc);
13194
* Parse a possible text declaration first
13196
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13197
xmlParseTextDecl(ctxt);
13200
ctxt->instate = XML_PARSER_CONTENT;
13201
ctxt->depth = depth;
13203
xmlParseContent(ctxt);
13205
if ((RAW == '<') && (NXT(1) == '/')) {
13206
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13207
} else if (RAW != 0) {
13208
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13210
if (ctxt->node != newDoc->children) {
13211
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13214
if (!ctxt->wellFormed) {
13215
if (ctxt->errNo == 0)
13216
ret = XML_ERR_INTERNAL_ERROR;
13218
ret = (xmlParserErrors)ctxt->errNo;
13220
if (list != NULL) {
13224
* Return the newly created nodeset after unlinking it from
13225
* they pseudo parent.
13227
cur = newDoc->children->children;
13229
while (cur != NULL) {
13230
cur->parent = NULL;
13233
newDoc->children->children = NULL;
13239
* Record in the parent context the number of entities replacement
13240
* done when parsing that reference.
13242
if (oldctxt != NULL)
13243
oldctxt->nbentities += ctxt->nbentities;
13246
* Also record the size of the entity parsed
13248
if (ctxt->input != NULL) {
13249
oldctxt->sizeentities += ctxt->input->consumed;
13250
oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13253
* And record the last error if any
13255
if (ctxt->lastError.code != XML_ERR_OK)
13256
xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13259
ctxt->sax = oldsax;
13260
oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13261
oldctxt->node_seq.length = ctxt->node_seq.length;
13262
oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13263
ctxt->node_seq.maximum = 0;
13264
ctxt->node_seq.length = 0;
13265
ctxt->node_seq.buffer = NULL;
13266
xmlFreeParserCtxt(ctxt);
13267
newDoc->intSubset = NULL;
13268
newDoc->extSubset = NULL;
13269
xmlFreeDoc(newDoc);
13274
#ifdef LIBXML_SAX1_ENABLED
13276
* xmlParseExternalEntity:
13277
* @doc: the document the chunk pertains to
13278
* @sax: the SAX handler bloc (possibly NULL)
13279
* @user_data: The user data returned on SAX callbacks (possibly NULL)
13280
* @depth: Used for loop detection, use 0
13281
* @URL: the URL for the entity to load
13282
* @ID: the System ID for the entity to load
13283
* @lst: the return value for the set of parsed nodes
13285
* Parse an external general entity
13286
* An external general parsed entity is well-formed if it matches the
13287
* production labeled extParsedEnt.
13289
* [78] extParsedEnt ::= TextDecl? content
13291
* Returns 0 if the entity is well formed, -1 in case of args problem and
13292
* the parser error code otherwise
13296
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13297
int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13298
return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13303
* xmlParseBalancedChunkMemory:
13304
* @doc: the document the chunk pertains to
13305
* @sax: the SAX handler bloc (possibly NULL)
13306
* @user_data: The user data returned on SAX callbacks (possibly NULL)
13307
* @depth: Used for loop detection, use 0
13308
* @string: the input string in UTF8 or ISO-Latin (zero terminated)
13309
* @lst: the return value for the set of parsed nodes
13311
* Parse a well-balanced chunk of an XML document
13312
* called by the parser
13313
* The allowed sequence for the Well Balanced Chunk is the one defined by
13314
* the content production in the XML grammar:
13316
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13318
* Returns 0 if the chunk is well balanced, -1 in case of args problem and
13319
* the parser error code otherwise
13323
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13324
void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13325
return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13326
depth, string, lst, 0 );
13328
#endif /* LIBXML_SAX1_ENABLED */
13331
* xmlParseBalancedChunkMemoryInternal:
13332
* @oldctxt: the existing parsing context
13333
* @string: the input string in UTF8 or ISO-Latin (zero terminated)
13334
* @user_data: the user data field for the parser context
13335
* @lst: the return value for the set of parsed nodes
13338
* Parse a well-balanced chunk of an XML document
13339
* called by the parser
13340
* The allowed sequence for the Well Balanced Chunk is the one defined by
13341
* the content production in the XML grammar:
13343
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13345
* Returns XML_ERR_OK if the chunk is well balanced, and the parser
13346
* error code otherwise
13348
* In case recover is set to 1, the nodelist will not be empty even if
13349
* the parsed chunk is not well balanced.
13351
static xmlParserErrors
13352
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13353
const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13354
xmlParserCtxtPtr ctxt;
13355
xmlDocPtr newDoc = NULL;
13356
xmlNodePtr newRoot;
13357
xmlSAXHandlerPtr oldsax = NULL;
13358
xmlNodePtr content = NULL;
13359
xmlNodePtr last = NULL;
13361
xmlParserErrors ret = XML_ERR_OK;
13366
if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13367
(oldctxt->depth > 1024)) {
13368
return(XML_ERR_ENTITY_LOOP);
13374
if (string == NULL)
13375
return(XML_ERR_INTERNAL_ERROR);
13377
size = xmlStrlen(string);
13379
ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13380
if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13381
if (user_data != NULL)
13382
ctxt->userData = user_data;
13384
ctxt->userData = ctxt;
13385
if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13386
ctxt->dict = oldctxt->dict;
13387
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13388
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13389
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13392
/* propagate namespaces down the entity */
13393
for (i = 0;i < oldctxt->nsNr;i += 2) {
13394
nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13398
oldsax = ctxt->sax;
13399
ctxt->sax = oldctxt->sax;
13400
xmlDetectSAX2(ctxt);
13401
ctxt->replaceEntities = oldctxt->replaceEntities;
13402
ctxt->options = oldctxt->options;
13404
ctxt->_private = oldctxt->_private;
13405
if (oldctxt->myDoc == NULL) {
13406
newDoc = xmlNewDoc(BAD_CAST "1.0");
13407
if (newDoc == NULL) {
13408
ctxt->sax = oldsax;
13410
xmlFreeParserCtxt(ctxt);
13411
return(XML_ERR_INTERNAL_ERROR);
13413
newDoc->properties = XML_DOC_INTERNAL;
13414
newDoc->dict = ctxt->dict;
13415
xmlDictReference(newDoc->dict);
13416
ctxt->myDoc = newDoc;
13418
ctxt->myDoc = oldctxt->myDoc;
13419
content = ctxt->myDoc->children;
13420
last = ctxt->myDoc->last;
13422
newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13423
if (newRoot == NULL) {
13424
ctxt->sax = oldsax;
13426
xmlFreeParserCtxt(ctxt);
13427
if (newDoc != NULL) {
13428
xmlFreeDoc(newDoc);
13430
return(XML_ERR_INTERNAL_ERROR);
13432
ctxt->myDoc->children = NULL;
13433
ctxt->myDoc->last = NULL;
13434
xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13435
nodePush(ctxt, ctxt->myDoc->children);
13436
ctxt->instate = XML_PARSER_CONTENT;
13437
ctxt->depth = oldctxt->depth + 1;
13439
ctxt->validate = 0;
13440
ctxt->loadsubset = oldctxt->loadsubset;
13441
if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13443
* ID/IDREF registration will be done in xmlValidateElement below
13445
ctxt->loadsubset |= XML_SKIP_IDS;
13447
ctxt->dictNames = oldctxt->dictNames;
13448
ctxt->attsDefault = oldctxt->attsDefault;
13449
ctxt->attsSpecial = oldctxt->attsSpecial;
13451
xmlParseContent(ctxt);
13452
if ((RAW == '<') && (NXT(1) == '/')) {
13453
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13454
} else if (RAW != 0) {
13455
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13457
if (ctxt->node != ctxt->myDoc->children) {
13458
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13461
if (!ctxt->wellFormed) {
13462
if (ctxt->errNo == 0)
13463
ret = XML_ERR_INTERNAL_ERROR;
13465
ret = (xmlParserErrors)ctxt->errNo;
13470
if ((lst != NULL) && (ret == XML_ERR_OK)) {
13474
* Return the newly created nodeset after unlinking it from
13475
* they pseudo parent.
13477
cur = ctxt->myDoc->children->children;
13479
while (cur != NULL) {
13480
#ifdef LIBXML_VALID_ENABLED
13481
if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13482
(oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13483
(cur->type == XML_ELEMENT_NODE)) {
13484
oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13485
oldctxt->myDoc, cur);
13487
#endif /* LIBXML_VALID_ENABLED */
13488
cur->parent = NULL;
13491
ctxt->myDoc->children->children = NULL;
13493
if (ctxt->myDoc != NULL) {
13494
xmlFreeNode(ctxt->myDoc->children);
13495
ctxt->myDoc->children = content;
13496
ctxt->myDoc->last = last;
13500
* Record in the parent context the number of entities replacement
13501
* done when parsing that reference.
13503
if (oldctxt != NULL)
13504
oldctxt->nbentities += ctxt->nbentities;
13507
* Also record the last error if any
13509
if (ctxt->lastError.code != XML_ERR_OK)
13510
xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13512
ctxt->sax = oldsax;
13514
ctxt->attsDefault = NULL;
13515
ctxt->attsSpecial = NULL;
13516
xmlFreeParserCtxt(ctxt);
13517
if (newDoc != NULL) {
13518
xmlFreeDoc(newDoc);
13525
* xmlParseInNodeContext:
13526
* @node: the context node
13527
* @data: the input string
13528
* @datalen: the input string length in bytes
13529
* @options: a combination of xmlParserOption
13530
* @lst: the return value for the set of parsed nodes
13532
* Parse a well-balanced chunk of an XML document
13533
* within the context (DTD, namespaces, etc ...) of the given node.
13535
* The allowed sequence for the data is a Well Balanced Chunk defined by
13536
* the content production in the XML grammar:
13538
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13540
* Returns XML_ERR_OK if the chunk is well balanced, and the parser
13541
* error code otherwise
13544
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13545
int options, xmlNodePtr *lst) {
13547
xmlParserCtxtPtr ctxt;
13548
xmlDocPtr doc = NULL;
13549
xmlNodePtr fake, cur;
13552
xmlParserErrors ret = XML_ERR_OK;
13555
* check all input parameters, grab the document
13557
if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13558
return(XML_ERR_INTERNAL_ERROR);
13559
switch (node->type) {
13560
case XML_ELEMENT_NODE:
13561
case XML_ATTRIBUTE_NODE:
13562
case XML_TEXT_NODE:
13563
case XML_CDATA_SECTION_NODE:
13564
case XML_ENTITY_REF_NODE:
13566
case XML_COMMENT_NODE:
13567
case XML_DOCUMENT_NODE:
13568
case XML_HTML_DOCUMENT_NODE:
13571
return(XML_ERR_INTERNAL_ERROR);
13574
while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13575
(node->type != XML_DOCUMENT_NODE) &&
13576
(node->type != XML_HTML_DOCUMENT_NODE))
13577
node = node->parent;
13579
return(XML_ERR_INTERNAL_ERROR);
13580
if (node->type == XML_ELEMENT_NODE)
13583
doc = (xmlDocPtr) node;
13585
return(XML_ERR_INTERNAL_ERROR);
13588
* allocate a context and set-up everything not related to the
13589
* node position in the tree
13591
if (doc->type == XML_DOCUMENT_NODE)
13592
ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13593
#ifdef LIBXML_HTML_ENABLED
13594
else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13595
ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13597
* When parsing in context, it makes no sense to add implied
13598
* elements like html/body/etc...
13600
options |= HTML_PARSE_NOIMPLIED;
13604
return(XML_ERR_INTERNAL_ERROR);
13607
return(XML_ERR_NO_MEMORY);
13610
* Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13611
* We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13612
* we must wait until the last moment to free the original one.
13614
if (doc->dict != NULL) {
13615
if (ctxt->dict != NULL)
13616
xmlDictFree(ctxt->dict);
13617
ctxt->dict = doc->dict;
13619
options |= XML_PARSE_NODICT;
13621
if (doc->encoding != NULL) {
13622
xmlCharEncodingHandlerPtr hdlr;
13624
if (ctxt->encoding != NULL)
13625
xmlFree((xmlChar *) ctxt->encoding);
13626
ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13628
hdlr = xmlFindCharEncodingHandler(doc->encoding);
13629
if (hdlr != NULL) {
13630
xmlSwitchToEncoding(ctxt, hdlr);
13632
return(XML_ERR_UNSUPPORTED_ENCODING);
13636
xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13637
xmlDetectSAX2(ctxt);
13640
fake = xmlNewComment(NULL);
13641
if (fake == NULL) {
13642
xmlFreeParserCtxt(ctxt);
13643
return(XML_ERR_NO_MEMORY);
13645
xmlAddChild(node, fake);
13647
if (node->type == XML_ELEMENT_NODE) {
13648
nodePush(ctxt, node);
13650
* initialize the SAX2 namespaces stack
13653
while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13654
xmlNsPtr ns = cur->nsDef;
13655
const xmlChar *iprefix, *ihref;
13657
while (ns != NULL) {
13659
iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13660
ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13662
iprefix = ns->prefix;
13666
if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13667
nsPush(ctxt, iprefix, ihref);
13674
ctxt->instate = XML_PARSER_CONTENT;
13677
if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13679
* ID/IDREF registration will be done in xmlValidateElement below
13681
ctxt->loadsubset |= XML_SKIP_IDS;
13684
#ifdef LIBXML_HTML_ENABLED
13685
if (doc->type == XML_HTML_DOCUMENT_NODE)
13686
__htmlParseContent(ctxt);
13689
xmlParseContent(ctxt);
13692
if ((RAW == '<') && (NXT(1) == '/')) {
13693
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13694
} else if (RAW != 0) {
13695
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13697
if ((ctxt->node != NULL) && (ctxt->node != node)) {
13698
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13699
ctxt->wellFormed = 0;
13702
if (!ctxt->wellFormed) {
13703
if (ctxt->errNo == 0)
13704
ret = XML_ERR_INTERNAL_ERROR;
13706
ret = (xmlParserErrors)ctxt->errNo;
13712
* Return the newly created nodeset after unlinking it from
13713
* the pseudo sibling.
13726
while (cur != NULL) {
13727
cur->parent = NULL;
13731
xmlUnlinkNode(fake);
13735
if (ret != XML_ERR_OK) {
13736
xmlFreeNodeList(*lst);
13740
if (doc->dict != NULL)
13742
xmlFreeParserCtxt(ctxt);
13746
return(XML_ERR_INTERNAL_ERROR);
13750
#ifdef LIBXML_SAX1_ENABLED
13752
* xmlParseBalancedChunkMemoryRecover:
13753
* @doc: the document the chunk pertains to
13754
* @sax: the SAX handler bloc (possibly NULL)
13755
* @user_data: The user data returned on SAX callbacks (possibly NULL)
13756
* @depth: Used for loop detection, use 0
13757
* @string: the input string in UTF8 or ISO-Latin (zero terminated)
13758
* @lst: the return value for the set of parsed nodes
13759
* @recover: return nodes even if the data is broken (use 0)
13762
* Parse a well-balanced chunk of an XML document
13763
* called by the parser
13764
* The allowed sequence for the Well Balanced Chunk is the one defined by
13765
* the content production in the XML grammar:
13767
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13769
* Returns 0 if the chunk is well balanced, -1 in case of args problem and
13770
* the parser error code otherwise
13772
* In case recover is set to 1, the nodelist will not be empty even if
13773
* the parsed chunk is not well balanced, assuming the parsing succeeded to
13777
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13778
void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13780
xmlParserCtxtPtr ctxt;
13782
xmlSAXHandlerPtr oldsax = NULL;
13783
xmlNodePtr content, newRoot;
13788
return(XML_ERR_ENTITY_LOOP);
13794
if (string == NULL)
13797
size = xmlStrlen(string);
13799
ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13800
if (ctxt == NULL) return(-1);
13801
ctxt->userData = ctxt;
13803
oldsax = ctxt->sax;
13805
if (user_data != NULL)
13806
ctxt->userData = user_data;
13808
newDoc = xmlNewDoc(BAD_CAST "1.0");
13809
if (newDoc == NULL) {
13810
xmlFreeParserCtxt(ctxt);
13813
newDoc->properties = XML_DOC_INTERNAL;
13814
if ((doc != NULL) && (doc->dict != NULL)) {
13815
xmlDictFree(ctxt->dict);
13816
ctxt->dict = doc->dict;
13817
xmlDictReference(ctxt->dict);
13818
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13819
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13820
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13821
ctxt->dictNames = 1;
13823
xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13826
newDoc->intSubset = doc->intSubset;
13827
newDoc->extSubset = doc->extSubset;
13829
newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13830
if (newRoot == NULL) {
13832
ctxt->sax = oldsax;
13833
xmlFreeParserCtxt(ctxt);
13834
newDoc->intSubset = NULL;
13835
newDoc->extSubset = NULL;
13836
xmlFreeDoc(newDoc);
13839
xmlAddChild((xmlNodePtr) newDoc, newRoot);
13840
nodePush(ctxt, newRoot);
13842
ctxt->myDoc = newDoc;
13844
ctxt->myDoc = newDoc;
13845
newDoc->children->doc = doc;
13846
/* Ensure that doc has XML spec namespace */
13847
xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13848
newDoc->oldNs = doc->oldNs;
13850
ctxt->instate = XML_PARSER_CONTENT;
13851
ctxt->depth = depth;
13854
* Doing validity checking on chunk doesn't make sense
13856
ctxt->validate = 0;
13857
ctxt->loadsubset = 0;
13858
xmlDetectSAX2(ctxt);
13860
if ( doc != NULL ){
13861
content = doc->children;
13862
doc->children = NULL;
13863
xmlParseContent(ctxt);
13864
doc->children = content;
13867
xmlParseContent(ctxt);
13869
if ((RAW == '<') && (NXT(1) == '/')) {
13870
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13871
} else if (RAW != 0) {
13872
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13874
if (ctxt->node != newDoc->children) {
13875
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13878
if (!ctxt->wellFormed) {
13879
if (ctxt->errNo == 0)
13887
if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13891
* Return the newly created nodeset after unlinking it from
13892
* they pseudo parent.
13894
cur = newDoc->children->children;
13896
while (cur != NULL) {
13897
xmlSetTreeDoc(cur, doc);
13898
cur->parent = NULL;
13901
newDoc->children->children = NULL;
13905
ctxt->sax = oldsax;
13906
xmlFreeParserCtxt(ctxt);
13907
newDoc->intSubset = NULL;
13908
newDoc->extSubset = NULL;
13909
newDoc->oldNs = NULL;
13910
xmlFreeDoc(newDoc);
13916
* xmlSAXParseEntity:
13917
* @sax: the SAX handler block
13918
* @filename: the filename
13920
* parse an XML external entity out of context and build a tree.
13921
* It use the given SAX function block to handle the parsing callback.
13922
* If sax is NULL, fallback to the default DOM tree building routines.
13924
* [78] extParsedEnt ::= TextDecl? content
13926
* This correspond to a "Well Balanced" chunk
13928
* Returns the resulting document tree
13932
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13934
xmlParserCtxtPtr ctxt;
13936
ctxt = xmlCreateFileParserCtxt(filename);
13937
if (ctxt == NULL) {
13941
if (ctxt->sax != NULL)
13942
xmlFree(ctxt->sax);
13944
ctxt->userData = NULL;
13947
xmlParseExtParsedEnt(ctxt);
13949
if (ctxt->wellFormed)
13953
xmlFreeDoc(ctxt->myDoc);
13954
ctxt->myDoc = NULL;
13958
xmlFreeParserCtxt(ctxt);
13965
* @filename: the filename
13967
* parse an XML external entity out of context and build a tree.
13969
* [78] extParsedEnt ::= TextDecl? content
13971
* This correspond to a "Well Balanced" chunk
13973
* Returns the resulting document tree
13977
xmlParseEntity(const char *filename) {
13978
return(xmlSAXParseEntity(NULL, filename));
13980
#endif /* LIBXML_SAX1_ENABLED */
13983
* xmlCreateEntityParserCtxtInternal:
13984
* @URL: the entity URL
13985
* @ID: the entity PUBLIC ID
13986
* @base: a possible base for the target URI
13987
* @pctx: parser context used to set options on new context
13989
* Create a parser context for an external entity
13990
* Automatic support for ZLIB/Compress compressed document is provided
13991
* by default if found at compile-time.
13993
* Returns the new parser context or NULL
13995
static xmlParserCtxtPtr
13996
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13997
const xmlChar *base, xmlParserCtxtPtr pctx) {
13998
xmlParserCtxtPtr ctxt;
13999
xmlParserInputPtr inputStream;
14000
char *directory = NULL;
14003
ctxt = xmlNewParserCtxt();
14004
if (ctxt == NULL) {
14008
if (pctx != NULL) {
14009
ctxt->options = pctx->options;
14010
ctxt->_private = pctx->_private;
14013
uri = xmlBuildURI(URL, base);
14016
inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14017
if (inputStream == NULL) {
14018
xmlFreeParserCtxt(ctxt);
14022
inputPush(ctxt, inputStream);
14024
if ((ctxt->directory == NULL) && (directory == NULL))
14025
directory = xmlParserGetDirectory((char *)URL);
14026
if ((ctxt->directory == NULL) && (directory != NULL))
14027
ctxt->directory = directory;
14029
inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14030
if (inputStream == NULL) {
14032
xmlFreeParserCtxt(ctxt);
14036
inputPush(ctxt, inputStream);
14038
if ((ctxt->directory == NULL) && (directory == NULL))
14039
directory = xmlParserGetDirectory((char *)uri);
14040
if ((ctxt->directory == NULL) && (directory != NULL))
14041
ctxt->directory = directory;
14048
* xmlCreateEntityParserCtxt:
14049
* @URL: the entity URL
14050
* @ID: the entity PUBLIC ID
14051
* @base: a possible base for the target URI
14053
* Create a parser context for an external entity
14054
* Automatic support for ZLIB/Compress compressed document is provided
14055
* by default if found at compile-time.
14057
* Returns the new parser context or NULL
14060
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14061
const xmlChar *base) {
14062
return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14066
/************************************************************************
14068
* Front ends when parsing from a file *
14070
************************************************************************/
14073
* xmlCreateURLParserCtxt:
14074
* @filename: the filename or URL
14075
* @options: a combination of xmlParserOption
14077
* Create a parser context for a file or URL content.
14078
* Automatic support for ZLIB/Compress compressed document is provided
14079
* by default if found at compile-time and for file accesses
14081
* Returns the new parser context or NULL
14084
xmlCreateURLParserCtxt(const char *filename, int options)
14086
xmlParserCtxtPtr ctxt;
14087
xmlParserInputPtr inputStream;
14088
char *directory = NULL;
14090
ctxt = xmlNewParserCtxt();
14091
if (ctxt == NULL) {
14092
xmlErrMemory(NULL, "cannot allocate parser context");
14097
xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14098
ctxt->linenumbers = 1;
14100
inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14101
if (inputStream == NULL) {
14102
xmlFreeParserCtxt(ctxt);
14106
inputPush(ctxt, inputStream);
14107
if ((ctxt->directory == NULL) && (directory == NULL))
14108
directory = xmlParserGetDirectory(filename);
14109
if ((ctxt->directory == NULL) && (directory != NULL))
14110
ctxt->directory = directory;
14116
* xmlCreateFileParserCtxt:
14117
* @filename: the filename
14119
* Create a parser context for a file content.
14120
* Automatic support for ZLIB/Compress compressed document is provided
14121
* by default if found at compile-time.
14123
* Returns the new parser context or NULL
14126
xmlCreateFileParserCtxt(const char *filename)
14128
return(xmlCreateURLParserCtxt(filename, 0));
14131
#ifdef LIBXML_SAX1_ENABLED
14133
* xmlSAXParseFileWithData:
14134
* @sax: the SAX handler block
14135
* @filename: the filename
14136
* @recovery: work in recovery mode, i.e. tries to read no Well Formed
14138
* @data: the userdata
14140
* parse an XML file and build a tree. Automatic support for ZLIB/Compress
14141
* compressed document is provided by default if found at compile-time.
14142
* It use the given SAX function block to handle the parsing callback.
14143
* If sax is NULL, fallback to the default DOM tree building routines.
14145
* User data (void *) is stored within the parser context in the
14146
* context's _private member, so it is available nearly everywhere in libxml
14148
* Returns the resulting document tree
14152
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14153
int recovery, void *data) {
14155
xmlParserCtxtPtr ctxt;
14159
ctxt = xmlCreateFileParserCtxt(filename);
14160
if (ctxt == NULL) {
14164
if (ctxt->sax != NULL)
14165
xmlFree(ctxt->sax);
14168
xmlDetectSAX2(ctxt);
14170
ctxt->_private = data;
14173
if (ctxt->directory == NULL)
14174
ctxt->directory = xmlParserGetDirectory(filename);
14176
ctxt->recovery = recovery;
14178
xmlParseDocument(ctxt);
14180
if ((ctxt->wellFormed) || recovery) {
14183
if (ctxt->input->buf->compressed > 0)
14184
ret->compression = 9;
14186
ret->compression = ctxt->input->buf->compressed;
14191
xmlFreeDoc(ctxt->myDoc);
14192
ctxt->myDoc = NULL;
14196
xmlFreeParserCtxt(ctxt);
14203
* @sax: the SAX handler block
14204
* @filename: the filename
14205
* @recovery: work in recovery mode, i.e. tries to read no Well Formed
14208
* parse an XML file and build a tree. Automatic support for ZLIB/Compress
14209
* compressed document is provided by default if found at compile-time.
14210
* It use the given SAX function block to handle the parsing callback.
14211
* If sax is NULL, fallback to the default DOM tree building routines.
14213
* Returns the resulting document tree
14217
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14219
return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14224
* @cur: a pointer to an array of xmlChar
14226
* parse an XML in-memory document and build a tree.
14227
* In the case the document is not Well Formed, a attempt to build a
14228
* tree is tried anyway
14230
* Returns the resulting document tree or NULL in case of failure
14234
xmlRecoverDoc(const xmlChar *cur) {
14235
return(xmlSAXParseDoc(NULL, cur, 1));
14240
* @filename: the filename
14242
* parse an XML file and build a tree. Automatic support for ZLIB/Compress
14243
* compressed document is provided by default if found at compile-time.
14245
* Returns the resulting document tree if the file was wellformed,
14250
xmlParseFile(const char *filename) {
14251
return(xmlSAXParseFile(NULL, filename, 0));
14256
* @filename: the filename
14258
* parse an XML file and build a tree. Automatic support for ZLIB/Compress
14259
* compressed document is provided by default if found at compile-time.
14260
* In the case the document is not Well Formed, it attempts to build
14263
* Returns the resulting document tree or NULL in case of failure
14267
xmlRecoverFile(const char *filename) {
14268
return(xmlSAXParseFile(NULL, filename, 1));
14273
* xmlSetupParserForBuffer:
14274
* @ctxt: an XML parser context
14275
* @buffer: a xmlChar * buffer
14276
* @filename: a file name
14278
* Setup the parser context to parse a new buffer; Clears any prior
14279
* contents from the parser context. The buffer parameter must not be
14280
* NULL, but the filename parameter can be
14283
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14284
const char* filename)
14286
xmlParserInputPtr input;
14288
if ((ctxt == NULL) || (buffer == NULL))
14291
input = xmlNewInputStream(ctxt);
14292
if (input == NULL) {
14293
xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14294
xmlClearParserCtxt(ctxt);
14298
xmlClearParserCtxt(ctxt);
14299
if (filename != NULL)
14300
input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14301
input->base = buffer;
14302
input->cur = buffer;
14303
input->end = &buffer[xmlStrlen(buffer)];
14304
inputPush(ctxt, input);
14308
* xmlSAXUserParseFile:
14309
* @sax: a SAX handler
14310
* @user_data: The user data returned on SAX callbacks
14311
* @filename: a file name
14313
* parse an XML file and call the given SAX handler routines.
14314
* Automatic support for ZLIB/Compress compressed document is provided
14316
* Returns 0 in case of success or a error number otherwise
14319
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14320
const char *filename) {
14322
xmlParserCtxtPtr ctxt;
14324
ctxt = xmlCreateFileParserCtxt(filename);
14325
if (ctxt == NULL) return -1;
14326
if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14327
xmlFree(ctxt->sax);
14329
xmlDetectSAX2(ctxt);
14331
if (user_data != NULL)
14332
ctxt->userData = user_data;
14334
xmlParseDocument(ctxt);
14336
if (ctxt->wellFormed)
14339
if (ctxt->errNo != 0)
14346
if (ctxt->myDoc != NULL) {
14347
xmlFreeDoc(ctxt->myDoc);
14348
ctxt->myDoc = NULL;
14350
xmlFreeParserCtxt(ctxt);
14354
#endif /* LIBXML_SAX1_ENABLED */
14356
/************************************************************************
14358
* Front ends when parsing from memory *
14360
************************************************************************/
14363
* xmlCreateMemoryParserCtxt:
14364
* @buffer: a pointer to a char array
14365
* @size: the size of the array
14367
* Create a parser context for an XML in-memory document.
14369
* Returns the new parser context or NULL
14372
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14373
xmlParserCtxtPtr ctxt;
14374
xmlParserInputPtr input;
14375
xmlParserInputBufferPtr buf;
14377
if (buffer == NULL)
14382
ctxt = xmlNewParserCtxt();
14386
/* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14387
buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14389
xmlFreeParserCtxt(ctxt);
14393
input = xmlNewInputStream(ctxt);
14394
if (input == NULL) {
14395
xmlFreeParserInputBuffer(buf);
14396
xmlFreeParserCtxt(ctxt);
14400
input->filename = NULL;
14402
xmlBufResetInput(input->buf->buffer, input);
14404
inputPush(ctxt, input);
14408
#ifdef LIBXML_SAX1_ENABLED
14410
* xmlSAXParseMemoryWithData:
14411
* @sax: the SAX handler block
14412
* @buffer: an pointer to a char array
14413
* @size: the size of the array
14414
* @recovery: work in recovery mode, i.e. tries to read no Well Formed
14416
* @data: the userdata
14418
* parse an XML in-memory block and use the given SAX function block
14419
* to handle the parsing callback. If sax is NULL, fallback to the default
14420
* DOM tree building routines.
14422
* User data (void *) is stored within the parser context in the
14423
* context's _private member, so it is available nearly everywhere in libxml
14425
* Returns the resulting document tree
14429
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14430
int size, int recovery, void *data) {
14432
xmlParserCtxtPtr ctxt;
14436
ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14437
if (ctxt == NULL) return(NULL);
14439
if (ctxt->sax != NULL)
14440
xmlFree(ctxt->sax);
14443
xmlDetectSAX2(ctxt);
14445
ctxt->_private=data;
14448
ctxt->recovery = recovery;
14450
xmlParseDocument(ctxt);
14452
if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14455
xmlFreeDoc(ctxt->myDoc);
14456
ctxt->myDoc = NULL;
14460
xmlFreeParserCtxt(ctxt);
14466
* xmlSAXParseMemory:
14467
* @sax: the SAX handler block
14468
* @buffer: an pointer to a char array
14469
* @size: the size of the array
14470
* @recovery: work in recovery mode, i.e. tries to read not Well Formed
14473
* parse an XML in-memory block and use the given SAX function block
14474
* to handle the parsing callback. If sax is NULL, fallback to the default
14475
* DOM tree building routines.
14477
* Returns the resulting document tree
14480
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14481
int size, int recovery) {
14482
return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14487
* @buffer: an pointer to a char array
14488
* @size: the size of the array
14490
* parse an XML in-memory block and build a tree.
14492
* Returns the resulting document tree
14495
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14496
return(xmlSAXParseMemory(NULL, buffer, size, 0));
14500
* xmlRecoverMemory:
14501
* @buffer: an pointer to a char array
14502
* @size: the size of the array
14504
* parse an XML in-memory block and build a tree.
14505
* In the case the document is not Well Formed, an attempt to
14506
* build a tree is tried anyway
14508
* Returns the resulting document tree or NULL in case of error
14511
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14512
return(xmlSAXParseMemory(NULL, buffer, size, 1));
14516
* xmlSAXUserParseMemory:
14517
* @sax: a SAX handler
14518
* @user_data: The user data returned on SAX callbacks
14519
* @buffer: an in-memory XML document input
14520
* @size: the length of the XML document in bytes
14522
* A better SAX parsing routine.
14523
* parse an XML in-memory buffer and call the given SAX handler routines.
14525
* Returns 0 in case of success or a error number otherwise
14527
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14528
const char *buffer, int size) {
14530
xmlParserCtxtPtr ctxt;
14534
ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14535
if (ctxt == NULL) return -1;
14536
if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14537
xmlFree(ctxt->sax);
14539
xmlDetectSAX2(ctxt);
14541
if (user_data != NULL)
14542
ctxt->userData = user_data;
14544
xmlParseDocument(ctxt);
14546
if (ctxt->wellFormed)
14549
if (ctxt->errNo != 0)
14556
if (ctxt->myDoc != NULL) {
14557
xmlFreeDoc(ctxt->myDoc);
14558
ctxt->myDoc = NULL;
14560
xmlFreeParserCtxt(ctxt);
14564
#endif /* LIBXML_SAX1_ENABLED */
14567
* xmlCreateDocParserCtxt:
14568
* @cur: a pointer to an array of xmlChar
14570
* Creates a parser context for an XML in-memory document.
14572
* Returns the new parser context or NULL
14575
xmlCreateDocParserCtxt(const xmlChar *cur) {
14580
len = xmlStrlen(cur);
14581
return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14584
#ifdef LIBXML_SAX1_ENABLED
14587
* @sax: the SAX handler block
14588
* @cur: a pointer to an array of xmlChar
14589
* @recovery: work in recovery mode, i.e. tries to read no Well Formed
14592
* parse an XML in-memory document and build a tree.
14593
* It use the given SAX function block to handle the parsing callback.
14594
* If sax is NULL, fallback to the default DOM tree building routines.
14596
* Returns the resulting document tree
14600
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14602
xmlParserCtxtPtr ctxt;
14603
xmlSAXHandlerPtr oldsax = NULL;
14605
if (cur == NULL) return(NULL);
14608
ctxt = xmlCreateDocParserCtxt(cur);
14609
if (ctxt == NULL) return(NULL);
14611
oldsax = ctxt->sax;
14613
ctxt->userData = NULL;
14615
xmlDetectSAX2(ctxt);
14617
xmlParseDocument(ctxt);
14618
if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14621
xmlFreeDoc(ctxt->myDoc);
14622
ctxt->myDoc = NULL;
14625
ctxt->sax = oldsax;
14626
xmlFreeParserCtxt(ctxt);
14633
* @cur: a pointer to an array of xmlChar
14635
* parse an XML in-memory document and build a tree.
14637
* Returns the resulting document tree
14641
xmlParseDoc(const xmlChar *cur) {
14642
return(xmlSAXParseDoc(NULL, cur, 0));
14644
#endif /* LIBXML_SAX1_ENABLED */
14646
#ifdef LIBXML_LEGACY_ENABLED
14647
/************************************************************************
14649
* Specific function to keep track of entities references *
14650
* and used by the XSLT debugger *
14652
************************************************************************/
14654
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14657
* xmlAddEntityReference:
14658
* @ent : A valid entity
14659
* @firstNode : A valid first node for children of entity
14660
* @lastNode : A valid last node of children entity
14662
* Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14665
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14666
xmlNodePtr lastNode)
14668
if (xmlEntityRefFunc != NULL) {
14669
(*xmlEntityRefFunc) (ent, firstNode, lastNode);
14675
* xmlSetEntityReferenceFunc:
14676
* @func: A valid function
14678
* Set the function to call call back when a xml reference has been made
14681
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14683
xmlEntityRefFunc = func;
14685
#endif /* LIBXML_LEGACY_ENABLED */
14687
/************************************************************************
14691
************************************************************************/
14693
#ifdef LIBXML_XPATH_ENABLED
14694
#include <libxml/xpath.h>
14697
extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14698
static int xmlParserInitialized = 0;
14703
* Initialization function for the XML parser.
14704
* This is not reentrant. Call once before processing in case of
14705
* use in multithreaded programs.
14709
xmlInitParser(void) {
14710
if (xmlParserInitialized != 0)
14713
#ifdef LIBXML_THREAD_ENABLED
14714
__xmlGlobalInitMutexLock();
14715
if (xmlParserInitialized == 0) {
14719
if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14720
(xmlGenericError == NULL))
14721
initGenericErrorDefaultFunc(NULL);
14723
xmlInitializeDict();
14724
xmlInitCharEncodingHandlers();
14725
xmlDefaultSAXHandlerInit();
14726
xmlRegisterDefaultInputCallbacks();
14727
#ifdef LIBXML_OUTPUT_ENABLED
14728
xmlRegisterDefaultOutputCallbacks();
14729
#endif /* LIBXML_OUTPUT_ENABLED */
14730
#ifdef LIBXML_HTML_ENABLED
14731
htmlInitAutoClose();
14732
htmlDefaultSAXHandlerInit();
14734
#ifdef LIBXML_XPATH_ENABLED
14737
xmlParserInitialized = 1;
14738
#ifdef LIBXML_THREAD_ENABLED
14740
__xmlGlobalInitMutexUnlock();
14745
* xmlCleanupParser:
14747
* This function name is somewhat misleading. It does not clean up
14748
* parser state, it cleans up memory allocated by the library itself.
14749
* It is a cleanup function for the XML library. It tries to reclaim all
14750
* related global memory allocated for the library processing.
14751
* It doesn't deallocate any document related memory. One should
14752
* call xmlCleanupParser() only when the process has finished using
14753
* the library and all XML/HTML documents built with it.
14754
* See also xmlInitParser() which has the opposite function of preparing
14755
* the library for operations.
14757
* WARNING: if your application is multithreaded or has plugin support
14758
* calling this may crash the application if another thread or
14759
* a plugin is still using libxml2. It's sometimes very hard to
14760
* guess if libxml2 is in use in the application, some libraries
14761
* or plugins may use it without notice. In case of doubt abstain
14762
* from calling this function or do it just before calling exit()
14763
* to avoid leak reports from valgrind !
14767
xmlCleanupParser(void) {
14768
if (!xmlParserInitialized)
14771
xmlCleanupCharEncodingHandlers();
14772
#ifdef LIBXML_CATALOG_ENABLED
14773
xmlCatalogCleanup();
14776
xmlCleanupInputCallbacks();
14777
#ifdef LIBXML_OUTPUT_ENABLED
14778
xmlCleanupOutputCallbacks();
14780
#ifdef LIBXML_SCHEMAS_ENABLED
14781
xmlSchemaCleanupTypes();
14782
xmlRelaxNGCleanupTypes();
14784
xmlResetLastError();
14785
xmlCleanupGlobals();
14786
xmlCleanupThreads(); /* must be last if called not from the main thread */
14787
xmlCleanupMemory();
14788
xmlParserInitialized = 0;
14791
/************************************************************************
14793
* New set (2.6.0) of simpler and more flexible APIs *
14795
************************************************************************/
14801
* Free a string if it is not owned by the "dict" dictionnary in the
14804
#define DICT_FREE(str) \
14805
if ((str) && ((!dict) || \
14806
(xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \
14807
xmlFree((char *)(str));
14811
* @ctxt: an XML parser context
14813
* Reset a parser context
14816
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14818
xmlParserInputPtr input;
14826
while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14827
xmlFreeInputStream(input);
14830
ctxt->input = NULL;
14833
if (ctxt->spaceTab != NULL) {
14834
ctxt->spaceTab[0] = -1;
14835
ctxt->space = &ctxt->spaceTab[0];
14837
ctxt->space = NULL;
14847
DICT_FREE(ctxt->version);
14848
ctxt->version = NULL;
14849
DICT_FREE(ctxt->encoding);
14850
ctxt->encoding = NULL;
14851
DICT_FREE(ctxt->directory);
14852
ctxt->directory = NULL;
14853
DICT_FREE(ctxt->extSubURI);
14854
ctxt->extSubURI = NULL;
14855
DICT_FREE(ctxt->extSubSystem);
14856
ctxt->extSubSystem = NULL;
14857
if (ctxt->myDoc != NULL)
14858
xmlFreeDoc(ctxt->myDoc);
14859
ctxt->myDoc = NULL;
14861
ctxt->standalone = -1;
14862
ctxt->hasExternalSubset = 0;
14863
ctxt->hasPErefs = 0;
14865
ctxt->external = 0;
14866
ctxt->instate = XML_PARSER_START;
14869
ctxt->wellFormed = 1;
14870
ctxt->nsWellFormed = 1;
14871
ctxt->disableSAX = 0;
14874
ctxt->vctxt.userData = ctxt;
14875
ctxt->vctxt.error = xmlParserValidityError;
14876
ctxt->vctxt.warning = xmlParserValidityWarning;
14878
ctxt->record_info = 0;
14880
ctxt->checkIndex = 0;
14881
ctxt->inSubset = 0;
14882
ctxt->errNo = XML_ERR_OK;
14884
ctxt->charset = XML_CHAR_ENCODING_UTF8;
14885
ctxt->catalogs = NULL;
14886
ctxt->nbentities = 0;
14887
ctxt->sizeentities = 0;
14888
ctxt->sizeentcopy = 0;
14889
xmlInitNodeInfoSeq(&ctxt->node_seq);
14891
if (ctxt->attsDefault != NULL) {
14892
xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14893
ctxt->attsDefault = NULL;
14895
if (ctxt->attsSpecial != NULL) {
14896
xmlHashFree(ctxt->attsSpecial, NULL);
14897
ctxt->attsSpecial = NULL;
14900
#ifdef LIBXML_CATALOG_ENABLED
14901
if (ctxt->catalogs != NULL)
14902
xmlCatalogFreeLocal(ctxt->catalogs);
14904
if (ctxt->lastError.code != XML_ERR_OK)
14905
xmlResetError(&ctxt->lastError);
14909
* xmlCtxtResetPush:
14910
* @ctxt: an XML parser context
14911
* @chunk: a pointer to an array of chars
14912
* @size: number of chars in the array
14913
* @filename: an optional file name or URI
14914
* @encoding: the document encoding, or NULL
14916
* Reset a push parser context
14918
* Returns 0 in case of success and 1 in case of error
14921
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14922
int size, const char *filename, const char *encoding)
14924
xmlParserInputPtr inputStream;
14925
xmlParserInputBufferPtr buf;
14926
xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14931
if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14932
enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14934
buf = xmlAllocParserInputBuffer(enc);
14938
if (ctxt == NULL) {
14939
xmlFreeParserInputBuffer(buf);
14943
xmlCtxtReset(ctxt);
14945
if (ctxt->pushTab == NULL) {
14946
ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14947
sizeof(xmlChar *));
14948
if (ctxt->pushTab == NULL) {
14949
xmlErrMemory(ctxt, NULL);
14950
xmlFreeParserInputBuffer(buf);
14955
if (filename == NULL) {
14956
ctxt->directory = NULL;
14958
ctxt->directory = xmlParserGetDirectory(filename);
14961
inputStream = xmlNewInputStream(ctxt);
14962
if (inputStream == NULL) {
14963
xmlFreeParserInputBuffer(buf);
14967
if (filename == NULL)
14968
inputStream->filename = NULL;
14970
inputStream->filename = (char *)
14971
xmlCanonicPath((const xmlChar *) filename);
14972
inputStream->buf = buf;
14973
xmlBufResetInput(buf->buffer, inputStream);
14975
inputPush(ctxt, inputStream);
14977
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14978
(ctxt->input->buf != NULL)) {
14979
size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14980
size_t cur = ctxt->input->cur - ctxt->input->base;
14982
xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14984
xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14986
xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14990
if (encoding != NULL) {
14991
xmlCharEncodingHandlerPtr hdlr;
14993
if (ctxt->encoding != NULL)
14994
xmlFree((xmlChar *) ctxt->encoding);
14995
ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14997
hdlr = xmlFindCharEncodingHandler(encoding);
14998
if (hdlr != NULL) {
14999
xmlSwitchToEncoding(ctxt, hdlr);
15001
xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
15002
"Unsupported encoding %s\n", BAD_CAST encoding);
15004
} else if (enc != XML_CHAR_ENCODING_NONE) {
15005
xmlSwitchEncoding(ctxt, enc);
15013
* xmlCtxtUseOptionsInternal:
15014
* @ctxt: an XML parser context
15015
* @options: a combination of xmlParserOption
15016
* @encoding: the user provided encoding to use
15018
* Applies the options to the parser context
15020
* Returns 0 in case of success, the set of unknown or unimplemented options
15021
* in case of error.
15024
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15028
if (encoding != NULL) {
15029
if (ctxt->encoding != NULL)
15030
xmlFree((xmlChar *) ctxt->encoding);
15031
ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15033
if (options & XML_PARSE_RECOVER) {
15034
ctxt->recovery = 1;
15035
options -= XML_PARSE_RECOVER;
15036
ctxt->options |= XML_PARSE_RECOVER;
15038
ctxt->recovery = 0;
15039
if (options & XML_PARSE_DTDLOAD) {
15040
ctxt->loadsubset = XML_DETECT_IDS;
15041
options -= XML_PARSE_DTDLOAD;
15042
ctxt->options |= XML_PARSE_DTDLOAD;
15044
ctxt->loadsubset = 0;
15045
if (options & XML_PARSE_DTDATTR) {
15046
ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15047
options -= XML_PARSE_DTDATTR;
15048
ctxt->options |= XML_PARSE_DTDATTR;
15050
if (options & XML_PARSE_NOENT) {
15051
ctxt->replaceEntities = 1;
15052
/* ctxt->loadsubset |= XML_DETECT_IDS; */
15053
options -= XML_PARSE_NOENT;
15054
ctxt->options |= XML_PARSE_NOENT;
15056
ctxt->replaceEntities = 0;
15057
if (options & XML_PARSE_PEDANTIC) {
15058
ctxt->pedantic = 1;
15059
options -= XML_PARSE_PEDANTIC;
15060
ctxt->options |= XML_PARSE_PEDANTIC;
15062
ctxt->pedantic = 0;
15063
if (options & XML_PARSE_NOBLANKS) {
15064
ctxt->keepBlanks = 0;
15065
ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15066
options -= XML_PARSE_NOBLANKS;
15067
ctxt->options |= XML_PARSE_NOBLANKS;
15069
ctxt->keepBlanks = 1;
15070
if (options & XML_PARSE_DTDVALID) {
15071
ctxt->validate = 1;
15072
if (options & XML_PARSE_NOWARNING)
15073
ctxt->vctxt.warning = NULL;
15074
if (options & XML_PARSE_NOERROR)
15075
ctxt->vctxt.error = NULL;
15076
options -= XML_PARSE_DTDVALID;
15077
ctxt->options |= XML_PARSE_DTDVALID;
15079
ctxt->validate = 0;
15080
if (options & XML_PARSE_NOWARNING) {
15081
ctxt->sax->warning = NULL;
15082
options -= XML_PARSE_NOWARNING;
15084
if (options & XML_PARSE_NOERROR) {
15085
ctxt->sax->error = NULL;
15086
ctxt->sax->fatalError = NULL;
15087
options -= XML_PARSE_NOERROR;
15089
#ifdef LIBXML_SAX1_ENABLED
15090
if (options & XML_PARSE_SAX1) {
15091
ctxt->sax->startElement = xmlSAX2StartElement;
15092
ctxt->sax->endElement = xmlSAX2EndElement;
15093
ctxt->sax->startElementNs = NULL;
15094
ctxt->sax->endElementNs = NULL;
15095
ctxt->sax->initialized = 1;
15096
options -= XML_PARSE_SAX1;
15097
ctxt->options |= XML_PARSE_SAX1;
15099
#endif /* LIBXML_SAX1_ENABLED */
15100
if (options & XML_PARSE_NODICT) {
15101
ctxt->dictNames = 0;
15102
options -= XML_PARSE_NODICT;
15103
ctxt->options |= XML_PARSE_NODICT;
15105
ctxt->dictNames = 1;
15107
if (options & XML_PARSE_NOCDATA) {
15108
ctxt->sax->cdataBlock = NULL;
15109
options -= XML_PARSE_NOCDATA;
15110
ctxt->options |= XML_PARSE_NOCDATA;
15112
if (options & XML_PARSE_NSCLEAN) {
15113
ctxt->options |= XML_PARSE_NSCLEAN;
15114
options -= XML_PARSE_NSCLEAN;
15116
if (options & XML_PARSE_NONET) {
15117
ctxt->options |= XML_PARSE_NONET;
15118
options -= XML_PARSE_NONET;
15120
if (options & XML_PARSE_COMPACT) {
15121
ctxt->options |= XML_PARSE_COMPACT;
15122
options -= XML_PARSE_COMPACT;
15124
if (options & XML_PARSE_OLD10) {
15125
ctxt->options |= XML_PARSE_OLD10;
15126
options -= XML_PARSE_OLD10;
15128
if (options & XML_PARSE_NOBASEFIX) {
15129
ctxt->options |= XML_PARSE_NOBASEFIX;
15130
options -= XML_PARSE_NOBASEFIX;
15132
if (options & XML_PARSE_HUGE) {
15133
ctxt->options |= XML_PARSE_HUGE;
15134
options -= XML_PARSE_HUGE;
15135
if (ctxt->dict != NULL)
15136
xmlDictSetLimit(ctxt->dict, 0);
15138
if (options & XML_PARSE_OLDSAX) {
15139
ctxt->options |= XML_PARSE_OLDSAX;
15140
options -= XML_PARSE_OLDSAX;
15142
if (options & XML_PARSE_IGNORE_ENC) {
15143
ctxt->options |= XML_PARSE_IGNORE_ENC;
15144
options -= XML_PARSE_IGNORE_ENC;
15146
if (options & XML_PARSE_BIG_LINES) {
15147
ctxt->options |= XML_PARSE_BIG_LINES;
15148
options -= XML_PARSE_BIG_LINES;
15150
ctxt->linenumbers = 1;
15155
* xmlCtxtUseOptions:
15156
* @ctxt: an XML parser context
15157
* @options: a combination of xmlParserOption
15159
* Applies the options to the parser context
15161
* Returns 0 in case of success, the set of unknown or unimplemented options
15162
* in case of error.
15165
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15167
return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15172
* @ctxt: an XML parser context
15173
* @URL: the base URL to use for the document
15174
* @encoding: the document encoding, or NULL
15175
* @options: a combination of xmlParserOption
15176
* @reuse: keep the context for reuse
15178
* Common front-end for the xmlRead functions
15180
* Returns the resulting document tree or NULL
15183
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15184
int options, int reuse)
15188
xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15189
if (encoding != NULL) {
15190
xmlCharEncodingHandlerPtr hdlr;
15192
hdlr = xmlFindCharEncodingHandler(encoding);
15194
xmlSwitchToEncoding(ctxt, hdlr);
15196
if ((URL != NULL) && (ctxt->input != NULL) &&
15197
(ctxt->input->filename == NULL))
15198
ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15199
xmlParseDocument(ctxt);
15200
if ((ctxt->wellFormed) || ctxt->recovery)
15204
if (ctxt->myDoc != NULL) {
15205
xmlFreeDoc(ctxt->myDoc);
15208
ctxt->myDoc = NULL;
15210
xmlFreeParserCtxt(ctxt);
15218
* @cur: a pointer to a zero terminated string
15219
* @URL: the base URL to use for the document
15220
* @encoding: the document encoding, or NULL
15221
* @options: a combination of xmlParserOption
15223
* parse an XML in-memory document and build a tree.
15225
* Returns the resulting document tree
15228
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15230
xmlParserCtxtPtr ctxt;
15235
ctxt = xmlCreateDocParserCtxt(cur);
15238
return (xmlDoRead(ctxt, URL, encoding, options, 0));
15243
* @filename: a file or URL
15244
* @encoding: the document encoding, or NULL
15245
* @options: a combination of xmlParserOption
15247
* parse an XML file from the filesystem or the network.
15249
* Returns the resulting document tree
15252
xmlReadFile(const char *filename, const char *encoding, int options)
15254
xmlParserCtxtPtr ctxt;
15256
ctxt = xmlCreateURLParserCtxt(filename, options);
15259
return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15264
* @buffer: a pointer to a char array
15265
* @size: the size of the array
15266
* @URL: the base URL to use for the document
15267
* @encoding: the document encoding, or NULL
15268
* @options: a combination of xmlParserOption
15270
* parse an XML in-memory document and build a tree.
15272
* Returns the resulting document tree
15275
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15277
xmlParserCtxtPtr ctxt;
15279
ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15282
return (xmlDoRead(ctxt, URL, encoding, options, 0));
15287
* @fd: an open file descriptor
15288
* @URL: the base URL to use for the document
15289
* @encoding: the document encoding, or NULL
15290
* @options: a combination of xmlParserOption
15292
* parse an XML from a file descriptor and build a tree.
15293
* NOTE that the file descriptor will not be closed when the
15294
* reader is closed or reset.
15296
* Returns the resulting document tree
15299
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15301
xmlParserCtxtPtr ctxt;
15302
xmlParserInputBufferPtr input;
15303
xmlParserInputPtr stream;
15308
input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15311
input->closecallback = NULL;
15312
ctxt = xmlNewParserCtxt();
15313
if (ctxt == NULL) {
15314
xmlFreeParserInputBuffer(input);
15317
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15318
if (stream == NULL) {
15319
xmlFreeParserInputBuffer(input);
15320
xmlFreeParserCtxt(ctxt);
15323
inputPush(ctxt, stream);
15324
return (xmlDoRead(ctxt, URL, encoding, options, 0));
15329
* @ioread: an I/O read function
15330
* @ioclose: an I/O close function
15331
* @ioctx: an I/O handler
15332
* @URL: the base URL to use for the document
15333
* @encoding: the document encoding, or NULL
15334
* @options: a combination of xmlParserOption
15336
* parse an XML document from I/O functions and source and build a tree.
15338
* Returns the resulting document tree
15341
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15342
void *ioctx, const char *URL, const char *encoding, int options)
15344
xmlParserCtxtPtr ctxt;
15345
xmlParserInputBufferPtr input;
15346
xmlParserInputPtr stream;
15348
if (ioread == NULL)
15351
input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15352
XML_CHAR_ENCODING_NONE);
15353
if (input == NULL) {
15354
if (ioclose != NULL)
15358
ctxt = xmlNewParserCtxt();
15359
if (ctxt == NULL) {
15360
xmlFreeParserInputBuffer(input);
15363
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15364
if (stream == NULL) {
15365
xmlFreeParserInputBuffer(input);
15366
xmlFreeParserCtxt(ctxt);
15369
inputPush(ctxt, stream);
15370
return (xmlDoRead(ctxt, URL, encoding, options, 0));
15375
* @ctxt: an XML parser context
15376
* @cur: a pointer to a zero terminated string
15377
* @URL: the base URL to use for the document
15378
* @encoding: the document encoding, or NULL
15379
* @options: a combination of xmlParserOption
15381
* parse an XML in-memory document and build a tree.
15382
* This reuses the existing @ctxt parser context
15384
* Returns the resulting document tree
15387
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15388
const char *URL, const char *encoding, int options)
15390
xmlParserInputPtr stream;
15397
xmlCtxtReset(ctxt);
15399
stream = xmlNewStringInputStream(ctxt, cur);
15400
if (stream == NULL) {
15403
inputPush(ctxt, stream);
15404
return (xmlDoRead(ctxt, URL, encoding, options, 1));
15409
* @ctxt: an XML parser context
15410
* @filename: a file or URL
15411
* @encoding: the document encoding, or NULL
15412
* @options: a combination of xmlParserOption
15414
* parse an XML file from the filesystem or the network.
15415
* This reuses the existing @ctxt parser context
15417
* Returns the resulting document tree
15420
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15421
const char *encoding, int options)
15423
xmlParserInputPtr stream;
15425
if (filename == NULL)
15430
xmlCtxtReset(ctxt);
15432
stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15433
if (stream == NULL) {
15436
inputPush(ctxt, stream);
15437
return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15441
* xmlCtxtReadMemory:
15442
* @ctxt: an XML parser context
15443
* @buffer: a pointer to a char array
15444
* @size: the size of the array
15445
* @URL: the base URL to use for the document
15446
* @encoding: the document encoding, or NULL
15447
* @options: a combination of xmlParserOption
15449
* parse an XML in-memory document and build a tree.
15450
* This reuses the existing @ctxt parser context
15452
* Returns the resulting document tree
15455
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15456
const char *URL, const char *encoding, int options)
15458
xmlParserInputBufferPtr input;
15459
xmlParserInputPtr stream;
15463
if (buffer == NULL)
15466
xmlCtxtReset(ctxt);
15468
input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15469
if (input == NULL) {
15473
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15474
if (stream == NULL) {
15475
xmlFreeParserInputBuffer(input);
15479
inputPush(ctxt, stream);
15480
return (xmlDoRead(ctxt, URL, encoding, options, 1));
15485
* @ctxt: an XML parser context
15486
* @fd: an open file descriptor
15487
* @URL: the base URL to use for the document
15488
* @encoding: the document encoding, or NULL
15489
* @options: a combination of xmlParserOption
15491
* parse an XML from a file descriptor and build a tree.
15492
* This reuses the existing @ctxt parser context
15493
* NOTE that the file descriptor will not be closed when the
15494
* reader is closed or reset.
15496
* Returns the resulting document tree
15499
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15500
const char *URL, const char *encoding, int options)
15502
xmlParserInputBufferPtr input;
15503
xmlParserInputPtr stream;
15510
xmlCtxtReset(ctxt);
15513
input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15516
input->closecallback = NULL;
15517
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15518
if (stream == NULL) {
15519
xmlFreeParserInputBuffer(input);
15522
inputPush(ctxt, stream);
15523
return (xmlDoRead(ctxt, URL, encoding, options, 1));
15528
* @ctxt: an XML parser context
15529
* @ioread: an I/O read function
15530
* @ioclose: an I/O close function
15531
* @ioctx: an I/O handler
15532
* @URL: the base URL to use for the document
15533
* @encoding: the document encoding, or NULL
15534
* @options: a combination of xmlParserOption
15536
* parse an XML document from I/O functions and source and build a tree.
15537
* This reuses the existing @ctxt parser context
15539
* Returns the resulting document tree
15542
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15543
xmlInputCloseCallback ioclose, void *ioctx,
15545
const char *encoding, int options)
15547
xmlParserInputBufferPtr input;
15548
xmlParserInputPtr stream;
15550
if (ioread == NULL)
15555
xmlCtxtReset(ctxt);
15557
input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15558
XML_CHAR_ENCODING_NONE);
15559
if (input == NULL) {
15560
if (ioclose != NULL)
15564
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15565
if (stream == NULL) {
15566
xmlFreeParserInputBuffer(input);
15569
inputPush(ctxt, stream);
15570
return (xmlDoRead(ctxt, URL, encoding, options, 1));
15573
#define bottom_parser
15574
#include "elfgcchack.h"