2
* parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
* implemented on top of the SAX interfaces
6
* The XML specification:
7
* http://www.w3.org/TR/REC-xml
8
* Original 1.0 version:
9
* http://www.w3.org/TR/1998/REC-xml-19980210
10
* XML second edition working draft
11
* http://www.w3.org/TR/2000/WD-xml-2e-20000814
13
* Okay this is a big file, the parser core is around 7000 lines, then it
14
* is followed by the progressive parser top routines, then the various
15
* high level APIs to call the parser and a few miscellaneous functions.
16
* A number of helper functions and deprecated ones have been moved to
17
* parserInternals.c to reduce this file size.
18
* As much as possible the functions are associated with their relative
19
* production in the XML specification. A few productions defining the
20
* different ranges of character are actually implemented either in
21
* parserInternals.h or parserInternals.c
22
* The DOM tree build is realized from the default SAX callbacks in
24
* The routines doing the validation checks are in valid.c and called either
25
* from the SAX callbacks or as standalone functions using a preparsed
28
* See Copyright for the status of this software.
36
#if defined(WIN32) && !defined (__CYGWIN__)
37
#define XML_DIR_SEP '\\'
39
#define XML_DIR_SEP '/'
46
#include <libxml/xmlmemory.h>
47
#include <libxml/threads.h>
48
#include <libxml/globals.h>
49
#include <libxml/tree.h>
50
#include <libxml/parser.h>
51
#include <libxml/parserInternals.h>
52
#include <libxml/valid.h>
53
#include <libxml/entities.h>
54
#include <libxml/xmlerror.h>
55
#include <libxml/encoding.h>
56
#include <libxml/xmlIO.h>
57
#include <libxml/uri.h>
58
#ifdef LIBXML_CATALOG_ENABLED
59
#include <libxml/catalog.h>
61
#ifdef LIBXML_SCHEMAS_ENABLED
62
#include <libxml/xmlschemastypes.h>
63
#include <libxml/relaxng.h>
71
#ifdef HAVE_SYS_STAT_H
91
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
93
static xmlParserCtxtPtr
94
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95
const xmlChar *base, xmlParserCtxtPtr pctx);
97
/************************************************************************
99
* Arbitrary limits set in the parser. See XML_PARSE_HUGE *
101
************************************************************************/
103
#define XML_PARSER_BIG_ENTITY 1000
104
#define XML_PARSER_LOT_ENTITY 5000
107
* XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
108
* replacement over the size in byte of the input indicates that you have
109
* an exponential behaviour. A value of 10 corresponds to at least 3 entity
110
* replacement per byte of input.
112
#define XML_PARSER_NON_LINEAR 10
115
* xmlParserEntityCheck
117
* Function to check non-linear entity expansion behaviour
118
* This is here to detect and stop exponential (non-linear) entity expansion
119
* This is not a limitation of the parser but a safety
120
* boundary feature. It can be disabled with the XML_PARSE_HUGE
124
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
125
xmlEntityPtr ent, size_t replacement)
129
if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
131
if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
133
if (replacement != 0) {
134
if (replacement < XML_MAX_TEXT_LENGTH)
138
* If the volume of entity copy reaches 10 times the
139
* amount of parsed data and over the large text threshold
140
* then that's very likely to be an abuse.
142
if (ctxt->input != NULL) {
143
consumed = ctxt->input->consumed +
144
(ctxt->input->cur - ctxt->input->base);
146
consumed += ctxt->sizeentities;
148
if (replacement < XML_PARSER_NON_LINEAR * consumed)
150
} else if (size != 0) {
152
* Do the check based on the replacement size of the entity
154
if (size < XML_PARSER_BIG_ENTITY)
158
* A limit on the amount of text data reasonably used
160
if (ctxt->input != NULL) {
161
consumed = ctxt->input->consumed +
162
(ctxt->input->cur - ctxt->input->base);
164
consumed += ctxt->sizeentities;
166
if ((size < XML_PARSER_NON_LINEAR * consumed) &&
167
(ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
169
} else if (ent != NULL) {
171
* use the number of parsed entities in the replacement
173
size = ent->checked / 2;
176
* The amount of data parsed counting entities size only once
178
if (ctxt->input != NULL) {
179
consumed = ctxt->input->consumed +
180
(ctxt->input->cur - ctxt->input->base);
182
consumed += ctxt->sizeentities;
185
* Check the density of entities for the amount of data
186
* knowing an entity reference will take at least 3 bytes
188
if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
192
* strange we got no data for checking just return
196
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
203
* arbitrary depth limit for the XML documents that we allow to
204
* process. This is not a limitation of the parser but a safety
205
* boundary feature. It can be disabled with the XML_PARSE_HUGE
208
unsigned int xmlParserMaxDepth = 256;
213
#define XML_PARSER_BIG_BUFFER_SIZE 300
214
#define XML_PARSER_BUFFER_SIZE 100
215
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
218
* XML_PARSER_CHUNK_SIZE
220
* When calling GROW that's the minimal amount of data
221
* the parser expected to have received. It is not a hard
222
* limit but an optimization when reading strings like Names
223
* It is not strictly needed as long as the input's available characters
224
* are followed by 0, which should be provided by the I/O level
226
#define XML_PARSER_CHUNK_SIZE 100
229
* List of XML prefixed PI allowed by W3C specs
232
static const char *xmlW3CPIs[] = {
239
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
240
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
241
const xmlChar **str);
243
static xmlParserErrors
244
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
245
xmlSAXHandlerPtr sax,
246
void *user_data, int depth, const xmlChar *URL,
247
const xmlChar *ID, xmlNodePtr *list);
250
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
251
const char *encoding);
252
#ifdef LIBXML_LEGACY_ENABLED
254
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
255
xmlNodePtr lastNode);
256
#endif /* LIBXML_LEGACY_ENABLED */
258
static xmlParserErrors
259
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
260
const xmlChar *string, void *user_data, xmlNodePtr *lst);
263
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
265
/************************************************************************
267
* Some factorized error routines *
269
************************************************************************/
272
* xmlErrAttributeDup:
273
* @ctxt: an XML parser context
274
* @prefix: the attribute prefix
275
* @localname: the attribute localname
277
* Handle a redefinition of attribute error
280
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
281
const xmlChar * localname)
283
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
284
(ctxt->instate == XML_PARSER_EOF))
287
ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
290
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
291
XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
292
(const char *) localname, NULL, NULL, 0, 0,
293
"Attribute %s redefined\n", localname);
295
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
296
XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
297
(const char *) prefix, (const char *) localname,
298
NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
301
ctxt->wellFormed = 0;
302
if (ctxt->recovery == 0)
303
ctxt->disableSAX = 1;
309
* @ctxt: an XML parser context
310
* @error: the error number
311
* @info: extra information string
313
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
316
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
319
char errstr[129] = "";
321
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
322
(ctxt->instate == XML_PARSER_EOF))
325
case XML_ERR_INVALID_HEX_CHARREF:
326
errmsg = "CharRef: invalid hexadecimal value";
328
case XML_ERR_INVALID_DEC_CHARREF:
329
errmsg = "CharRef: invalid decimal value";
331
case XML_ERR_INVALID_CHARREF:
332
errmsg = "CharRef: invalid value";
334
case XML_ERR_INTERNAL_ERROR:
335
errmsg = "internal error";
337
case XML_ERR_PEREF_AT_EOF:
338
errmsg = "PEReference at end of document";
340
case XML_ERR_PEREF_IN_PROLOG:
341
errmsg = "PEReference in prolog";
343
case XML_ERR_PEREF_IN_EPILOG:
344
errmsg = "PEReference in epilog";
346
case XML_ERR_PEREF_NO_NAME:
347
errmsg = "PEReference: no name";
349
case XML_ERR_PEREF_SEMICOL_MISSING:
350
errmsg = "PEReference: expecting ';'";
352
case XML_ERR_ENTITY_LOOP:
353
errmsg = "Detected an entity reference loop";
355
case XML_ERR_ENTITY_NOT_STARTED:
356
errmsg = "EntityValue: \" or ' expected";
358
case XML_ERR_ENTITY_PE_INTERNAL:
359
errmsg = "PEReferences forbidden in internal subset";
361
case XML_ERR_ENTITY_NOT_FINISHED:
362
errmsg = "EntityValue: \" or ' expected";
364
case XML_ERR_ATTRIBUTE_NOT_STARTED:
365
errmsg = "AttValue: \" or ' expected";
367
case XML_ERR_LT_IN_ATTRIBUTE:
368
errmsg = "Unescaped '<' not allowed in attributes values";
370
case XML_ERR_LITERAL_NOT_STARTED:
371
errmsg = "SystemLiteral \" or ' expected";
373
case XML_ERR_LITERAL_NOT_FINISHED:
374
errmsg = "Unfinished System or Public ID \" or ' expected";
376
case XML_ERR_MISPLACED_CDATA_END:
377
errmsg = "Sequence ']]>' not allowed in content";
379
case XML_ERR_URI_REQUIRED:
380
errmsg = "SYSTEM or PUBLIC, the URI is missing";
382
case XML_ERR_PUBID_REQUIRED:
383
errmsg = "PUBLIC, the Public Identifier is missing";
385
case XML_ERR_HYPHEN_IN_COMMENT:
386
errmsg = "Comment must not contain '--' (double-hyphen)";
388
case XML_ERR_PI_NOT_STARTED:
389
errmsg = "xmlParsePI : no target name";
391
case XML_ERR_RESERVED_XML_NAME:
392
errmsg = "Invalid PI name";
394
case XML_ERR_NOTATION_NOT_STARTED:
395
errmsg = "NOTATION: Name expected here";
397
case XML_ERR_NOTATION_NOT_FINISHED:
398
errmsg = "'>' required to close NOTATION declaration";
400
case XML_ERR_VALUE_REQUIRED:
401
errmsg = "Entity value required";
403
case XML_ERR_URI_FRAGMENT:
404
errmsg = "Fragment not allowed";
406
case XML_ERR_ATTLIST_NOT_STARTED:
407
errmsg = "'(' required to start ATTLIST enumeration";
409
case XML_ERR_NMTOKEN_REQUIRED:
410
errmsg = "NmToken expected in ATTLIST enumeration";
412
case XML_ERR_ATTLIST_NOT_FINISHED:
413
errmsg = "')' required to finish ATTLIST enumeration";
415
case XML_ERR_MIXED_NOT_STARTED:
416
errmsg = "MixedContentDecl : '|' or ')*' expected";
418
case XML_ERR_PCDATA_REQUIRED:
419
errmsg = "MixedContentDecl : '#PCDATA' expected";
421
case XML_ERR_ELEMCONTENT_NOT_STARTED:
422
errmsg = "ContentDecl : Name or '(' expected";
424
case XML_ERR_ELEMCONTENT_NOT_FINISHED:
425
errmsg = "ContentDecl : ',' '|' or ')' expected";
427
case XML_ERR_PEREF_IN_INT_SUBSET:
429
"PEReference: forbidden within markup decl in internal subset";
431
case XML_ERR_GT_REQUIRED:
432
errmsg = "expected '>'";
434
case XML_ERR_CONDSEC_INVALID:
435
errmsg = "XML conditional section '[' expected";
437
case XML_ERR_EXT_SUBSET_NOT_FINISHED:
438
errmsg = "Content error in the external subset";
440
case XML_ERR_CONDSEC_INVALID_KEYWORD:
442
"conditional section INCLUDE or IGNORE keyword expected";
444
case XML_ERR_CONDSEC_NOT_FINISHED:
445
errmsg = "XML conditional section not closed";
447
case XML_ERR_XMLDECL_NOT_STARTED:
448
errmsg = "Text declaration '<?xml' required";
450
case XML_ERR_XMLDECL_NOT_FINISHED:
451
errmsg = "parsing XML declaration: '?>' expected";
453
case XML_ERR_EXT_ENTITY_STANDALONE:
454
errmsg = "external parsed entities cannot be standalone";
456
case XML_ERR_ENTITYREF_SEMICOL_MISSING:
457
errmsg = "EntityRef: expecting ';'";
459
case XML_ERR_DOCTYPE_NOT_FINISHED:
460
errmsg = "DOCTYPE improperly terminated";
462
case XML_ERR_LTSLASH_REQUIRED:
463
errmsg = "EndTag: '</' not found";
465
case XML_ERR_EQUAL_REQUIRED:
466
errmsg = "expected '='";
468
case XML_ERR_STRING_NOT_CLOSED:
469
errmsg = "String not closed expecting \" or '";
471
case XML_ERR_STRING_NOT_STARTED:
472
errmsg = "String not started expecting ' or \"";
474
case XML_ERR_ENCODING_NAME:
475
errmsg = "Invalid XML encoding name";
477
case XML_ERR_STANDALONE_VALUE:
478
errmsg = "standalone accepts only 'yes' or 'no'";
480
case XML_ERR_DOCUMENT_EMPTY:
481
errmsg = "Document is empty";
483
case XML_ERR_DOCUMENT_END:
484
errmsg = "Extra content at the end of the document";
486
case XML_ERR_NOT_WELL_BALANCED:
487
errmsg = "chunk is not well balanced";
489
case XML_ERR_EXTRA_CONTENT:
490
errmsg = "extra content at the end of well balanced chunk";
492
case XML_ERR_VERSION_MISSING:
493
errmsg = "Malformed declaration expecting version";
495
case XML_ERR_NAME_TOO_LONG:
496
errmsg = "Name too long use XML_PARSE_HUGE option";
504
errmsg = "Unregistered error message";
507
snprintf(errstr, 128, "%s\n", errmsg);
509
snprintf(errstr, 128, "%s: %%s\n", errmsg);
512
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
513
XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0],
516
ctxt->wellFormed = 0;
517
if (ctxt->recovery == 0)
518
ctxt->disableSAX = 1;
524
* @ctxt: an XML parser context
525
* @error: the error number
526
* @msg: the error message
528
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
531
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
534
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
535
(ctxt->instate == XML_PARSER_EOF))
539
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
540
XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
542
ctxt->wellFormed = 0;
543
if (ctxt->recovery == 0)
544
ctxt->disableSAX = 1;
550
* @ctxt: an XML parser context
551
* @error: the error number
552
* @msg: the error message
559
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
560
const char *msg, const xmlChar *str1, const xmlChar *str2)
562
xmlStructuredErrorFunc schannel = NULL;
564
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
565
(ctxt->instate == XML_PARSER_EOF))
567
if ((ctxt != NULL) && (ctxt->sax != NULL) &&
568
(ctxt->sax->initialized == XML_SAX2_MAGIC))
569
schannel = ctxt->sax->serror;
571
__xmlRaiseError(schannel,
572
(ctxt->sax) ? ctxt->sax->warning : NULL,
574
ctxt, NULL, XML_FROM_PARSER, error,
575
XML_ERR_WARNING, NULL, 0,
576
(const char *) str1, (const char *) str2, NULL, 0, 0,
577
msg, (const char *) str1, (const char *) str2);
579
__xmlRaiseError(schannel, NULL, NULL,
580
ctxt, NULL, XML_FROM_PARSER, error,
581
XML_ERR_WARNING, NULL, 0,
582
(const char *) str1, (const char *) str2, NULL, 0, 0,
583
msg, (const char *) str1, (const char *) str2);
589
* @ctxt: an XML parser context
590
* @error: the error number
591
* @msg: the error message
594
* Handle a validity error.
597
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
598
const char *msg, const xmlChar *str1, const xmlChar *str2)
600
xmlStructuredErrorFunc schannel = NULL;
602
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
603
(ctxt->instate == XML_PARSER_EOF))
607
if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
608
schannel = ctxt->sax->serror;
611
__xmlRaiseError(schannel,
612
ctxt->vctxt.error, ctxt->vctxt.userData,
613
ctxt, NULL, XML_FROM_DTD, error,
614
XML_ERR_ERROR, NULL, 0, (const char *) str1,
615
(const char *) str2, NULL, 0, 0,
616
msg, (const char *) str1, (const char *) str2);
619
__xmlRaiseError(schannel, NULL, NULL,
620
ctxt, NULL, XML_FROM_DTD, error,
621
XML_ERR_ERROR, NULL, 0, (const char *) str1,
622
(const char *) str2, NULL, 0, 0,
623
msg, (const char *) str1, (const char *) str2);
629
* @ctxt: an XML parser context
630
* @error: the error number
631
* @msg: the error message
632
* @val: an integer value
634
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
637
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
638
const char *msg, int val)
640
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
641
(ctxt->instate == XML_PARSER_EOF))
645
__xmlRaiseError(NULL, NULL, NULL,
646
ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
647
NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
649
ctxt->wellFormed = 0;
650
if (ctxt->recovery == 0)
651
ctxt->disableSAX = 1;
656
* xmlFatalErrMsgStrIntStr:
657
* @ctxt: an XML parser context
658
* @error: the error number
659
* @msg: the error message
660
* @str1: a string info
661
* @val: an integer value
662
* @str2: a string info
664
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
667
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
668
const char *msg, const xmlChar *str1, int val,
671
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
672
(ctxt->instate == XML_PARSER_EOF))
676
__xmlRaiseError(NULL, NULL, NULL,
677
ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
678
NULL, 0, (const char *) str1, (const char *) str2,
679
NULL, val, 0, msg, str1, val, str2);
681
ctxt->wellFormed = 0;
682
if (ctxt->recovery == 0)
683
ctxt->disableSAX = 1;
689
* @ctxt: an XML parser context
690
* @error: the error number
691
* @msg: the error message
692
* @val: a string value
694
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
697
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
698
const char *msg, const xmlChar * val)
700
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
701
(ctxt->instate == XML_PARSER_EOF))
705
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
706
XML_FROM_PARSER, error, XML_ERR_FATAL,
707
NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
710
ctxt->wellFormed = 0;
711
if (ctxt->recovery == 0)
712
ctxt->disableSAX = 1;
718
* @ctxt: an XML parser context
719
* @error: the error number
720
* @msg: the error message
721
* @val: a string value
723
* Handle a non fatal parser error
726
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
727
const char *msg, const xmlChar * val)
729
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
730
(ctxt->instate == XML_PARSER_EOF))
734
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
735
XML_FROM_PARSER, error, XML_ERR_ERROR,
736
NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
742
* @ctxt: an XML parser context
743
* @error: the error number
745
* @info1: extra information string
746
* @info2: extra information string
748
* Handle a namespace error, i.e. violating namespace Well-Formedness constraints
751
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
753
const xmlChar * info1, const xmlChar * info2,
754
const xmlChar * info3)
756
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
757
(ctxt->instate == XML_PARSER_EOF))
761
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
762
XML_ERR_ERROR, NULL, 0, (const char *) info1,
763
(const char *) info2, (const char *) info3, 0, 0, msg,
764
info1, info2, info3);
766
ctxt->nsWellFormed = 0;
771
* @ctxt: an XML parser context
772
* @error: the error number
774
* @info1: extra information string
775
* @info2: extra information string
777
* Handle a namespace warning
780
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
782
const xmlChar * info1, const xmlChar * info2,
783
const xmlChar * info3)
785
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
786
(ctxt->instate == XML_PARSER_EOF))
788
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
789
XML_ERR_WARNING, NULL, 0, (const char *) info1,
790
(const char *) info2, (const char *) info3, 0, 0, msg,
791
info1, info2, info3);
794
/************************************************************************
796
* Library wide options *
798
************************************************************************/
802
* @feature: the feature to be examined
804
* Examines if the library has been compiled with a given feature.
806
* Returns a non-zero value if the feature exists, otherwise zero.
807
* Returns zero (0) if the feature does not exist or an unknown
808
* feature is requested, non-zero otherwise.
811
xmlHasFeature(xmlFeature feature)
814
case XML_WITH_THREAD:
815
#ifdef LIBXML_THREAD_ENABLED
821
#ifdef LIBXML_TREE_ENABLED
826
case XML_WITH_OUTPUT:
827
#ifdef LIBXML_OUTPUT_ENABLED
833
#ifdef LIBXML_PUSH_ENABLED
838
case XML_WITH_READER:
839
#ifdef LIBXML_READER_ENABLED
844
case XML_WITH_PATTERN:
845
#ifdef LIBXML_PATTERN_ENABLED
850
case XML_WITH_WRITER:
851
#ifdef LIBXML_WRITER_ENABLED
857
#ifdef LIBXML_SAX1_ENABLED
863
#ifdef LIBXML_FTP_ENABLED
869
#ifdef LIBXML_HTTP_ENABLED
875
#ifdef LIBXML_VALID_ENABLED
881
#ifdef LIBXML_HTML_ENABLED
886
case XML_WITH_LEGACY:
887
#ifdef LIBXML_LEGACY_ENABLED
893
#ifdef LIBXML_C14N_ENABLED
898
case XML_WITH_CATALOG:
899
#ifdef LIBXML_CATALOG_ENABLED
905
#ifdef LIBXML_XPATH_ENABLED
911
#ifdef LIBXML_XPTR_ENABLED
916
case XML_WITH_XINCLUDE:
917
#ifdef LIBXML_XINCLUDE_ENABLED
923
#ifdef LIBXML_ICONV_ENABLED
928
case XML_WITH_ISO8859X:
929
#ifdef LIBXML_ISO8859X_ENABLED
934
case XML_WITH_UNICODE:
935
#ifdef LIBXML_UNICODE_ENABLED
940
case XML_WITH_REGEXP:
941
#ifdef LIBXML_REGEXP_ENABLED
946
case XML_WITH_AUTOMATA:
947
#ifdef LIBXML_AUTOMATA_ENABLED
953
#ifdef LIBXML_EXPR_ENABLED
958
case XML_WITH_SCHEMAS:
959
#ifdef LIBXML_SCHEMAS_ENABLED
964
case XML_WITH_SCHEMATRON:
965
#ifdef LIBXML_SCHEMATRON_ENABLED
970
case XML_WITH_MODULES:
971
#ifdef LIBXML_MODULES_ENABLED
977
#ifdef LIBXML_DEBUG_ENABLED
982
case XML_WITH_DEBUG_MEM:
983
#ifdef DEBUG_MEMORY_LOCATION
988
case XML_WITH_DEBUG_RUN:
989
#ifdef LIBXML_DEBUG_RUNTIME
995
#ifdef LIBXML_ZLIB_ENABLED
1001
#ifdef LIBXML_LZMA_ENABLED
1007
#ifdef LIBXML_ICU_ENABLED
1018
/************************************************************************
1020
* SAX2 defaulted attributes handling *
1022
************************************************************************/
1026
* @ctxt: an XML parser context
1028
* Do the SAX2 detection and specific initialization
1031
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1032
if (ctxt == NULL) return;
1033
#ifdef LIBXML_SAX1_ENABLED
1034
if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1035
((ctxt->sax->startElementNs != NULL) ||
1036
(ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1039
#endif /* LIBXML_SAX1_ENABLED */
1041
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1042
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1043
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1044
if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1045
(ctxt->str_xml_ns == NULL)) {
1046
xmlErrMemory(ctxt, NULL);
1050
typedef struct _xmlDefAttrs xmlDefAttrs;
1051
typedef xmlDefAttrs *xmlDefAttrsPtr;
1052
struct _xmlDefAttrs {
1053
int nbAttrs; /* number of defaulted attributes on that element */
1054
int maxAttrs; /* the size of the array */
1055
const xmlChar *values[5]; /* array of localname/prefix/values/external */
1059
* xmlAttrNormalizeSpace:
1060
* @src: the source string
1061
* @dst: the target string
1063
* Normalize the space in non CDATA attribute values:
1064
* If the attribute type is not CDATA, then the XML processor MUST further
1065
* process the normalized attribute value by discarding any leading and
1066
* trailing space (#x20) characters, and by replacing sequences of space
1067
* (#x20) characters by a single space (#x20) character.
1068
* Note that the size of dst needs to be at least that of src, and if one doesn't need
1069
* to preserve dst (and it doesn't come from a dictionary or read-only) then
1070
* passing src as dst is just fine.
1072
* Returns a pointer to the normalized value (dst) or NULL if no conversion
1076
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1078
if ((src == NULL) || (dst == NULL))
1081
while (*src == 0x20) src++;
1084
while (*src == 0x20) src++;
1098
* xmlAttrNormalizeSpace2:
1099
* @src: the source string
1101
* Normalize the space in non CDATA attribute values, a slightly more complex
1102
* front end to avoid allocation problems when running on attribute values
1103
* coming from the input.
1105
* Returns a pointer to the normalized value (dst) or NULL if no conversion
1108
static const xmlChar *
1109
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1112
int remove_head = 0;
1113
int need_realloc = 0;
1116
if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1123
while (*cur == 0x20) {
1130
if ((*cur == 0x20) || (*cur == 0)) {
1140
ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1142
xmlErrMemory(ctxt, NULL);
1145
xmlAttrNormalizeSpace(ret, ret);
1146
*len = (int) strlen((const char *)ret);
1148
} else if (remove_head) {
1149
*len -= remove_head;
1150
memmove(src, src + remove_head, 1 + *len);
1158
* @ctxt: an XML parser context
1159
* @fullname: the element fullname
1160
* @fullattr: the attribute fullname
1161
* @value: the attribute value
1163
* Add a defaulted attribute for an element
1166
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1167
const xmlChar *fullname,
1168
const xmlChar *fullattr,
1169
const xmlChar *value) {
1170
xmlDefAttrsPtr defaults;
1172
const xmlChar *name;
1173
const xmlChar *prefix;
1176
* Allows to detect attribute redefinitions
1178
if (ctxt->attsSpecial != NULL) {
1179
if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1183
if (ctxt->attsDefault == NULL) {
1184
ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1185
if (ctxt->attsDefault == NULL)
1190
* split the element name into prefix:localname , the string found
1191
* are within the DTD and then not associated to namespace names.
1193
name = xmlSplitQName3(fullname, &len);
1195
name = xmlDictLookup(ctxt->dict, fullname, -1);
1198
name = xmlDictLookup(ctxt->dict, name, -1);
1199
prefix = xmlDictLookup(ctxt->dict, fullname, len);
1203
* make sure there is some storage
1205
defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1206
if (defaults == NULL) {
1207
defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1208
(4 * 5) * sizeof(const xmlChar *));
1209
if (defaults == NULL)
1211
defaults->nbAttrs = 0;
1212
defaults->maxAttrs = 4;
1213
if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1214
defaults, NULL) < 0) {
1218
} else if (defaults->nbAttrs >= defaults->maxAttrs) {
1219
xmlDefAttrsPtr temp;
1221
temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1222
(2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1226
defaults->maxAttrs *= 2;
1227
if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1228
defaults, NULL) < 0) {
1235
* Split the attribute name into prefix:localname , the string found
1236
* are within the DTD and then not associated to namespace names.
1238
name = xmlSplitQName3(fullattr, &len);
1240
name = xmlDictLookup(ctxt->dict, fullattr, -1);
1243
name = xmlDictLookup(ctxt->dict, name, -1);
1244
prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1247
defaults->values[5 * defaults->nbAttrs] = name;
1248
defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1249
/* intern the string and precompute the end */
1250
len = xmlStrlen(value);
1251
value = xmlDictLookup(ctxt->dict, value, len);
1252
defaults->values[5 * defaults->nbAttrs + 2] = value;
1253
defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1255
defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1257
defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1258
defaults->nbAttrs++;
1263
xmlErrMemory(ctxt, NULL);
1268
* xmlAddSpecialAttr:
1269
* @ctxt: an XML parser context
1270
* @fullname: the element fullname
1271
* @fullattr: the attribute fullname
1272
* @type: the attribute type
1274
* Register this attribute type
1277
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1278
const xmlChar *fullname,
1279
const xmlChar *fullattr,
1282
if (ctxt->attsSpecial == NULL) {
1283
ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1284
if (ctxt->attsSpecial == NULL)
1288
if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1291
xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1292
(void *) (long) type);
1296
xmlErrMemory(ctxt, NULL);
1301
* xmlCleanSpecialAttrCallback:
1303
* Removes CDATA attributes from the special attribute table
1306
xmlCleanSpecialAttrCallback(void *payload, void *data,
1307
const xmlChar *fullname, const xmlChar *fullattr,
1308
const xmlChar *unused ATTRIBUTE_UNUSED) {
1309
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1311
if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1312
xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1317
* xmlCleanSpecialAttr:
1318
* @ctxt: an XML parser context
1320
* Trim the list of attributes defined to remove all those of type
1321
* CDATA as they are not special. This call should be done when finishing
1322
* to parse the DTD and before starting to parse the document root.
1325
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1327
if (ctxt->attsSpecial == NULL)
1330
xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1332
if (xmlHashSize(ctxt->attsSpecial) == 0) {
1333
xmlHashFree(ctxt->attsSpecial, NULL);
1334
ctxt->attsSpecial = NULL;
1340
* xmlCheckLanguageID:
1341
* @lang: pointer to the string value
1343
* Checks that the value conforms to the LanguageID production:
1345
* NOTE: this is somewhat deprecated, those productions were removed from
1346
* the XML Second edition.
1348
* [33] LanguageID ::= Langcode ('-' Subcode)*
1349
* [34] Langcode ::= ISO639Code | IanaCode | UserCode
1350
* [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1351
* [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1352
* [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1353
* [38] Subcode ::= ([a-z] | [A-Z])+
1355
* The current REC references the successors of RFC 1766, currently 5646
1357
* http://www.rfc-editor.org/rfc/rfc5646.txt
1358
* langtag = language
1364
* language = 2*3ALPHA ; shortest ISO 639 code
1365
* ["-" extlang] ; sometimes followed by
1366
* ; extended language subtags
1367
* / 4ALPHA ; or reserved for future use
1368
* / 5*8ALPHA ; or registered language subtag
1370
* extlang = 3ALPHA ; selected ISO 639 codes
1371
* *2("-" 3ALPHA) ; permanently reserved
1373
* script = 4ALPHA ; ISO 15924 code
1375
* region = 2ALPHA ; ISO 3166-1 code
1376
* / 3DIGIT ; UN M.49 code
1378
* variant = 5*8alphanum ; registered variants
1379
* / (DIGIT 3alphanum)
1381
* extension = singleton 1*("-" (2*8alphanum))
1383
* ; Single alphanumerics
1384
* ; "x" reserved for private use
1385
* singleton = DIGIT ; 0 - 9
1391
* it sounds right to still allow Irregular i-xxx IANA and user codes too
1392
* The parser below doesn't try to cope with extension or privateuse
1393
* that could be added but that's not interoperable anyway
1395
* Returns 1 if correct 0 otherwise
1398
xmlCheckLanguageID(const xmlChar * lang)
1400
const xmlChar *cur = lang, *nxt;
1404
if (((cur[0] == 'i') && (cur[1] == '-')) ||
1405
((cur[0] == 'I') && (cur[1] == '-')) ||
1406
((cur[0] == 'x') && (cur[1] == '-')) ||
1407
((cur[0] == 'X') && (cur[1] == '-'))) {
1409
* Still allow IANA code and user code which were coming
1410
* from the previous version of the XML-1.0 specification
1411
* it's deprecated but we should not fail
1414
while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1415
((cur[0] >= 'a') && (cur[0] <= 'z')))
1417
return(cur[0] == 0);
1420
while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1421
((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1423
if (nxt - cur >= 4) {
1427
if ((nxt - cur > 8) || (nxt[0] != 0))
1433
/* we got an ISO 639 code */
1441
/* now we can have extlang or script or region or variant */
1442
if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1445
while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1446
((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1452
if ((nxt - cur >= 5) && (nxt - cur <= 8))
1456
/* we parsed an extlang */
1464
/* now we can have script or region or variant */
1465
if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1468
while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1469
((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1473
if ((nxt - cur >= 5) && (nxt - cur <= 8))
1477
/* we parsed a script */
1486
/* now we can have region or variant */
1487
if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1490
while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1491
((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1494
if ((nxt - cur >= 5) && (nxt - cur <= 8))
1498
/* we parsed a region */
1507
/* now we can just have a variant */
1508
while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1509
((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1512
if ((nxt - cur < 5) || (nxt - cur > 8))
1515
/* we parsed a variant */
1521
/* extensions and private use subtags not checked */
1525
if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1526
((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1533
/************************************************************************
1535
* Parser stacks related functions and macros *
1537
************************************************************************/
1539
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1540
const xmlChar ** str);
1545
* @ctxt: an XML parser context
1546
* @prefix: the namespace prefix or NULL
1547
* @URL: the namespace name
1549
* Pushes a new parser namespace on top of the ns stack
1551
* Returns -1 in case of error, -2 if the namespace should be discarded
1552
* and the index in the stack otherwise.
1555
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1557
if (ctxt->options & XML_PARSE_NSCLEAN) {
1559
for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1560
if (ctxt->nsTab[i] == prefix) {
1562
if (ctxt->nsTab[i + 1] == URL)
1564
/* out of scope keep it */
1569
if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1572
ctxt->nsTab = (const xmlChar **)
1573
xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1574
if (ctxt->nsTab == NULL) {
1575
xmlErrMemory(ctxt, NULL);
1579
} else if (ctxt->nsNr >= ctxt->nsMax) {
1580
const xmlChar ** tmp;
1582
tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1583
ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1585
xmlErrMemory(ctxt, NULL);
1591
ctxt->nsTab[ctxt->nsNr++] = prefix;
1592
ctxt->nsTab[ctxt->nsNr++] = URL;
1593
return (ctxt->nsNr);
1597
* @ctxt: an XML parser context
1598
* @nr: the number to pop
1600
* Pops the top @nr parser prefix/namespace from the ns stack
1602
* Returns the number of namespaces removed
1605
nsPop(xmlParserCtxtPtr ctxt, int nr)
1609
if (ctxt->nsTab == NULL) return(0);
1610
if (ctxt->nsNr < nr) {
1611
xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1614
if (ctxt->nsNr <= 0)
1617
for (i = 0;i < nr;i++) {
1619
ctxt->nsTab[ctxt->nsNr] = NULL;
1626
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1627
const xmlChar **atts;
1631
if (ctxt->atts == NULL) {
1632
maxatts = 55; /* allow for 10 attrs by default */
1633
atts = (const xmlChar **)
1634
xmlMalloc(maxatts * sizeof(xmlChar *));
1635
if (atts == NULL) goto mem_error;
1637
attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1638
if (attallocs == NULL) goto mem_error;
1639
ctxt->attallocs = attallocs;
1640
ctxt->maxatts = maxatts;
1641
} else if (nr + 5 > ctxt->maxatts) {
1642
maxatts = (nr + 5) * 2;
1643
atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1644
maxatts * sizeof(const xmlChar *));
1645
if (atts == NULL) goto mem_error;
1647
attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1648
(maxatts / 5) * sizeof(int));
1649
if (attallocs == NULL) goto mem_error;
1650
ctxt->attallocs = attallocs;
1651
ctxt->maxatts = maxatts;
1653
return(ctxt->maxatts);
1655
xmlErrMemory(ctxt, NULL);
1661
* @ctxt: an XML parser context
1662
* @value: the parser input
1664
* Pushes a new parser input on top of the input stack
1666
* Returns -1 in case of error, the index in the stack otherwise
1669
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1671
if ((ctxt == NULL) || (value == NULL))
1673
if (ctxt->inputNr >= ctxt->inputMax) {
1674
ctxt->inputMax *= 2;
1676
(xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1678
sizeof(ctxt->inputTab[0]));
1679
if (ctxt->inputTab == NULL) {
1680
xmlErrMemory(ctxt, NULL);
1681
xmlFreeInputStream(value);
1682
ctxt->inputMax /= 2;
1687
ctxt->inputTab[ctxt->inputNr] = value;
1688
ctxt->input = value;
1689
return (ctxt->inputNr++);
1693
* @ctxt: an XML parser context
1695
* Pops the top parser input from the input stack
1697
* Returns the input just removed
1700
inputPop(xmlParserCtxtPtr ctxt)
1702
xmlParserInputPtr ret;
1706
if (ctxt->inputNr <= 0)
1709
if (ctxt->inputNr > 0)
1710
ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1713
ret = ctxt->inputTab[ctxt->inputNr];
1714
ctxt->inputTab[ctxt->inputNr] = NULL;
1719
* @ctxt: an XML parser context
1720
* @value: the element node
1722
* Pushes a new element node on top of the node stack
1724
* Returns -1 in case of error, the index in the stack otherwise
1727
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1729
if (ctxt == NULL) return(0);
1730
if (ctxt->nodeNr >= ctxt->nodeMax) {
1733
tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1735
sizeof(ctxt->nodeTab[0]));
1737
xmlErrMemory(ctxt, NULL);
1740
ctxt->nodeTab = tmp;
1743
if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1744
((ctxt->options & XML_PARSE_HUGE) == 0)) {
1745
xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1746
"Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1748
ctxt->instate = XML_PARSER_EOF;
1751
ctxt->nodeTab[ctxt->nodeNr] = value;
1753
return (ctxt->nodeNr++);
1758
* @ctxt: an XML parser context
1760
* Pops the top element node from the node stack
1762
* Returns the node just removed
1765
nodePop(xmlParserCtxtPtr ctxt)
1769
if (ctxt == NULL) return(NULL);
1770
if (ctxt->nodeNr <= 0)
1773
if (ctxt->nodeNr > 0)
1774
ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1777
ret = ctxt->nodeTab[ctxt->nodeNr];
1778
ctxt->nodeTab[ctxt->nodeNr] = NULL;
1782
#ifdef LIBXML_PUSH_ENABLED
1785
* @ctxt: an XML parser context
1786
* @value: the element name
1787
* @prefix: the element prefix
1788
* @URI: the element namespace name
1790
* Pushes a new element name/prefix/URL on top of the name stack
1792
* Returns -1 in case of error, the index in the stack otherwise
1795
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1796
const xmlChar *prefix, const xmlChar *URI, int nsNr)
1798
if (ctxt->nameNr >= ctxt->nameMax) {
1799
const xmlChar * *tmp;
1802
tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1804
sizeof(ctxt->nameTab[0]));
1809
ctxt->nameTab = tmp;
1810
tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1812
sizeof(ctxt->pushTab[0]));
1817
ctxt->pushTab = tmp2;
1819
ctxt->nameTab[ctxt->nameNr] = value;
1821
ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1822
ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1823
ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1824
return (ctxt->nameNr++);
1826
xmlErrMemory(ctxt, NULL);
1831
* @ctxt: an XML parser context
1833
* Pops the top element/prefix/URI name from the name stack
1835
* Returns the name just removed
1837
static const xmlChar *
1838
nameNsPop(xmlParserCtxtPtr ctxt)
1842
if (ctxt->nameNr <= 0)
1845
if (ctxt->nameNr > 0)
1846
ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1849
ret = ctxt->nameTab[ctxt->nameNr];
1850
ctxt->nameTab[ctxt->nameNr] = NULL;
1853
#endif /* LIBXML_PUSH_ENABLED */
1857
* @ctxt: an XML parser context
1858
* @value: the element name
1860
* Pushes a new element name on top of the name stack
1862
* Returns -1 in case of error, the index in the stack otherwise
1865
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1867
if (ctxt == NULL) return (-1);
1869
if (ctxt->nameNr >= ctxt->nameMax) {
1870
const xmlChar * *tmp;
1871
tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1873
sizeof(ctxt->nameTab[0]));
1877
ctxt->nameTab = tmp;
1880
ctxt->nameTab[ctxt->nameNr] = value;
1882
return (ctxt->nameNr++);
1884
xmlErrMemory(ctxt, NULL);
1889
* @ctxt: an XML parser context
1891
* Pops the top element name from the name stack
1893
* Returns the name just removed
1896
namePop(xmlParserCtxtPtr ctxt)
1900
if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1903
if (ctxt->nameNr > 0)
1904
ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1907
ret = ctxt->nameTab[ctxt->nameNr];
1908
ctxt->nameTab[ctxt->nameNr] = NULL;
1912
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1913
if (ctxt->spaceNr >= ctxt->spaceMax) {
1916
ctxt->spaceMax *= 2;
1917
tmp = (int *) xmlRealloc(ctxt->spaceTab,
1918
ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1920
xmlErrMemory(ctxt, NULL);
1924
ctxt->spaceTab = tmp;
1926
ctxt->spaceTab[ctxt->spaceNr] = val;
1927
ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1928
return(ctxt->spaceNr++);
1931
static int spacePop(xmlParserCtxtPtr ctxt) {
1933
if (ctxt->spaceNr <= 0) return(0);
1935
if (ctxt->spaceNr > 0)
1936
ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1938
ctxt->space = &ctxt->spaceTab[0];
1939
ret = ctxt->spaceTab[ctxt->spaceNr];
1940
ctxt->spaceTab[ctxt->spaceNr] = -1;
1945
* Macros for accessing the content. Those should be used only by the parser,
1948
 * Dirty macros, i.e. one often needs to make assumptions on the context to
1951
* CUR_PTR return the current pointer to the xmlChar to be parsed.
1952
* To be used with extreme caution since operations consuming
1953
* characters may move the input buffer to a different location !
1954
* CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1955
* This should be used internally by the parser
1956
* only to compare to ASCII values otherwise it would break when
1957
* running with UTF-8 encoding.
1958
* RAW same as CUR but in the input buffer, bypass any token
1959
* extraction that may have been done
1960
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
1961
* to compare on ASCII based substring.
1962
* SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1963
* strings without newlines within the parser.
1964
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1965
* defined char within the parser.
1966
* Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1968
* NEXT Skip to the next character, this does the proper decoding
1969
* in UTF-8 mode. It also pop-up unfinished entities on the fly.
1970
* NEXTL(l) Skip the current unicode character of l xmlChars long.
1971
* CUR_CHAR(l) returns the current unicode character (int), set l
1972
* to the number of xmlChars used for the encoding [0-5].
1973
* CUR_SCHAR same but operate on a string instead of the context
1974
* COPY_BUF copy the current unicode char to the target buffer, increment
1976
* GROW, SHRINK handling of input buffers
/*
 * Raw accessors on the current input. They all expand to expressions on a
 * variable named ctxt that must be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s equal c1..cn.
 * Bytes are compared as unsigned char. The macros do not check for a
 * terminator themselves; evaluation stops at the first mismatching byte.
 * Every argument is parenthesized so that compound expressions (for
 * example a conditional expression passed as s) are cast as a whole.
 * s is evaluated once per compared byte.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
/*
 * SKIP(val): advance the current input by val bytes, updating the byte
 * and column counters by the same amount (no line accounting), then
 * handle a '%' found at the new position and pop the input when it is
 * exhausted and cannot be grown. val is evaluated three times.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but walks the bytes one by one so that
 * newlines update the line counter and reset the column.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for (skipl = 0; skipl < (val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	    ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
2025
#define SHRINK if ((ctxt->progressive == 0) && \
2026
(ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2027
(ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2030
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2031
xmlParserInputShrink(ctxt->input);
2032
if ((*ctxt->input->cur == 0) &&
2033
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2037
#define GROW if ((ctxt->progressive == 0) && \
2038
(ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2041
static void xmlGROW (xmlParserCtxtPtr ctxt) {
2042
if ((((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
2043
((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
2044
((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
2045
((ctxt->options & XML_PARSE_HUGE) == 0)) {
2046
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2047
ctxt->instate = XML_PARSER_EOF;
2049
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2050
if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
2051
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
/* Skip blanks at the current position; yields the number skipped. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Advance to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one byte, counting one column and one byte,
 * and try to grow the input when the terminating 0 is reached. There is
 * no newline accounting. It expands to a braced block, not an expression.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): skip the current character, which is l bytes long, updating
 * line/column for a newline, then handle a '%' at the new position.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += l;						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* Current character of the input / of string s; l receives its length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i. A length
 * l of 1 stores a single byte, any other value goes through
 * xmlCopyCharMultiByte(). The expansion is an unbraced if/else without a
 * trailing ';': call it as a statement of its own, inside braces when it
 * sits under another if.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)
2083
* xmlSkipBlankChars:
2084
* @ctxt: the XML parser context
2086
* skip all blanks character found at that point in the input streams.
2087
* It pops up finished entities in the process if allowable at that point.
2089
* Returns the number of space chars skipped
2093
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2097
* It's Okay to use CUR/NEXT here since all the blanks are on
2100
if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2103
* if we are in the document content, go really fast
2105
cur = ctxt->input->cur;
2106
while (IS_BLANK_CH(*cur)) {
2108
ctxt->input->line++; ctxt->input->col = 1;
2113
ctxt->input->cur = cur;
2114
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2115
cur = ctxt->input->cur;
2118
ctxt->input->cur = cur;
2123
while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
2128
while ((cur == 0) && (ctxt->inputNr > 1) &&
2129
(ctxt->instate != XML_PARSER_COMMENT)) {
2134
* Need to handle support of entities branching here
2136
if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2137
} while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2142
/************************************************************************
2144
* Commodity functions to handle entities *
2146
************************************************************************/
2150
* @ctxt: an XML parser context
2152
* xmlPopInput: the current input pointed by ctxt->input came to an end
2153
* pop it and return the next char.
2155
* Returns the current xmlChar in the parser context
2158
xmlPopInput(xmlParserCtxtPtr ctxt) {
2159
if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2160
if (xmlParserDebugEntities)
2161
xmlGenericError(xmlGenericErrorContext,
2162
"Popping input %d\n", ctxt->inputNr);
2163
xmlFreeInputStream(inputPop(ctxt));
2164
if ((*ctxt->input->cur == 0) &&
2165
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2166
return(xmlPopInput(ctxt));
2172
* @ctxt: an XML parser context
2173
* @input: an XML parser input fragment (entity, XML fragment ...).
2175
* xmlPushInput: switch to a new input stream which is stacked on top
2176
* of the previous one(s).
2177
* Returns -1 in case of error or the index in the input stack
2180
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2182
if (input == NULL) return(-1);
2184
if (xmlParserDebugEntities) {
2185
if ((ctxt->input != NULL) && (ctxt->input->filename))
2186
xmlGenericError(xmlGenericErrorContext,
2187
"%s(%d): ", ctxt->input->filename,
2189
xmlGenericError(xmlGenericErrorContext,
2190
"Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2192
ret = inputPush(ctxt, input);
2193
if (ctxt->instate == XML_PARSER_EOF)
2201
* @ctxt: an XML parser context
2203
* parse Reference declarations
2205
* [66] CharRef ::= '&#' [0-9]+ ';' |
2206
* '&#x' [0-9a-fA-F]+ ';'
2208
* [ WFC: Legal Character ]
2209
* Characters referred to using character references must match the
2210
* production for Char.
2212
* Returns the value parsed (as an int), 0 in case of error
2215
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2216
unsigned int val = 0;
2218
unsigned int outofrange = 0;
2221
* Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2223
if ((RAW == '&') && (NXT(1) == '#') &&
2227
while (RAW != ';') { /* loop blocked by count */
2231
if (ctxt->instate == XML_PARSER_EOF)
2234
if ((RAW >= '0') && (RAW <= '9'))
2235
val = val * 16 + (CUR - '0');
2236
else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2237
val = val * 16 + (CUR - 'a') + 10;
2238
else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2239
val = val * 16 + (CUR - 'A') + 10;
2241
xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2252
/* on purpose to avoid reentrancy problems with NEXT and SKIP */
2257
} else if ((RAW == '&') && (NXT(1) == '#')) {
2260
while (RAW != ';') { /* loop blocked by count */
2264
if (ctxt->instate == XML_PARSER_EOF)
2267
if ((RAW >= '0') && (RAW <= '9'))
2268
val = val * 10 + (CUR - '0');
2270
xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2281
/* on purpose to avoid reentrancy problems with NEXT and SKIP */
2287
xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2291
* [ WFC: Legal Character ]
2292
* Characters referred to using character references must match the
2293
* production for Char.
2295
if ((IS_CHAR(val) && (outofrange == 0))) {
2298
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2299
"xmlParseCharRef: invalid xmlChar value %d\n",
2306
* xmlParseStringCharRef:
2307
* @ctxt: an XML parser context
2308
* @str: a pointer to an index in the string
2310
* parse Reference declarations, variant parsing from a string rather
2311
* than an an input flow.
2313
* [66] CharRef ::= '&#' [0-9]+ ';' |
2314
* '&#x' [0-9a-fA-F]+ ';'
2316
* [ WFC: Legal Character ]
2317
* Characters referred to using character references must match the
2318
* production for Char.
2320
* Returns the value parsed (as an int), 0 in case of error, str will be
2321
* updated to the current value of the index
2324
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2327
unsigned int val = 0;
2328
unsigned int outofrange = 0;
2330
if ((str == NULL) || (*str == NULL)) return(0);
2333
if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2336
while (cur != ';') { /* Non input consuming loop */
2337
if ((cur >= '0') && (cur <= '9'))
2338
val = val * 16 + (cur - '0');
2339
else if ((cur >= 'a') && (cur <= 'f'))
2340
val = val * 16 + (cur - 'a') + 10;
2341
else if ((cur >= 'A') && (cur <= 'F'))
2342
val = val * 16 + (cur - 'A') + 10;
2344
xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2356
} else if ((cur == '&') && (ptr[1] == '#')){
2359
while (cur != ';') { /* Non input consuming loops */
2360
if ((cur >= '0') && (cur <= '9'))
2361
val = val * 10 + (cur - '0');
2363
xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2376
xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2382
* [ WFC: Legal Character ]
2383
* Characters referred to using character references must match the
2384
* production for Char.
2386
if ((IS_CHAR(val) && (outofrange == 0))) {
2389
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2390
"xmlParseStringCharRef: invalid xmlChar value %d\n",
2397
* xmlNewBlanksWrapperInputStream:
2398
* @ctxt: an XML parser context
2399
* @entity: an Entity pointer
2401
* Create a new input stream for wrapping
2402
* blanks around a PEReference
2404
* Returns the new input stream or NULL
2407
static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2409
static xmlParserInputPtr
2410
xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2411
xmlParserInputPtr input;
2414
if (entity == NULL) {
2415
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2416
"xmlNewBlanksWrapperInputStream entity\n");
2419
if (xmlParserDebugEntities)
2420
xmlGenericError(xmlGenericErrorContext,
2421
"new blanks wrapper for entity: %s\n", entity->name);
2422
input = xmlNewInputStream(ctxt);
2423
if (input == NULL) {
2426
length = xmlStrlen(entity->name) + 5;
2427
buffer = xmlMallocAtomic(length);
2428
if (buffer == NULL) {
2429
xmlErrMemory(ctxt, NULL);
2435
buffer [length-3] = ';';
2436
buffer [length-2] = ' ';
2437
buffer [length-1] = 0;
2438
memcpy(buffer + 2, entity->name, length - 5);
2439
input->free = deallocblankswrapper;
2440
input->base = buffer;
2441
input->cur = buffer;
2442
input->length = length;
2443
input->end = &buffer[length];
2448
* xmlParserHandlePEReference:
2449
* @ctxt: the parser context
2451
* [69] PEReference ::= '%' Name ';'
2453
* [ WFC: No Recursion ]
2454
* A parsed entity must not contain a recursive
2455
* reference to itself, either directly or indirectly.
2457
* [ WFC: Entity Declared ]
2458
* In a document without any DTD, a document with only an internal DTD
2459
* subset which contains no parameter entity references, or a document
2460
* with "standalone='yes'", ... ... The declaration of a parameter
2461
* entity must precede any reference to it...
2463
* [ VC: Entity Declared ]
2464
* In a document with an external subset or external parameter entities
2465
* with "standalone='no'", ... ... The declaration of a parameter entity
2466
* must precede any reference to it...
2469
* Parameter-entity references may only appear in the DTD.
2470
* NOTE: misleading but this is handled.
2472
* A PEReference may have been detected in the current input stream
2473
* the handling is done accordingly to
2474
* http://www.w3.org/TR/REC-xml#entproc
2476
* - Included in literal in entity values
2477
* - Included as Parameter Entity reference within DTDs
2480
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2481
const xmlChar *name;
2482
xmlEntityPtr entity = NULL;
2483
xmlParserInputPtr input;
2485
if (RAW != '%') return;
2486
switch(ctxt->instate) {
2487
case XML_PARSER_CDATA_SECTION:
2489
case XML_PARSER_COMMENT:
2491
case XML_PARSER_START_TAG:
2493
case XML_PARSER_END_TAG:
2495
case XML_PARSER_EOF:
2496
xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2498
case XML_PARSER_PROLOG:
2499
case XML_PARSER_START:
2500
case XML_PARSER_MISC:
2501
xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2503
case XML_PARSER_ENTITY_DECL:
2504
case XML_PARSER_CONTENT:
2505
case XML_PARSER_ATTRIBUTE_VALUE:
2507
case XML_PARSER_SYSTEM_LITERAL:
2508
case XML_PARSER_PUBLIC_LITERAL:
2509
/* we just ignore it there */
2511
case XML_PARSER_EPILOG:
2512
xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2514
case XML_PARSER_ENTITY_VALUE:
2516
* NOTE: in the case of entity values, we don't do the
2517
* substitution here since we need the literal
2518
* entity value to be able to save the internal
2519
* subset of the document.
2520
* This will be handled by xmlStringDecodeEntities
2523
case XML_PARSER_DTD:
2525
* [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2526
* In the internal DTD subset, parameter-entity references
2527
* can occur only where markup declarations can occur, not
2528
* within markup declarations.
2529
* In that case this is handled in xmlParseMarkupDecl
2531
if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2533
if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2536
case XML_PARSER_IGNORE:
2541
name = xmlParseName(ctxt);
2542
if (xmlParserDebugEntities)
2543
xmlGenericError(xmlGenericErrorContext,
2544
"PEReference: %s\n", name);
2546
xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2550
if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2551
entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2552
if (ctxt->instate == XML_PARSER_EOF)
2554
if (entity == NULL) {
2557
* [ WFC: Entity Declared ]
2558
* In a document without any DTD, a document with only an
2559
* internal DTD subset which contains no parameter entity
2560
* references, or a document with "standalone='yes'", ...
2561
* ... The declaration of a parameter entity must precede
2562
* any reference to it...
2564
if ((ctxt->standalone == 1) ||
2565
((ctxt->hasExternalSubset == 0) &&
2566
(ctxt->hasPErefs == 0))) {
2567
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2568
"PEReference: %%%s; not found\n", name);
2571
* [ VC: Entity Declared ]
2572
* In a document with an external subset or external
2573
* parameter entities with "standalone='no'", ...
2574
* ... The declaration of a parameter entity must precede
2575
* any reference to it...
2577
if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2578
xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2579
"PEReference: %%%s; not found\n",
2582
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2583
"PEReference: %%%s; not found\n",
2587
} else if (ctxt->input->free != deallocblankswrapper) {
2588
input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2589
if (xmlPushInput(ctxt, input) < 0)
2592
if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2593
(entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2595
xmlCharEncoding enc;
2598
* Note: external parameter entities will not be loaded, it
2599
* is not required for a non-validating parser, unless the
2600
* option of validating, or substituting entities were
2601
* given. Doing so is far more secure as the parser will
2602
* only process data coming from the document entity by
2605
if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2606
((ctxt->options & XML_PARSE_NOENT) == 0) &&
2607
((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
2608
((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
2609
((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
2610
(ctxt->replaceEntities == 0) &&
2611
(ctxt->validate == 0))
2615
* handle the extra spaces added before and after
2616
* c.f. http://www.w3.org/TR/REC-xml#as-PE
2617
* this is done independently.
2619
input = xmlNewEntityInputStream(ctxt, entity);
2620
if (xmlPushInput(ctxt, input) < 0)
2624
* Get the 4 first bytes and decode the charset
2625
* if enc != XML_CHAR_ENCODING_NONE
2626
* plug some encoding conversion routines.
2627
* Note that, since we may have some non-UTF8
2628
* encoding (like UTF16, bug 135229), the 'length'
2629
* is not known, but we can calculate based upon
2630
* the amount of data in the buffer.
2633
if (ctxt->instate == XML_PARSER_EOF)
2635
if ((ctxt->input->end - ctxt->input->cur)>=4) {
2640
enc = xmlDetectCharEncoding(start, 4);
2641
if (enc != XML_CHAR_ENCODING_NONE) {
2642
xmlSwitchEncoding(ctxt, enc);
2646
if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2647
(CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2648
(IS_BLANK_CH(NXT(5)))) {
2649
xmlParseTextDecl(ctxt);
2652
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2653
"PEReference: %s is not a parameter entity\n",
2658
xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
/*
 * Macro used to grow the current buffer.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures
 *
 * The new size is twice the current one plus n. A size computation that
 * wraps around is treated like an allocation failure. On failure the
 * original buffer is left untouched for the mem_error handler.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + n;                            \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}
2679
* xmlStringLenDecodeEntities:
2680
* @ctxt: the parser context
2681
* @str: the input string
2682
* @len: the string length
2683
* @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2684
* @end: an end marker xmlChar, 0 if none
2685
* @end2: an end marker xmlChar, 0 if none
2686
* @end3: an end marker xmlChar, 0 if none
2688
* Takes a entity string content and process to do the adequate substitutions.
2690
* [67] Reference ::= EntityRef | CharRef
2692
* [69] PEReference ::= '%' Name ';'
2694
* Returns A newly allocated string with the substitution done. The caller
2695
* must deallocate it !
2698
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2699
int what, xmlChar end, xmlChar end2, xmlChar end3) {
2700
xmlChar *buffer = NULL;
2701
size_t buffer_size = 0;
2704
xmlChar *current = NULL;
2705
xmlChar *rep = NULL;
2706
const xmlChar *last;
2710
if ((ctxt == NULL) || (str == NULL) || (len < 0))
2714
if (((ctxt->depth > 40) &&
2715
((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2716
(ctxt->depth > 1024)) {
2717
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2722
* allocate a translation buffer.
2724
buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2725
buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2726
if (buffer == NULL) goto mem_error;
2729
* OK loop until we reach one of the ending char or a size limit.
2730
* we are operating on already parsed values.
2733
c = CUR_SCHAR(str, l);
2736
while ((c != 0) && (c != end) && /* non input consuming loop */
2737
(c != end2) && (c != end3)) {
2740
if ((c == '&') && (str[1] == '#')) {
2741
int val = xmlParseStringCharRef(ctxt, &str);
2743
COPY_BUF(0,buffer,nbchars,val);
2745
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2746
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2748
} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2749
if (xmlParserDebugEntities)
2750
xmlGenericError(xmlGenericErrorContext,
2751
"String decoding Entity Reference: %.30s\n",
2753
ent = xmlParseStringEntityRef(ctxt, &str);
2754
if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2755
(ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2758
ctxt->nbentities += ent->checked / 2;
2759
if ((ent != NULL) &&
2760
(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2761
if (ent->content != NULL) {
2762
COPY_BUF(0,buffer,nbchars,ent->content[0]);
2763
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2764
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2767
xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2768
"predefined entity has no content\n");
2770
} else if ((ent != NULL) && (ent->content != NULL)) {
2772
rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2778
while (*current != 0) { /* non input consuming loop */
2779
buffer[nbchars++] = *current++;
2780
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2781
if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2783
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2789
} else if (ent != NULL) {
2790
int i = xmlStrlen(ent->name);
2791
const xmlChar *cur = ent->name;
2793
buffer[nbchars++] = '&';
2794
if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2795
growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2798
buffer[nbchars++] = *cur++;
2799
buffer[nbchars++] = ';';
2801
} else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2802
if (xmlParserDebugEntities)
2803
xmlGenericError(xmlGenericErrorContext,
2804
"String decoding PE Reference: %.30s\n", str);
2805
ent = xmlParseStringPEReference(ctxt, &str);
2806
if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2809
ctxt->nbentities += ent->checked / 2;
2811
if (ent->content == NULL) {
2812
xmlLoadEntityContent(ctxt, ent);
2815
rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2820
while (*current != 0) { /* non input consuming loop */
2821
buffer[nbchars++] = *current++;
2822
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2823
if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2825
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2833
COPY_BUF(l,buffer,nbchars,c);
2835
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2836
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2840
c = CUR_SCHAR(str, l);
2844
buffer[nbchars] = 0;
2848
xmlErrMemory(ctxt, NULL);
2858
* xmlStringDecodeEntities:
2859
* @ctxt: the parser context
2860
* @str: the input string
2861
* @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2862
* @end: an end marker xmlChar, 0 if none
2863
* @end2: an end marker xmlChar, 0 if none
2864
* @end3: an end marker xmlChar, 0 if none
2866
* Takes a entity string content and process to do the adequate substitutions.
2868
* [67] Reference ::= EntityRef | CharRef
2870
* [69] PEReference ::= '%' Name ';'
2872
* Returns A newly allocated string with the substitution done. The caller
2873
* must deallocate it !
2876
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2877
xmlChar end, xmlChar end2, xmlChar end3) {
2878
if ((ctxt == NULL) || (str == NULL)) return(NULL);
2879
return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2883
/************************************************************************
2885
* Commodity functions, cleanup needed ? *
2887
************************************************************************/
2891
* @ctxt: an XML parser context
2893
* @len: the size of @str
2894
* @blank_chars: we know the chars are blanks
2896
* Is this a sequence of blank chars that one can ignore ?
2898
* Returns 1 if ignorable 0 otherwise.
2901
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2904
xmlNodePtr lastChild;
2907
* Don't spend time trying to differentiate them, the same callback is
2910
if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2914
* Check for xml:space value.
2916
if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2917
(*(ctxt->space) == -2))
2921
* Check that the string is made of blanks
2923
if (blank_chars == 0) {
2924
for (i = 0;i < len;i++)
2925
if (!(IS_BLANK_CH(str[i]))) return(0);
2929
* Look if the element is mixed content in the DTD if available
2931
if (ctxt->node == NULL) return(0);
2932
if (ctxt->myDoc != NULL) {
2933
ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2934
if (ret == 0) return(1);
2935
if (ret == 1) return(0);
2939
* Otherwise, heuristic :-\
2941
if ((RAW != '<') && (RAW != 0xD)) return(0);
2942
if ((ctxt->node->children == NULL) &&
2943
(RAW == '<') && (NXT(1) == '/')) return(0);
2945
lastChild = xmlGetLastChild(ctxt->node);
2946
if (lastChild == NULL) {
2947
if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2948
(ctxt->node->content != NULL)) return(0);
2949
} else if (xmlNodeIsText(lastChild))
2951
else if ((ctxt->node->children != NULL) &&
2952
(xmlNodeIsText(ctxt->node->children)))
2957
/************************************************************************
2959
* Extra stuff for namespace support *
2960
* Relates to http://www.w3.org/TR/WD-xml-names *
2962
************************************************************************/
2966
* @ctxt: an XML parser context
2967
* @name: the XML qualified name string to split
2968
* @prefix: a xmlChar **
2970
* parse a UTF-8 encoded XML qualified name string
2972
* [NS 5] QName ::= (Prefix ':')? LocalPart
2974
* [NS 6] Prefix ::= NCName
2976
* [NS 7] LocalPart ::= NCName
2978
* Returns the local part, and prefix is updated
2979
* to get the Prefix if any.
2983
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2984
xmlChar buf[XML_MAX_NAMELEN + 5];
2985
xmlChar *buffer = NULL;
2987
int max = XML_MAX_NAMELEN;
2988
xmlChar *ret = NULL;
2989
const xmlChar *cur = name;
2992
if (prefix == NULL) return(NULL);
2995
if (cur == NULL) return(NULL);
2997
#ifndef XML_XML_NAMESPACE
2998
/* xml: prefix is not really a namespace */
2999
if ((cur[0] == 'x') && (cur[1] == 'm') &&
3000
(cur[2] == 'l') && (cur[3] == ':'))
3001
return(xmlStrdup(name));
3004
/* nasty but well-formed */
3006
return(xmlStrdup(name));
3009
while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3015
* Okay someone managed to make a huge name, so he's ready to pay
3016
* for the processing speed.
3020
buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3021
if (buffer == NULL) {
3022
xmlErrMemory(ctxt, NULL);
3025
memcpy(buffer, buf, len);
3026
while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3027
if (len + 10 > max) {
3031
tmp = (xmlChar *) xmlRealloc(buffer,
3032
max * sizeof(xmlChar));
3035
xmlErrMemory(ctxt, NULL);
3046
if ((c == ':') && (*cur == 0)) {
3050
return(xmlStrdup(name));
3054
ret = xmlStrndup(buf, len);
3058
max = XML_MAX_NAMELEN;
3066
return(xmlStrndup(BAD_CAST "", 0));
3071
* Check that the first character is proper to start
3074
if (!(((c >= 0x61) && (c <= 0x7A)) ||
3075
((c >= 0x41) && (c <= 0x5A)) ||
3076
(c == '_') || (c == ':'))) {
3078
int first = CUR_SCHAR(cur, l);
3080
if (!IS_LETTER(first) && (first != '_')) {
3081
xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3082
"Name %s is not XML Namespace compliant\n",
3088
while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3094
* Okay someone managed to make a huge name, so he's ready to pay
3095
* for the processing speed.
3099
buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3100
if (buffer == NULL) {
3101
xmlErrMemory(ctxt, NULL);
3104
memcpy(buffer, buf, len);
3105
while (c != 0) { /* tested bigname2.xml */
3106
if (len + 10 > max) {
3110
tmp = (xmlChar *) xmlRealloc(buffer,
3111
max * sizeof(xmlChar));
3113
xmlErrMemory(ctxt, NULL);
3126
ret = xmlStrndup(buf, len);
3135
/************************************************************************
3137
* The parser itself *
3138
* Relates to http://www.w3.org/TR/REC-xml *
3140
************************************************************************/
3142
/************************************************************************
3144
* Routines to parse Name, NCName and NmToken *
3146
************************************************************************/
3148
/*
 * File-local call counters for the name-parsing routines. Each one is
 * named after the routine expected to increment it; in the code below
 * xmlParseNameComplex, xmlParseNCNameComplex and xmlParseStringName
 * bump nbParseNameComplex, nbParseNCNameComplex and nbParseStringName
 * respectively.
 * NOTE(review): presumably statistics/debug only - confirm that nothing
 * reads these values to make parsing decisions.
 */
static unsigned long nbParseName = 0;
3149
static unsigned long nbParseNmToken = 0;
3150
static unsigned long nbParseNCName = 0;
3151
static unsigned long nbParseNCNameComplex = 0;
3152
static unsigned long nbParseNameComplex = 0;
3153
static unsigned long nbParseStringName = 0;
3157
* The two following functions are related to the change of accepted
3158
* characters for Name and NmToken in the Revision 5 of XML-1.0
3159
* They correspond to the modified production [4] and the new production [4a]
3160
* changes in that revision. Also note that the macros used for the
3161
* productions Letter, Digit, CombiningChar and Extender are not needed
3163
* We still keep compatibility to pre-revision5 parsing semantic if the
3164
* new XML_PARSE_OLD10 option is given to the parser.
3167
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3168
if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3170
* Use the new checks of production [4] [4a] and [5] of the
3171
* Update 5 of XML-1.0
3173
if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3174
(((c >= 'a') && (c <= 'z')) ||
3175
((c >= 'A') && (c <= 'Z')) ||
3176
(c == '_') || (c == ':') ||
3177
((c >= 0xC0) && (c <= 0xD6)) ||
3178
((c >= 0xD8) && (c <= 0xF6)) ||
3179
((c >= 0xF8) && (c <= 0x2FF)) ||
3180
((c >= 0x370) && (c <= 0x37D)) ||
3181
((c >= 0x37F) && (c <= 0x1FFF)) ||
3182
((c >= 0x200C) && (c <= 0x200D)) ||
3183
((c >= 0x2070) && (c <= 0x218F)) ||
3184
((c >= 0x2C00) && (c <= 0x2FEF)) ||
3185
((c >= 0x3001) && (c <= 0xD7FF)) ||
3186
((c >= 0xF900) && (c <= 0xFDCF)) ||
3187
((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3188
((c >= 0x10000) && (c <= 0xEFFFF))))
3191
if (IS_LETTER(c) || (c == '_') || (c == ':'))
3198
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3199
if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3201
* Use the new checks of production [4] [4a] and [5] of the
3202
* Update 5 of XML-1.0
3204
if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3205
(((c >= 'a') && (c <= 'z')) ||
3206
((c >= 'A') && (c <= 'Z')) ||
3207
((c >= '0') && (c <= '9')) || /* !start */
3208
(c == '_') || (c == ':') ||
3209
(c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3210
((c >= 0xC0) && (c <= 0xD6)) ||
3211
((c >= 0xD8) && (c <= 0xF6)) ||
3212
((c >= 0xF8) && (c <= 0x2FF)) ||
3213
((c >= 0x300) && (c <= 0x36F)) || /* !start */
3214
((c >= 0x370) && (c <= 0x37D)) ||
3215
((c >= 0x37F) && (c <= 0x1FFF)) ||
3216
((c >= 0x200C) && (c <= 0x200D)) ||
3217
((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3218
((c >= 0x2070) && (c <= 0x218F)) ||
3219
((c >= 0x2C00) && (c <= 0x2FEF)) ||
3220
((c >= 0x3001) && (c <= 0xD7FF)) ||
3221
((c >= 0xF900) && (c <= 0xFDCF)) ||
3222
((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3223
((c >= 0x10000) && (c <= 0xEFFFF))))
3226
if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3227
(c == '.') || (c == '-') ||
3228
(c == '_') || (c == ':') ||
3229
(IS_COMBINING(c)) ||
3236
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3237
int *len, int *alloc, int normalize);
3239
static const xmlChar *
3240
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3246
nbParseNameComplex++;
3250
* Handler for more complex cases
3253
if (ctxt->instate == XML_PARSER_EOF)
3256
if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3258
* Use the new checks of production [4] [4a] and [5] of the
3259
* Update 5 of XML-1.0
3261
if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3262
(!(((c >= 'a') && (c <= 'z')) ||
3263
((c >= 'A') && (c <= 'Z')) ||
3264
(c == '_') || (c == ':') ||
3265
((c >= 0xC0) && (c <= 0xD6)) ||
3266
((c >= 0xD8) && (c <= 0xF6)) ||
3267
((c >= 0xF8) && (c <= 0x2FF)) ||
3268
((c >= 0x370) && (c <= 0x37D)) ||
3269
((c >= 0x37F) && (c <= 0x1FFF)) ||
3270
((c >= 0x200C) && (c <= 0x200D)) ||
3271
((c >= 0x2070) && (c <= 0x218F)) ||
3272
((c >= 0x2C00) && (c <= 0x2FEF)) ||
3273
((c >= 0x3001) && (c <= 0xD7FF)) ||
3274
((c >= 0xF900) && (c <= 0xFDCF)) ||
3275
((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3276
((c >= 0x10000) && (c <= 0xEFFFF))))) {
3282
while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3283
(((c >= 'a') && (c <= 'z')) ||
3284
((c >= 'A') && (c <= 'Z')) ||
3285
((c >= '0') && (c <= '9')) || /* !start */
3286
(c == '_') || (c == ':') ||
3287
(c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3288
((c >= 0xC0) && (c <= 0xD6)) ||
3289
((c >= 0xD8) && (c <= 0xF6)) ||
3290
((c >= 0xF8) && (c <= 0x2FF)) ||
3291
((c >= 0x300) && (c <= 0x36F)) || /* !start */
3292
((c >= 0x370) && (c <= 0x37D)) ||
3293
((c >= 0x37F) && (c <= 0x1FFF)) ||
3294
((c >= 0x200C) && (c <= 0x200D)) ||
3295
((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3296
((c >= 0x2070) && (c <= 0x218F)) ||
3297
((c >= 0x2C00) && (c <= 0x2FEF)) ||
3298
((c >= 0x3001) && (c <= 0xD7FF)) ||
3299
((c >= 0xF900) && (c <= 0xFDCF)) ||
3300
((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3301
((c >= 0x10000) && (c <= 0xEFFFF))
3303
if (count++ > XML_PARSER_CHUNK_SIZE) {
3306
if (ctxt->instate == XML_PARSER_EOF)
3314
if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3315
(!IS_LETTER(c) && (c != '_') &&
3323
while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3324
((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3325
(c == '.') || (c == '-') ||
3326
(c == '_') || (c == ':') ||
3327
(IS_COMBINING(c)) ||
3328
(IS_EXTENDER(c)))) {
3329
if (count++ > XML_PARSER_CHUNK_SIZE) {
3332
if (ctxt->instate == XML_PARSER_EOF)
3341
if (ctxt->instate == XML_PARSER_EOF)
3347
if ((len > XML_MAX_NAME_LENGTH) &&
3348
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3349
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3352
if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3353
return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3354
return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3359
* @ctxt: an XML parser context
3361
* parse an XML name.
3363
* [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3364
* CombiningChar | Extender
3366
* [5] Name ::= (Letter | '_' | ':') (NameChar)*
3368
* [6] Names ::= Name (#x20 Name)*
3370
* Returns the Name parsed or NULL
3374
xmlParseName(xmlParserCtxtPtr ctxt) {
3386
* Accelerator for simple ASCII names
3388
in = ctxt->input->cur;
3389
if (((*in >= 0x61) && (*in <= 0x7A)) ||
3390
((*in >= 0x41) && (*in <= 0x5A)) ||
3391
(*in == '_') || (*in == ':')) {
3393
while (((*in >= 0x61) && (*in <= 0x7A)) ||
3394
((*in >= 0x41) && (*in <= 0x5A)) ||
3395
((*in >= 0x30) && (*in <= 0x39)) ||
3396
(*in == '_') || (*in == '-') ||
3397
(*in == ':') || (*in == '.'))
3399
if ((*in > 0) && (*in < 0x80)) {
3400
count = in - ctxt->input->cur;
3401
if ((count > XML_MAX_NAME_LENGTH) &&
3402
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3403
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3406
ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3407
ctxt->input->cur = in;
3408
ctxt->nbChars += count;
3409
ctxt->input->col += count;
3411
xmlErrMemory(ctxt, NULL);
3415
/* accelerator for special cases */
3416
return(xmlParseNameComplex(ctxt));
3419
static const xmlChar *
3420
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3424
const xmlChar *end; /* needed because CUR_CHAR() can move cur on \r\n */
3427
nbParseNCNameComplex++;
3431
* Handler for more complex cases
3434
end = ctxt->input->cur;
3436
if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3437
(!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3441
while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3442
(xmlIsNameChar(ctxt, c) && (c != ':'))) {
3443
if (count++ > XML_PARSER_CHUNK_SIZE) {
3444
if ((len > XML_MAX_NAME_LENGTH) &&
3445
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3446
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3451
if (ctxt->instate == XML_PARSER_EOF)
3456
end = ctxt->input->cur;
3461
if (ctxt->instate == XML_PARSER_EOF)
3463
end = ctxt->input->cur;
3467
if ((len > XML_MAX_NAME_LENGTH) &&
3468
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3469
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3472
return(xmlDictLookup(ctxt->dict, end - len, len));
3477
* @ctxt: an XML parser context
3478
* @len: length of the string parsed
3480
* parse an XML NCName (a Name that contains no ':').
3482
* [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3483
* CombiningChar | Extender
3485
* [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3487
* Returns the Name parsed or NULL
3490
static const xmlChar *
3491
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3501
* Accelerator for simple ASCII names
3503
in = ctxt->input->cur;
3504
if (((*in >= 0x61) && (*in <= 0x7A)) ||
3505
((*in >= 0x41) && (*in <= 0x5A)) ||
3508
while (((*in >= 0x61) && (*in <= 0x7A)) ||
3509
((*in >= 0x41) && (*in <= 0x5A)) ||
3510
((*in >= 0x30) && (*in <= 0x39)) ||
3511
(*in == '_') || (*in == '-') ||
3514
if ((*in > 0) && (*in < 0x80)) {
3515
count = in - ctxt->input->cur;
3516
if ((count > XML_MAX_NAME_LENGTH) &&
3517
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3518
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3521
ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3522
ctxt->input->cur = in;
3523
ctxt->nbChars += count;
3524
ctxt->input->col += count;
3526
xmlErrMemory(ctxt, NULL);
3531
return(xmlParseNCNameComplex(ctxt));
3535
* xmlParseNameAndCompare:
3536
* @ctxt: an XML parser context
3538
* parse an XML name and compares for match
3539
* (specialized for endtag parsing)
3541
* Returns NULL for an illegal name, (xmlChar*) 1 for success
3542
* and the name for mismatch
3545
static const xmlChar *
3546
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3547
register const xmlChar *cmp = other;
3548
register const xmlChar *in;
3552
if (ctxt->instate == XML_PARSER_EOF)
3555
in = ctxt->input->cur;
3556
while (*in != 0 && *in == *cmp) {
3561
if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3563
ctxt->input->cur = in;
3564
return (const xmlChar*) 1;
3566
/* failure (or end of input buffer), check with full function */
3567
ret = xmlParseName (ctxt);
3568
/* strings coming from the dictionary, direct pointer compare is possible */
3570
return (const xmlChar*) 1;
3576
* xmlParseStringName:
3577
* @ctxt: an XML parser context
3578
* @str: a pointer to the string pointer (IN/OUT)
3580
* parse an XML name.
3582
* [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3583
* CombiningChar | Extender
3585
* [5] Name ::= (Letter | '_' | ':') (NameChar)*
3587
* [6] Names ::= Name (#x20 Name)*
3589
* Returns the Name parsed or NULL. The @str pointer
3590
* is updated to the current location in the string.
3594
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3595
xmlChar buf[XML_MAX_NAMELEN + 5];
3596
const xmlChar *cur = *str;
3601
nbParseStringName++;
3604
c = CUR_SCHAR(cur, l);
3605
if (!xmlIsNameStartChar(ctxt, c)) {
3609
COPY_BUF(l,buf,len,c);
3611
c = CUR_SCHAR(cur, l);
3612
while (xmlIsNameChar(ctxt, c)) {
3613
COPY_BUF(l,buf,len,c);
3615
c = CUR_SCHAR(cur, l);
3616
if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3618
* Okay someone managed to make a huge name, so he's ready to pay
3619
* for the processing speed.
3624
buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3625
if (buffer == NULL) {
3626
xmlErrMemory(ctxt, NULL);
3629
memcpy(buffer, buf, len);
3630
while (xmlIsNameChar(ctxt, c)) {
3631
if (len + 10 > max) {
3634
if ((len > XML_MAX_NAME_LENGTH) &&
3635
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3636
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3641
tmp = (xmlChar *) xmlRealloc(buffer,
3642
max * sizeof(xmlChar));
3644
xmlErrMemory(ctxt, NULL);
3650
COPY_BUF(l,buffer,len,c);
3652
c = CUR_SCHAR(cur, l);
3659
if ((len > XML_MAX_NAME_LENGTH) &&
3660
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3661
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3665
return(xmlStrndup(buf, len));
3670
* @ctxt: an XML parser context
3672
* parse an XML Nmtoken.
3674
* [7] Nmtoken ::= (NameChar)+
3676
* [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3678
* Returns the Nmtoken parsed or NULL
3682
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3683
xmlChar buf[XML_MAX_NAMELEN + 5];
3693
if (ctxt->instate == XML_PARSER_EOF)
3697
while (xmlIsNameChar(ctxt, c)) {
3698
if (count++ > XML_PARSER_CHUNK_SIZE) {
3702
COPY_BUF(l,buf,len,c);
3708
if (ctxt->instate == XML_PARSER_EOF)
3712
if (len >= XML_MAX_NAMELEN) {
3714
* Okay someone managed to make a huge token, so he's ready to pay
3715
* for the processing speed.
3720
buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3721
if (buffer == NULL) {
3722
xmlErrMemory(ctxt, NULL);
3725
memcpy(buffer, buf, len);
3726
while (xmlIsNameChar(ctxt, c)) {
3727
if (count++ > XML_PARSER_CHUNK_SIZE) {
3730
if (ctxt->instate == XML_PARSER_EOF) {
3735
if (len + 10 > max) {
3738
if ((max > XML_MAX_NAME_LENGTH) &&
3739
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3740
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3745
tmp = (xmlChar *) xmlRealloc(buffer,
3746
max * sizeof(xmlChar));
3748
xmlErrMemory(ctxt, NULL);
3754
COPY_BUF(l,buffer,len,c);
3764
if ((len > XML_MAX_NAME_LENGTH) &&
3765
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3766
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3769
return(xmlStrndup(buf, len));
3773
* xmlParseEntityValue:
3774
* @ctxt: an XML parser context
3775
* @orig: if non-NULL store a copy of the original entity value
3777
* parse a value for ENTITY declarations
3779
* [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3780
* "'" ([^%&'] | PEReference | Reference)* "'"
3782
* Returns the EntityValue parsed with reference substituted or NULL
3786
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3787
xmlChar *buf = NULL;
3789
int size = XML_PARSER_BUFFER_SIZE;
3792
xmlChar *ret = NULL;
3793
const xmlChar *cur = NULL;
3794
xmlParserInputPtr input;
3796
if (RAW == '"') stop = '"';
3797
else if (RAW == '\'') stop = '\'';
3799
xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3802
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3804
xmlErrMemory(ctxt, NULL);
3809
* The content of the entity definition is copied in a buffer.
3812
ctxt->instate = XML_PARSER_ENTITY_VALUE;
3813
input = ctxt->input;
3815
if (ctxt->instate == XML_PARSER_EOF) {
3822
* NOTE: 4.4.5 Included in Literal
3823
* When a parameter entity reference appears in a literal entity
3824
* value, ... a single or double quote character in the replacement
3825
* text is always treated as a normal data character and will not
3826
* terminate the literal.
3827
* In practice it means we stop the loop only when back at parsing
3828
* the initial entity and the quote is found
3830
while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3831
(ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3832
if (len + 5 >= size) {
3836
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3838
xmlErrMemory(ctxt, NULL);
3844
COPY_BUF(l,buf,len,c);
3847
* Pop-up of finished entities.
3849
while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3860
if (ctxt->instate == XML_PARSER_EOF) {
3866
* Raise problem w.r.t. '&' and '%' being used in non-entities
3867
* reference constructs. Note Charref will be handled in
3868
* xmlStringDecodeEntities()
3871
while (*cur != 0) { /* non input consuming */
3872
if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3877
name = xmlParseStringName(ctxt, &cur);
3878
if ((name == NULL) || (*cur != ';')) {
3879
xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3880
"EntityValue: '%c' forbidden except for entities references\n",
3883
if ((tmp == '%') && (ctxt->inSubset == 1) &&
3884
(ctxt->inputNr == 1)) {
3885
xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3896
* Then PEReference entities are substituted.
3899
xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3904
* NOTE: 4.4.7 Bypassed
3905
* When a general entity reference appears in the EntityValue in
3906
* an entity declaration, it is bypassed and left as is.
3907
* so XML_SUBSTITUTE_REF is not set here.
3909
ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3921
* xmlParseAttValueComplex:
3922
* @ctxt: an XML parser context
3923
* @len: the resulting attribute len
3924
* @normalize: whether to apply the inner normalization
3926
* parse a value for an attribute, this is the fallback function
3927
* of xmlParseAttValue() when the attribute parsing requires handling
3928
* of non-ASCII characters, or normalization compaction.
3930
* Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3933
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3935
xmlChar *buf = NULL;
3936
xmlChar *rep = NULL;
3938
size_t buf_size = 0;
3939
int c, l, in_space = 0;
3940
xmlChar *current = NULL;
3943
if (NXT(0) == '"') {
3944
ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3947
} else if (NXT(0) == '\'') {
3949
ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3952
xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3957
* allocate a translation buffer.
3959
buf_size = XML_PARSER_BUFFER_SIZE;
3960
buf = (xmlChar *) xmlMallocAtomic(buf_size);
3961
if (buf == NULL) goto mem_error;
3964
* OK loop until we reach one of the ending char or a size limit.
3967
while (((NXT(0) != limit) && /* checked */
3968
(IS_CHAR(c)) && (c != '<')) &&
3969
(ctxt->instate != XML_PARSER_EOF)) {
3971
* Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3972
* special option is given
3974
if ((len > XML_MAX_TEXT_LENGTH) &&
3975
((ctxt->options & XML_PARSE_HUGE) == 0)) {
3976
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3977
"AttValue length too long\n");
3983
if (NXT(1) == '#') {
3984
int val = xmlParseCharRef(ctxt);
3987
if (ctxt->replaceEntities) {
3988
if (len + 10 > buf_size) {
3989
growBuffer(buf, 10);
3994
* The reparsing will be done in xmlStringGetNodeList()
3995
* called by the attribute() function in SAX.c
3997
if (len + 10 > buf_size) {
3998
growBuffer(buf, 10);
4006
} else if (val != 0) {
4007
if (len + 10 > buf_size) {
4008
growBuffer(buf, 10);
4010
len += xmlCopyChar(0, &buf[len], val);
4013
ent = xmlParseEntityRef(ctxt);
4016
ctxt->nbentities += ent->owner;
4017
if ((ent != NULL) &&
4018
(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4019
if (len + 10 > buf_size) {
4020
growBuffer(buf, 10);
4022
if ((ctxt->replaceEntities == 0) &&
4023
(ent->content[0] == '&')) {
4030
buf[len++] = ent->content[0];
4032
} else if ((ent != NULL) &&
4033
(ctxt->replaceEntities != 0)) {
4034
if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4035
rep = xmlStringDecodeEntities(ctxt, ent->content,
4040
while (*current != 0) { /* non input consuming */
4041
if ((*current == 0xD) || (*current == 0xA) ||
4042
(*current == 0x9)) {
4046
buf[len++] = *current++;
4047
if (len + 10 > buf_size) {
4048
growBuffer(buf, 10);
4055
if (len + 10 > buf_size) {
4056
growBuffer(buf, 10);
4058
if (ent->content != NULL)
4059
buf[len++] = ent->content[0];
4061
} else if (ent != NULL) {
4062
int i = xmlStrlen(ent->name);
4063
const xmlChar *cur = ent->name;
4066
* This may look absurd but is needed to detect
4069
if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4070
(ent->content != NULL) && (ent->checked == 0)) {
4071
unsigned long oldnbent = ctxt->nbentities;
4073
rep = xmlStringDecodeEntities(ctxt, ent->content,
4074
XML_SUBSTITUTE_REF, 0, 0, 0);
4076
ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
4078
if (xmlStrchr(rep, '<'))
4086
* Just output the reference
4089
while (len + i + 10 > buf_size) {
4090
growBuffer(buf, i + 10);
4093
buf[len++] = *cur++;
4098
if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4099
if ((len != 0) || (!normalize)) {
4100
if ((!normalize) || (!in_space)) {
4101
COPY_BUF(l,buf,len,0x20);
4102
while (len + 10 > buf_size) {
4103
growBuffer(buf, 10);
4110
COPY_BUF(l,buf,len,c);
4111
if (len + 10 > buf_size) {
4112
growBuffer(buf, 10);
4120
if (ctxt->instate == XML_PARSER_EOF)
4123
if ((in_space) && (normalize)) {
4124
while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4128
xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4129
} else if (RAW != limit) {
4130
if ((c != 0) && (!IS_CHAR(c))) {
4131
xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4132
"invalid character in attribute value\n");
4134
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4135
"AttValue: ' expected\n");
4141
* There we potentially risk an overflow, don't allow attribute value of
4142
* length more than INT_MAX, it is a very reasonable assumption !
4144
if (len >= INT_MAX) {
4145
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4146
"AttValue length too long\n");
4150
if (attlen != NULL) *attlen = (int) len;
4154
xmlErrMemory(ctxt, NULL);
4165
* @ctxt: an XML parser context
4167
* parse a value for an attribute
4168
* Note: the parser won't do substitution of entities here, this
4169
* will be handled later in xmlStringGetNodeList
4171
* [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4172
* "'" ([^<&'] | Reference)* "'"
4174
* 3.3.3 Attribute-Value Normalization:
4175
* Before the value of an attribute is passed to the application or
4176
* checked for validity, the XML processor must normalize it as follows:
4177
* - a character reference is processed by appending the referenced
4178
* character to the attribute value
4179
* - an entity reference is processed by recursively processing the
4180
* replacement text of the entity
4181
* - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4182
* appending #x20 to the normalized value, except that only a single
4183
* #x20 is appended for a "#xD#xA" sequence that is part of an external
4184
* parsed entity or the literal entity value of an internal parsed entity
4185
* - other characters are processed by appending them to the normalized value
4186
* If the declared value is not CDATA, then the XML processor must further
4187
* process the normalized attribute value by discarding any leading and
4188
* trailing space (#x20) characters, and by replacing sequences of space
4189
* (#x20) characters by a single space (#x20) character.
4190
* All attributes for which no declaration has been read should be treated
4191
* by a non-validating parser as if declared CDATA.
4193
* Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4198
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4199
if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4200
return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4204
* xmlParseSystemLiteral:
4205
* @ctxt: an XML parser context
4207
* parse an XML Literal
4209
* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4211
* Returns the SystemLiteral parsed or NULL
4215
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4216
xmlChar *buf = NULL;
4218
int size = XML_PARSER_BUFFER_SIZE;
4221
int state = ctxt->instate;
4228
} else if (RAW == '\'') {
4232
xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4236
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4238
xmlErrMemory(ctxt, NULL);
4241
ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4243
while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4244
if (len + 5 >= size) {
4247
if ((size > XML_MAX_NAME_LENGTH) &&
4248
((ctxt->options & XML_PARSE_HUGE) == 0)) {
4249
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4251
ctxt->instate = (xmlParserInputState) state;
4255
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4258
xmlErrMemory(ctxt, NULL);
4259
ctxt->instate = (xmlParserInputState) state;
4268
if (ctxt->instate == XML_PARSER_EOF) {
4273
COPY_BUF(l,buf,len,cur);
4283
ctxt->instate = (xmlParserInputState) state;
4284
if (!IS_CHAR(cur)) {
4285
xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4293
* xmlParsePubidLiteral:
4294
* @ctxt: an XML parser context
4296
* parse an XML public literal
4298
* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4300
* Returns the PubidLiteral parsed or NULL.
4304
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4305
xmlChar *buf = NULL;
4307
int size = XML_PARSER_BUFFER_SIZE;
4311
xmlParserInputState oldstate = ctxt->instate;
4317
} else if (RAW == '\'') {
4321
xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4324
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4326
xmlErrMemory(ctxt, NULL);
4329
ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4331
while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4332
if (len + 1 >= size) {
4335
if ((size > XML_MAX_NAME_LENGTH) &&
4336
((ctxt->options & XML_PARSE_HUGE) == 0)) {
4337
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4342
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4344
xmlErrMemory(ctxt, NULL);
4355
if (ctxt->instate == XML_PARSER_EOF) {
4370
xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4374
ctxt->instate = oldstate;
4378
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4381
* used for the test in the inner loop of the char data testing
4383
static const unsigned char test_char_data[256] = {
4384
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4385
0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4386
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4387
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4388
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4389
0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4390
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4391
0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4392
0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4393
0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4394
0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4395
0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4396
0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4397
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4398
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4399
0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4400
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4401
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4402
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4403
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4404
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4405
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4406
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4407
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4408
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4409
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4410
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4411
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4412
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4413
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4414
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4415
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4420
* @ctxt: an XML parser context
4421
* @cdata: int indicating whether we are within a CDATA section
4423
* parse a CharData section.
4424
* if we are within a CDATA section ']]>' marks an end of section.
4426
* The right angle bracket (>) may be represented using the string "&gt;",
4427
* and must, for compatibility, be escaped using "&gt;" or a character
4428
* reference when it appears in the string "]]>" in content, when that
4429
* string is not marking the end of a CDATA section.
4431
* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4435
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4438
int line = ctxt->input->line;
4439
int col = ctxt->input->col;
4445
* Accelerated common case where input doesn't need to be
4446
* modified before passing it to the handler.
4449
in = ctxt->input->cur;
4452
while (*in == 0x20) { in++; ctxt->input->col++; }
4455
ctxt->input->line++; ctxt->input->col = 1;
4457
} while (*in == 0xA);
4458
goto get_more_space;
4461
nbchar = in - ctxt->input->cur;
4463
const xmlChar *tmp = ctxt->input->cur;
4464
ctxt->input->cur = in;
4466
if ((ctxt->sax != NULL) &&
4467
(ctxt->sax->ignorableWhitespace !=
4468
ctxt->sax->characters)) {
4469
if (areBlanks(ctxt, tmp, nbchar, 1)) {
4470
if (ctxt->sax->ignorableWhitespace != NULL)
4471
ctxt->sax->ignorableWhitespace(ctxt->userData,
4474
if (ctxt->sax->characters != NULL)
4475
ctxt->sax->characters(ctxt->userData,
4477
if (*ctxt->space == -1)
4480
} else if ((ctxt->sax != NULL) &&
4481
(ctxt->sax->characters != NULL)) {
4482
ctxt->sax->characters(ctxt->userData,
4490
ccol = ctxt->input->col;
4491
while (test_char_data[*in]) {
4495
ctxt->input->col = ccol;
4498
ctxt->input->line++; ctxt->input->col = 1;
4500
} while (*in == 0xA);
4504
if ((in[1] == ']') && (in[2] == '>')) {
4505
xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4506
ctxt->input->cur = in;
4513
nbchar = in - ctxt->input->cur;
4515
if ((ctxt->sax != NULL) &&
4516
(ctxt->sax->ignorableWhitespace !=
4517
ctxt->sax->characters) &&
4518
(IS_BLANK_CH(*ctxt->input->cur))) {
4519
const xmlChar *tmp = ctxt->input->cur;
4520
ctxt->input->cur = in;
4522
if (areBlanks(ctxt, tmp, nbchar, 0)) {
4523
if (ctxt->sax->ignorableWhitespace != NULL)
4524
ctxt->sax->ignorableWhitespace(ctxt->userData,
4527
if (ctxt->sax->characters != NULL)
4528
ctxt->sax->characters(ctxt->userData,
4530
if (*ctxt->space == -1)
4533
line = ctxt->input->line;
4534
col = ctxt->input->col;
4535
} else if (ctxt->sax != NULL) {
4536
if (ctxt->sax->characters != NULL)
4537
ctxt->sax->characters(ctxt->userData,
4538
ctxt->input->cur, nbchar);
4539
line = ctxt->input->line;
4540
col = ctxt->input->col;
4542
/* something really bad happened in the SAX callback */
4543
if (ctxt->instate != XML_PARSER_CONTENT)
4546
ctxt->input->cur = in;
4550
ctxt->input->cur = in;
4552
ctxt->input->line++; ctxt->input->col = 1;
4553
continue; /* while */
4565
if (ctxt->instate == XML_PARSER_EOF)
4567
in = ctxt->input->cur;
4568
} while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4571
ctxt->input->line = line;
4572
ctxt->input->col = col;
4573
xmlParseCharDataComplex(ctxt, cdata);
4577
* xmlParseCharDataComplex:
4578
* @ctxt: an XML parser context
4579
* @cdata: int indicating whether we are within a CDATA section
4581
* parse a CharData section. This is the fallback function
4582
* of xmlParseCharData() when the parsing requires handling
4583
* of non-ASCII characters.
4586
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
/*
 * Slow path for character data: characters are copied one at a time into a
 * local buffer with COPY_BUF and the buffer is handed to SAX
 * (ignorableWhitespace when areBlanks() reports blanks, characters
 * otherwise) once nbchar reaches XML_PARSER_BIG_BUFFER_SIZE. An equivalent
 * delivery sequence appears a second time further down.
 * NOTE(review): this listing is missing lines (declarations, braces,
 * returns); read the comments against the complete source.
 */
4587
/* 5 bytes of slack past the flush threshold -- presumably headroom for one
 * multi-byte COPY_BUF write before the size test; confirm against COPY_BUF. */
xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4595
while ((cur != '<') && /* checked */
4597
(IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4598
/* ]] at the current position starts the misplaced CDATA end test; the last
 * part of the condition is not visible in this listing. */
if ((cur == ']') && (NXT(1) == ']') &&
4602
xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4605
COPY_BUF(l,buf,nbchar,cur);
4606
/* Buffer reached the threshold: deliver it to SAX. */
if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4610
* OK the segment is to be consumed as chars.
4612
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4613
if (areBlanks(ctxt, buf, nbchar, 0)) {
4614
if (ctxt->sax->ignorableWhitespace != NULL)
4615
ctxt->sax->ignorableWhitespace(ctxt->userData,
4618
if (ctxt->sax->characters != NULL)
4619
ctxt->sax->characters(ctxt->userData, buf, nbchar);
4620
if ((ctxt->sax->characters !=
4621
ctxt->sax->ignorableWhitespace) &&
4622
(*ctxt->space == -1))
4627
/* something really bad happened in the SAX callback */
4628
if (ctxt->instate != XML_PARSER_CONTENT)
4635
if (ctxt->instate == XML_PARSER_EOF)
4644
* OK the segment is to be consumed as chars.
4646
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4647
if (areBlanks(ctxt, buf, nbchar, 0)) {
4648
if (ctxt->sax->ignorableWhitespace != NULL)
4649
ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4651
if (ctxt->sax->characters != NULL)
4652
ctxt->sax->characters(ctxt->userData, buf, nbchar);
4653
if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4654
(*ctxt->space == -1))
4659
/* A non-zero value that fails IS_CHAR is reported as XML_ERR_INVALID_CHAR. */
if ((cur != 0) && (!IS_CHAR(cur))) {
4660
/* Generate the error and skip the offending character */
4661
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4662
"PCDATA invalid Char value %d\n",
4669
* xmlParseExternalID:
4670
* @ctxt: an XML parser context
4671
* @publicID: a xmlChar** receiving PubidLiteral
4672
* @strict: indicate whether we should restrict parsing to only
4673
* production [75], see NOTE below
4675
* Parse an External ID or a Public ID
4677
* NOTE: Productions [75] and [83] interact badly since [75] can generate
4678
* 'PUBLIC' S PubidLiteral S SystemLiteral
4680
* [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4681
* | 'PUBLIC' S PubidLiteral S SystemLiteral
4683
* [83] PublicID ::= 'PUBLIC' S PubidLiteral
4685
* Returns the SystemLiteral and, in the second
4686
* case, publicID receives PubidLiteral; if strict is off
4687
* it is possible to return NULL and have publicID set.
4691
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
/*
 * Parses an external identifier introduced by the SYSTEM or PUBLIC keyword.
 * The system literal is obtained from xmlParseSystemLiteral() and stored in
 * URI; for PUBLIC the public identifier from xmlParsePubidLiteral() is
 * stored through publicID first.
 * NOTE(review): this listing is missing lines (declarations, braces,
 * returns); read the comments against the complete source.
 */
4692
xmlChar *URI = NULL;
4697
/* SYSTEM branch: a blank is required after the keyword, then the literal. */
if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4699
if (!IS_BLANK_CH(CUR)) {
4700
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4701
"Space required after 'SYSTEM'\n");
4704
URI = xmlParseSystemLiteral(ctxt);
4706
xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4708
/* PUBLIC branch: public identifier first; a missing one is a fatal
 * XML_ERR_PUBID_REQUIRED. What follows depends on strict (see the
 * original comments below). */
} else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4710
if (!IS_BLANK_CH(CUR)) {
4711
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4712
"Space required after 'PUBLIC'\n");
4715
*publicID = xmlParsePubidLiteral(ctxt);
4716
if (*publicID == NULL) {
4717
xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4721
* We don't handle [83] so "S SystemLiteral" is required.
4723
if (!IS_BLANK_CH(CUR)) {
4724
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4725
"Space required after the Public Identifier\n");
4729
* We handle [83] so we return immediately, if
4730
* "S SystemLiteral" is not detected. From a purely parsing
4731
* point of view that's a nice mess.
4737
if (!IS_BLANK_CH(*ptr)) return(NULL);
4739
while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
4740
if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4743
URI = xmlParseSystemLiteral(ctxt);
4745
xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4752
* xmlParseCommentComplex:
4753
* @ctxt: an XML parser context
4754
* @buf: the already parsed part of the buffer
4755
* @len: number of bytes filled in the buffer
4756
* @size: allocated size of the buffer
4758
* Skip an XML (SGML) comment <!-- .... -->
4759
* The spec says that "For compatibility, the string "--" (double-hyphen)
4760
* must not occur within comments. "
4761
* This is the slow routine in case the accelerator for ascii didn't work
4763
* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4766
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4767
size_t len, size_t size) {
/*
 * Slow path of comment parsing. buf, len and size describe the buffer
 * handed over by the caller; the lines below also allocate a fresh buffer of
 * XML_PARSER_BUFFER_SIZE (the guarding condition is not visible in this
 * listing -- presumably when buf is NULL). The finished text is passed to
 * the SAX comment callback.
 * NOTE(review): this listing is missing lines (declarations, braces,
 * returns); read the comments against the complete source.
 */
4774
/* Input the comment started in; compared again near the end to detect a
 * comment that crosses an entity boundary. */
inputid = ctxt->input->id;
4778
size = XML_PARSER_BUFFER_SIZE;
4779
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4781
xmlErrMemory(ctxt, NULL);
4785
GROW; /* Assure there's enough input data */
4788
goto not_terminated;
4790
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4791
"xmlParseComment: invalid xmlChar value %d\n",
4799
goto not_terminated;
4801
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4802
"xmlParseComment: invalid xmlChar value %d\n",
4810
goto not_terminated;
4811
/* q, r and cur hold consecutive characters; the loop runs until cur is
 * no longer a valid char or the terminating sequence is reached. */
while (IS_CHAR(cur) && /* checked */
4813
(r != '-') || (q != '-'))) {
4814
/* Two consecutive hyphens that do not end the comment are an error. */
if ((r == '-') && (q == '-')) {
4815
xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4817
/* Size guard: without XML_PARSE_HUGE, a comment longer than
 * XML_MAX_TEXT_LENGTH is a fatal error. */
if ((len > XML_MAX_TEXT_LENGTH) &&
4818
((ctxt->options & XML_PARSE_HUGE) == 0)) {
4819
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4820
"Comment too big found", NULL);
4824
/* Grow once fewer than 5 bytes of headroom remain.
 * NOTE(review): xmlRealloc gets new_size without the sizeof(xmlChar)
 * factor used by the xmlMallocAtomic call above; equivalent only if
 * sizeof(xmlChar) is 1 -- confirm. */
if (len + 5 >= size) {
4828
new_size = size * 2;
4829
new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4830
if (new_buf == NULL) {
4832
xmlErrMemory(ctxt, NULL);
4838
COPY_BUF(ql,buf,len,q);
4848
if (ctxt->instate == XML_PARSER_EOF) {
4863
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4864
"Comment not terminated \n<!--%.50s\n", buf);
4865
} else if (!IS_CHAR(cur)) {
4866
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4867
"xmlParseComment: invalid xmlChar value %d\n",
4870
/* Entity boundary check against the id saved at the top. */
if (inputid != ctxt->input->id) {
4871
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4872
"Comment doesn't start and stop in the same entity\n");
4875
if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4876
(!ctxt->disableSAX))
4877
ctxt->sax->comment(ctxt->userData, buf);
4882
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4883
"Comment not terminated\n", NULL);
4890
* @ctxt: an XML parser context
4892
* Skip an XML (SGML) comment <!-- .... -->
4893
* The spec says that "For compatibility, the string "--" (double-hyphen)
4894
* must not occur within comments. "
4896
* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4899
xmlParseComment(xmlParserCtxtPtr ctxt) {
/*
 * Entry point for comment parsing. Does nothing unless the input is
 * positioned on the four characters that open a comment. A byte-wise fast
 * path accumulates the comment text into buf and reports it through the SAX
 * comment callback; when the fast scan stops on a byte outside 0x20..0x7F
 * and 0x09, xmlParseCommentComplex() continues with the partially filled
 * buffer (buf, len, size).
 * NOTE(review): this listing is missing lines (declarations, braces,
 * returns); read the comments against the complete source.
 */
4900
xmlChar *buf = NULL;
4901
size_t size = XML_PARSER_BUFFER_SIZE;
4903
/* Parser state on entry; put back into ctxt->instate on the exit paths. */
xmlParserInputState state;
4910
* Check that there is a comment right here.
4912
if ((RAW != '<') || (NXT(1) != '!') ||
4913
(NXT(2) != '-') || (NXT(3) != '-')) return;
4914
state = ctxt->instate;
4915
ctxt->instate = XML_PARSER_COMMENT;
4916
/* Input the comment started in, used for the entity boundary check. */
inputid = ctxt->input->id;
4922
* Accelerated common case where input don't need to be
4923
* modified before passing it to the handler.
4925
in = ctxt->input->cur;
4929
ctxt->input->line++; ctxt->input->col = 1;
4931
} while (*in == 0xA);
4934
ccol = ctxt->input->col;
4935
while (((*in > '-') && (*in <= 0x7F)) ||
4936
((*in >= 0x20) && (*in < '-')) ||
4941
ctxt->input->col = ccol;
4944
ctxt->input->line++; ctxt->input->col = 1;
4946
} while (*in == 0xA);
4949
nbchar = in - ctxt->input->cur;
4951
* save current set of data
4954
if ((ctxt->sax != NULL) &&
4955
(ctxt->sax->comment != NULL)) {
4957
if ((*in == '-') && (in[1] == '-'))
4960
size = XML_PARSER_BUFFER_SIZE + nbchar;
4961
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4963
xmlErrMemory(ctxt, NULL);
4964
ctxt->instate = state;
4968
} else if (len + nbchar + 1 >= size) {
4970
size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4971
new_buf = (xmlChar *) xmlRealloc(buf,
4972
size * sizeof(xmlChar));
4973
if (new_buf == NULL) {
4975
xmlErrMemory(ctxt, NULL);
4976
ctxt->instate = state;
4981
memcpy(&buf[len], ctxt->input->cur, nbchar);
4986
if ((len > XML_MAX_TEXT_LENGTH) &&
4987
((ctxt->options & XML_PARSE_HUGE) == 0)) {
4988
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4989
"Comment too big found", NULL);
4993
ctxt->input->cur = in;
4996
ctxt->input->line++; ctxt->input->col = 1;
5001
ctxt->input->cur = in;
5003
ctxt->input->line++; ctxt->input->col = 1;
5004
continue; /* while */
5010
if (ctxt->instate == XML_PARSER_EOF) {
5014
in = ctxt->input->cur;
5018
if (ctxt->input->id != inputid) {
5019
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5020
"comment doesn't start and stop in the same entity\n");
5023
if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5024
(!ctxt->disableSAX)) {
5026
ctxt->sax->comment(ctxt->userData, buf);
5028
ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5032
if (ctxt->instate != XML_PARSER_EOF)
5033
ctxt->instate = state;
5037
xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5038
"Double hyphen within comment: "
5042
xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5043
"Double hyphen within comment\n", NULL);
5051
} while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
5052
xmlParseCommentComplex(ctxt, buf, len, size);
5053
ctxt->instate = state;
5060
* @ctxt: an XML parser context
5062
* parse the name of a PI
5064
* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5066
* Returns the PITarget name or NULL
5070
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
/*
 * Parses a Name with xmlParseName() and vets it as a PI target. Names whose
 * first three letters are x, m, l in any letter case get the reserved-name
 * handling below; any name containing a colon is reported through
 * xmlNsErr().
 * NOTE(review): this listing is missing lines (declarations, braces,
 * returns); read the comments against the complete source.
 */
5071
const xmlChar *name;
5073
name = xmlParseName(ctxt);
5074
/* Case-insensitive test of the first three characters. */
if ((name != NULL) &&
5075
((name[0] == 'x') || (name[0] == 'X')) &&
5076
((name[1] == 'm') || (name[1] == 'M')) &&
5077
((name[2] == 'l') || (name[2] == 'L'))) {
5079
/* Exactly the lowercase three-letter name: an XML declaration that is
 * not at the start of the document. */
if ((name[0] == 'x') && (name[1] == 'm') &&
5080
(name[2] == 'l') && (name[3] == 0)) {
5081
xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5082
"XML declaration allowed only at the start of the document\n");
5084
/* Any other three-letter case variant is a reserved name. */
} else if (name[3] == 0) {
5085
xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5089
/* xmlW3CPIs is scanned up to its NULL entry and compared with the name;
 * the action taken on a match is not visible in this listing. */
if (xmlW3CPIs[i] == NULL) break;
5090
if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5093
/* Remaining names with the reserved prefix only produce a warning. */
xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5094
"xmlParsePITarget: invalid name prefix 'xml'\n",
5097
if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5098
xmlNsErr(ctxt, XML_NS_ERR_COLON,
5099
"colon are forbidden from PI names '%s'\n", name, NULL, NULL);
5104
#ifdef LIBXML_CATALOG_ENABLED
5106
* xmlParseCatalogPI:
5107
* @ctxt: an XML parser context
5108
* @catalog: the PI value string
5110
* parse an XML Catalog Processing Instruction.
5112
* <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5114
* Occurs only if allowed by the user and if happening in the Misc
5115
* part of the document before any doctype information
5116
* This will add the given catalog to the parsing context in order
5117
* to be used if there is a resolution need further down in the document
5121
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
/*
 * Scans the PI content by hand: optional blanks, the 7-character catalog
 * keyword, more blanks, then a value delimited by a single or double quote.
 * The text between the quotes is duplicated into URL and registered with
 * xmlCatalogAddLocal() on ctxt->catalogs. Malformed content produces an
 * XML_WAR_CATALOG_PI warning.
 * NOTE(review): this listing is missing lines (declarations, braces,
 * returns); read the comments against the complete source.
 */
5122
xmlChar *URL = NULL;
5123
const xmlChar *tmp, *base;
5127
while (IS_BLANK_CH(*tmp)) tmp++;
5128
if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5131
while (IS_BLANK_CH(*tmp)) tmp++;
5136
while (IS_BLANK_CH(*tmp)) tmp++;
5138
/* marker must be one of the two quote characters. */
if ((marker != '\'') && (marker != '"'))
5142
/* Advance to the matching quote, stopping at the end of the string. */
while ((*tmp != 0) && (*tmp != marker)) tmp++;
5145
URL = xmlStrndup(base, tmp - base);
5147
while (IS_BLANK_CH(*tmp)) tmp++;
5152
ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5158
xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5159
"Catalog PI syntax error: %s\n",
5168
* @ctxt: an XML parser context
5170
* parse an XML Processing Instruction.
5172
* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5174
* The processing is transferred to SAX once parsed.
5178
xmlParsePI(xmlParserCtxtPtr ctxt) {
/*
 * Parses a processing instruction when the input is positioned on < and ?.
 * The target comes from xmlParsePITarget(); the content is accumulated in a
 * growable buffer and delivered through the SAX processingInstruction
 * callback. ctxt->instate is set to XML_PARSER_PI while parsing and put back
 * to the saved state on the exit paths unless the parser reached
 * XML_PARSER_EOF.
 * NOTE(review): this listing is missing lines (declarations, braces,
 * returns); read the comments against the complete source.
 */
5179
xmlChar *buf = NULL;
5181
size_t size = XML_PARSER_BUFFER_SIZE;
5183
const xmlChar *target;
5184
xmlParserInputState state;
5187
if ((RAW == '<') && (NXT(1) == '?')) {
5188
/* Input the PI started in, used for the entity boundary checks. */
xmlParserInputPtr input = ctxt->input;
5189
state = ctxt->instate;
5190
ctxt->instate = XML_PARSER_PI;
5192
* this is a Processing Instruction.
5198
* Parse the target name and check for special support like
5201
target = xmlParsePITarget(ctxt);
5202
if (target != NULL) {
5203
/* Target immediately followed by ?> : a PI without content. */
if ((RAW == '?') && (NXT(1) == '>')) {
5204
if (input != ctxt->input) {
5205
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5206
"PI declaration doesn't start and stop in the same entity\n");
5213
if ((ctxt->sax) && (!ctxt->disableSAX) &&
5214
(ctxt->sax->processingInstruction != NULL))
5215
ctxt->sax->processingInstruction(ctxt->userData,
5217
if (ctxt->instate != XML_PARSER_EOF)
5218
ctxt->instate = state;
5221
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5223
xmlErrMemory(ctxt, NULL);
5224
ctxt->instate = state;
5228
/* A blank must separate the target from the content. */
if (!IS_BLANK(cur)) {
5229
xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5230
"ParsePI: PI %s space expected\n", target);
5234
while (IS_CHAR(cur) && /* checked */
5235
((cur != '?') || (NXT(1) != '>'))) {
5236
/* Double the buffer once fewer than 5 bytes of headroom remain. */
if (len + 5 >= size) {
5238
size_t new_size = size * 2;
5239
tmp = (xmlChar *) xmlRealloc(buf, new_size);
5241
xmlErrMemory(ctxt, NULL);
5243
ctxt->instate = state;
5252
if (ctxt->instate == XML_PARSER_EOF) {
5257
/* Size guard: without XML_PARSE_HUGE, content longer than
 * XML_MAX_TEXT_LENGTH is a fatal error. */
if ((len > XML_MAX_TEXT_LENGTH) &&
5258
((ctxt->options & XML_PARSE_HUGE) == 0)) {
5259
xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5260
"PI %s too big found", target);
5262
ctxt->instate = state;
5266
COPY_BUF(l,buf,len,cur);
5275
if ((len > XML_MAX_TEXT_LENGTH) &&
5276
((ctxt->options & XML_PARSE_HUGE) == 0)) {
5277
xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5278
"PI %s too big found", target);
5280
ctxt->instate = state;
5285
xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5286
"ParsePI: PI %s never end ...\n", target);
5288
/* NOTE(review): this is the same entity boundary diagnostic as in the
 * no-content branch above, but it is reported as XML_ERR_SPACE_REQUIRED
 * here versus XML_ERR_ENTITY_BOUNDARY there; the codes probably should
 * match -- confirm. */
if (input != ctxt->input) {
5289
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5290
"PI declaration doesn't start and stop in the same entity\n");
5294
#ifdef LIBXML_CATALOG_ENABLED
5295
/* The catalog PI is only honoured when the PI was met in the MISC or START
 * state and the catalog defaults allow document (or all) catalogs. */
if (((state == XML_PARSER_MISC) ||
5296
(state == XML_PARSER_START)) &&
5297
(xmlStrEqual(target, XML_CATALOG_PI))) {
5298
xmlCatalogAllow allow = xmlCatalogGetDefaults();
5299
if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5300
(allow == XML_CATA_ALLOW_ALL))
5301
xmlParseCatalogPI(ctxt, buf);
5309
if ((ctxt->sax) && (!ctxt->disableSAX) &&
5310
(ctxt->sax->processingInstruction != NULL))
5311
ctxt->sax->processingInstruction(ctxt->userData,
5316
xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5318
if (ctxt->instate != XML_PARSER_EOF)
5319
ctxt->instate = state;
5324
* xmlParseNotationDecl:
5325
* @ctxt: an XML parser context
5327
* parse a notation declaration
5329
* [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5331
* Hence there are actually 3 choices:
5332
* 'PUBLIC' S PubidLiteral
5333
* 'PUBLIC' S PubidLiteral S SystemLiteral
5334
* and 'SYSTEM' S SystemLiteral
5336
* See the NOTE on xmlParseExternalID().
5340
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
/*
 * Parses a notation declaration when the input is positioned on the
 * NOTATION declaration opener: the notation name, then the identifiers via
 * xmlParseExternalID(), then the SAX notationDecl callback. Both identifier
 * strings are freed before leaving.
 * NOTE(review): this listing is missing lines (declarations, braces,
 * returns); read the comments against the complete source.
 */
5341
const xmlChar *name;
5345
if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5346
/* Input the declaration started in, compared again before the callback. */
xmlParserInputPtr input = ctxt->input;
5349
if (!IS_BLANK_CH(CUR)) {
5350
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5351
"Space required after '<!NOTATION'\n");
5356
name = xmlParseName(ctxt);
5358
xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5361
if (!IS_BLANK_CH(CUR)) {
5362
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5363
"Space required after the NOTATION name'\n");
5366
/* A colon in the name is reported as a namespace error. */
if (xmlStrchr(name, ':') != NULL) {
5367
xmlNsErr(ctxt, XML_NS_ERR_COLON,
5368
"colon are forbidden from notation names '%s'\n",
5376
/* strict is 0 here, so a PUBLIC identifier without a system literal is
 * accepted (Systemid may then be NULL with Pubid set). */
Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5380
/* NOTE(review): an entity boundary message reported with
 * XML_ERR_SPACE_REQUIRED; xmlParseEntityDecl and xmlParseComment in this
 * file use XML_ERR_ENTITY_BOUNDARY for the equivalent check -- confirm
 * which code is intended. */
if (input != ctxt->input) {
5381
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5382
"Notation declaration doesn't start and stop in the same entity\n");
5385
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5386
(ctxt->sax->notationDecl != NULL))
5387
ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5389
xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5391
if (Systemid != NULL) xmlFree(Systemid);
5392
if (Pubid != NULL) xmlFree(Pubid);
5397
* xmlParseEntityDecl:
5398
* @ctxt: an XML parser context
5400
* parse <!ENTITY declarations
5402
* [70] EntityDecl ::= GEDecl | PEDecl
5404
* [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5406
* [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5408
* [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5410
* [74] PEDef ::= EntityValue | ExternalID
5412
* [76] NDataDecl ::= S 'NDATA' S Name
5414
* [ VC: Notation Declared ]
5415
* The Name must match the declared name of a notation.
5419
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
/*
 * Parses an ENTITY declaration. Four kinds are reported through SAX, as
 * shown by the entity type constants below: internal and external parameter
 * entities, internal general entities, and external general entities
 * (unparsed when an NDATA part follows, parsed otherwise). value, URI and
 * literal are freed at the end.
 * NOTE(review): this listing is missing lines (declarations, braces,
 * returns); read the comments against the complete source.
 */
5420
const xmlChar *name = NULL;
5421
xmlChar *value = NULL;
5422
xmlChar *URI = NULL, *literal = NULL;
5423
const xmlChar *ndata = NULL;
5424
int isParameter = 0;
5425
/* Receives the second output of xmlParseEntityValue(). */
xmlChar *orig = NULL;
5428
/* GROW; done in the caller */
5429
if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5430
/* Input the declaration started in, compared again near the end. */
xmlParserInputPtr input = ctxt->input;
5433
skipped = SKIP_BLANKS;
5435
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5436
"Space required after '<!ENTITY'\n");
5441
skipped = SKIP_BLANKS;
5443
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5444
"Space required after '%'\n");
5449
name = xmlParseName(ctxt);
5451
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5452
"xmlParseEntityDecl: no name\n");
5455
/* A colon in the name is reported as a namespace error. */
if (xmlStrchr(name, ':') != NULL) {
5456
xmlNsErr(ctxt, XML_NS_ERR_COLON,
5457
"colon are forbidden from entities names '%s'\n",
5460
skipped = SKIP_BLANKS;
5462
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5463
"Space required after the entity name\n");
5466
ctxt->instate = XML_PARSER_ENTITY_DECL;
5468
* handle the various case of definitions...
5471
/* Quoted value: internal parameter entity. */
if ((RAW == '"') || (RAW == '\'')) {
5472
value = xmlParseEntityValue(ctxt, &orig);
5474
if ((ctxt->sax != NULL) &&
5475
(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5476
ctxt->sax->entityDecl(ctxt->userData, name,
5477
XML_INTERNAL_PARAMETER_ENTITY,
5481
/* External parameter entity: strict is 1, and getting neither a system
 * nor a public identifier is XML_ERR_VALUE_REQUIRED. */
URI = xmlParseExternalID(ctxt, &literal, 1);
5482
if ((URI == NULL) && (literal == NULL)) {
5483
xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5488
uri = xmlParseURI((const char *) URI);
5490
xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5491
"Invalid URI: %s\n", URI);
5493
* This really ought to be a well formedness error
5494
* but the XML Core WG decided otherwise c.f. issue
5495
* E26 of the XML erratas.
5498
/* A system identifier carrying a fragment is a fatal error. */
if (uri->fragment != NULL) {
5500
* Okay this is foolish to block those but not
5503
xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5505
if ((ctxt->sax != NULL) &&
5506
(!ctxt->disableSAX) &&
5507
(ctxt->sax->entityDecl != NULL))
5508
ctxt->sax->entityDecl(ctxt->userData, name,
5509
XML_EXTERNAL_PARAMETER_ENTITY,
5510
literal, URI, NULL);
5517
/* Quoted value: internal general entity. */
if ((RAW == '"') || (RAW == '\'')) {
5518
value = xmlParseEntityValue(ctxt, &orig);
5519
if ((ctxt->sax != NULL) &&
5520
(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5521
ctxt->sax->entityDecl(ctxt->userData, name,
5522
XML_INTERNAL_GENERAL_ENTITY,
5525
* For expat compatibility in SAX mode.
5527
/* With no document, or one whose version equals SAX_COMPAT_MODE, a
 * document and a DTD named fake are created as needed and the entity is
 * also registered through xmlSAX2EntityDecl(). */
if ((ctxt->myDoc == NULL) ||
5528
(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5529
if (ctxt->myDoc == NULL) {
5530
ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5531
if (ctxt->myDoc == NULL) {
5532
xmlErrMemory(ctxt, "New Doc failed");
5535
ctxt->myDoc->properties = XML_DOC_INTERNAL;
5537
if (ctxt->myDoc->intSubset == NULL)
5538
ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5539
BAD_CAST "fake", NULL, NULL);
5541
xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5545
/* External general entity: same identifier and URI checks as for the
 * external parameter entity above. */
URI = xmlParseExternalID(ctxt, &literal, 1);
5546
if ((URI == NULL) && (literal == NULL)) {
5547
xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5552
uri = xmlParseURI((const char *)URI);
5554
xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5555
"Invalid URI: %s\n", URI);
5557
* This really ought to be a well formedness error
5558
* but the XML Core WG decided otherwise c.f. issue
5559
* E26 of the XML erratas.
5562
if (uri->fragment != NULL) {
5564
* Okay this is foolish to block those but not
5567
xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5572
if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
5573
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5574
"Space required before 'NDATA'\n");
5577
/* NDATA present: unparsed entity, reported through unparsedEntityDecl. */
if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5579
if (!IS_BLANK_CH(CUR)) {
5580
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5581
"Space required after 'NDATA'\n");
5584
ndata = xmlParseName(ctxt);
5585
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5586
(ctxt->sax->unparsedEntityDecl != NULL))
5587
ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5588
literal, URI, ndata);
5590
if ((ctxt->sax != NULL) &&
5591
(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5592
ctxt->sax->entityDecl(ctxt->userData, name,
5593
XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5594
literal, URI, NULL);
5596
* For expat compatibility in SAX mode.
5597
* assuming the entity repalcement was asked for
5599
/* Same compatibility registration as for internal general entities, but
 * only when ctxt->replaceEntities is set. */
if ((ctxt->replaceEntities != 0) &&
5600
((ctxt->myDoc == NULL) ||
5601
(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5602
if (ctxt->myDoc == NULL) {
5603
ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5604
if (ctxt->myDoc == NULL) {
5605
xmlErrMemory(ctxt, "New Doc failed");
5608
ctxt->myDoc->properties = XML_DOC_INTERNAL;
5611
if (ctxt->myDoc->intSubset == NULL)
5612
ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5613
BAD_CAST "fake", NULL, NULL);
5614
xmlSAX2EntityDecl(ctxt, name,
5615
XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5616
literal, URI, NULL);
5621
if (ctxt->instate == XML_PARSER_EOF)
5625
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5626
"xmlParseEntityDecl: entity %s not terminated\n", name);
5628
/* Entity boundary check against the input saved at the top. */
if (input != ctxt->input) {
5629
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5630
"Entity declaration doesn't start and stop in the same entity\n");
5636
* Ugly mechanism to save the raw entity value.
5638
/* The entity just declared is looked up again: through the SAX
 * getParameterEntity or getEntity callback, then through
 * xmlSAX2GetEntity() when that found nothing and userData is the parser
 * context itself. */
xmlEntityPtr cur = NULL;
5641
if ((ctxt->sax != NULL) &&
5642
(ctxt->sax->getParameterEntity != NULL))
5643
cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5645
if ((ctxt->sax != NULL) &&
5646
(ctxt->sax->getEntity != NULL))
5647
cur = ctxt->sax->getEntity(ctxt->userData, name);
5648
if ((cur == NULL) && (ctxt->userData==ctxt)) {
5649
cur = xmlSAX2GetEntity(ctxt, name);
5653
if (cur->orig != NULL)
5660
if (value != NULL) xmlFree(value);
5661
if (URI != NULL) xmlFree(URI);
5662
if (literal != NULL) xmlFree(literal);
5667
* xmlParseDefaultDecl:
5668
* @ctxt: an XML parser context
5669
* @value: Receive a possible fixed default value for the attribute
5671
* Parse an attribute default declaration
5673
* [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5675
* [ VC: Required Attribute ]
5676
* if the default declaration is the keyword #REQUIRED, then the
5677
* attribute must be specified for all elements of the type in the
5678
* attribute-list declaration.
5680
* [ VC: Attribute Default Legal ]
5681
* The declared default value must meet the lexical constraints of
5682
* the declared attribute type c.f. xmlValidateAttributeDecl()
5684
* [ VC: Fixed Attribute Default ]
5685
* if an attribute has a default value declared with the #FIXED
5686
* keyword, instances of that attribute must match the default value.
5688
* [ WFC: No < in Attribute Values ]
5689
* handled in xmlParseAttValue()
5691
* returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5692
* or XML_ATTRIBUTE_FIXED.
5696
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
/*
 * Parses the default part of an attribute declaration. The REQUIRED and
 * IMPLIED keywords return their attribute default kind directly; otherwise
 * val starts as XML_ATTRIBUTE_NONE, becomes XML_ATTRIBUTE_FIXED when the
 * FIXED keyword is present, and an attribute value is parsed with
 * xmlParseAttValue().
 * NOTE(review): this listing is missing lines (declarations, braces,
 * returns); read the comments against the complete source.
 */
5701
if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5703
return(XML_ATTRIBUTE_REQUIRED);
5705
if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5707
return(XML_ATTRIBUTE_IMPLIED);
5709
val = XML_ATTRIBUTE_NONE;
5710
if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5712
val = XML_ATTRIBUTE_FIXED;
5713
/* A blank is required between the FIXED keyword and the value. */
if (!IS_BLANK_CH(CUR)) {
5714
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5715
"Space required after '#FIXED'\n");
5719
ret = xmlParseAttValue(ctxt);
5720
/* The parser state is set to XML_PARSER_DTD after the value parse. */
ctxt->instate = XML_PARSER_DTD;
5722
/* The error is raised again using the code currently in ctxt->errNo. */
xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5723
"Attribute default value declaration error\n");
5730
* xmlParseNotationType:
5731
* @ctxt: an XML parser context
5733
* parse a Notation attribute type.
5735
* Note: the leading 'NOTATION' S part has already been parsed...
5737
* [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5739
* [ VC: Notation Attributes ]
5740
* Values of this type must match one of the notation names included
5741
* in the declaration; all notation names in the declaration must be declared.
5743
* Returns: the notation attribute tree built while parsing
5747
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
/*
 * Parses the parenthesised, bar-separated list of notation names and builds
 * an xmlEnumeration list (ret is the head, last the tail). Each name is
 * checked against the names already in the list before a node is created
 * for it. The partial list is released with xmlFreeEnumeration() on the
 * error paths visible below.
 * NOTE(review): this listing is missing lines (declarations, braces,
 * returns); read the comments against the complete source.
 */
5748
const xmlChar *name;
5749
xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5752
xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5759
name = xmlParseName(ctxt);
5761
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5762
"Name expected in NOTATION declaration\n");
5763
xmlFreeEnumeration(ret);
5767
/* Duplicate check: walk the existing nodes comparing names. */
while (tmp != NULL) {
5768
if (xmlStrEqual(name, tmp->name)) {
5769
xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5770
"standalone: attribute notation value token %s duplicated\n",
5772
/* The name is freed only when the parser dictionary does not own it. */
if (!xmlDictOwns(ctxt->dict, name))
5773
xmlFree((xmlChar *) name);
5779
cur = xmlCreateEnumeration(name);
5781
xmlFreeEnumeration(ret);
5784
/* First node becomes both head and tail. */
if (last == NULL) ret = last = cur;
5791
} while (RAW == '|');
5793
xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5794
xmlFreeEnumeration(ret);
5802
* xmlParseEnumerationType:
5803
* @ctxt: an XML parser context
5805
* parse an Enumeration attribute type.
5807
* [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5809
* [ VC: Enumeration ]
5810
* Values of this type must match one of the Nmtoken tokens in
5813
* Returns: the enumeration attribute tree built while parsing
5817
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
/*
 * Parses the bar-separated list of Nmtoken values of an enumeration and
 * builds an xmlEnumeration list (ret is the head, last the tail). Each
 * token from xmlParseNmtoken() is checked against the tokens already in
 * the list before a node is created for it.
 * NOTE(review): this listing is missing lines (declarations, braces,
 * returns); read the comments against the complete source.
 */
5819
xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5822
xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5829
name = xmlParseNmtoken(ctxt);
5831
xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5835
/* Duplicate check: walk the existing nodes comparing names. */
while (tmp != NULL) {
5836
if (xmlStrEqual(name, tmp->name)) {
5837
xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5838
"standalone: attribute enumeration value token %s duplicated\n",
5840
/* Ownership test against the parser dictionary; the guarded statement is
 * not visible in this listing. */
if (!xmlDictOwns(ctxt->dict, name))
5847
cur = xmlCreateEnumeration(name);
5848
if (!xmlDictOwns(ctxt->dict, name))
5851
xmlFreeEnumeration(ret);
5854
/* First node becomes both head and tail. */
if (last == NULL) ret = last = cur;
5861
} while (RAW == '|');
5863
xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5871
* xmlParseEnumeratedType:
5872
* @ctxt: an XML parser context
5873
* @tree: the enumeration tree built while parsing
5875
* parse an Enumerated attribute type.
5877
* [57] EnumeratedType ::= NotationType | Enumeration
5879
* [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5882
* Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5886
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
/*
 * Dispatches between the two enumerated attribute types. When the input
 * starts with the NOTATION keyword the list is parsed by
 * xmlParseNotationType(), otherwise by xmlParseEnumerationType(). The list
 * is stored through tree; a NULL list makes the function return 0 instead
 * of the attribute type constant.
 * NOTE(review): this listing is missing lines (declarations, braces,
 * returns); read the comments against the complete source.
 */
5887
if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5889
/* A blank is required after the keyword. */
if (!IS_BLANK_CH(CUR)) {
5890
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5891
"Space required after 'NOTATION'\n");
5895
*tree = xmlParseNotationType(ctxt);
5896
if (*tree == NULL) return(0);
5897
return(XML_ATTRIBUTE_NOTATION);
5899
*tree = xmlParseEnumerationType(ctxt);
5900
if (*tree == NULL) return(0);
5901
return(XML_ATTRIBUTE_ENUMERATION);
5905
* xmlParseAttributeType:
5906
* @ctxt: an XML parser context
5907
* @tree: the enumeration tree built while parsing
5909
* parse the Attribute list def for an element
5911
* [54] AttType ::= StringType | TokenizedType | EnumeratedType
5913
* [55] StringType ::= 'CDATA'
5915
* [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5916
* 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5918
* Validity constraints for attribute values syntax are checked in
5919
* xmlValidateAttributeValue()
5922
* Values of type ID must match the Name production. A name must not
5923
* appear more than once in an XML document as a value of this type;
5924
* i.e., ID values must uniquely identify the elements which bear them.
5926
* [ VC: One ID per Element Type ]
5927
* No element type may have more than one ID attribute specified.
5929
* [ VC: ID Attribute Default ]
5930
* An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5933
* Values of type IDREF must match the Name production, and values
5934
* of type IDREFS must match Names; each IDREF Name must match the value
5935
* of an ID attribute on some element in the XML document; i.e. IDREF
5936
* values must match the value of some ID attribute.
5938
* [ VC: Entity Name ]
5939
* Values of type ENTITY must match the Name production, values
5940
* of type ENTITIES must match Names; each Entity Name must match the
5941
* name of an unparsed entity declared in the DTD.
5943
* [ VC: Name Token ]
5944
* Values of type NMTOKEN must match the Nmtoken production; values
5945
* of type NMTOKENS must match Nmtokens.
5947
* Returns the attribute type
5950
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
/*
 * Recognises the attribute type keyword at the current position and returns
 * the matching XML_ATTRIBUTE_* constant. Anything that is not one of the
 * keywords below is handed to xmlParseEnumeratedType(), which is the only
 * place tree is used in the lines visible here.
 * The test order matters: a longer keyword is tried before any keyword that
 * is its prefix (IDREFS, then IDREF, then ID; NMTOKENS before NMTOKEN).
 * NOTE(review): this listing is missing lines (declarations, braces,
 * returns); read the comments against the complete source.
 */
5952
if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5954
return(XML_ATTRIBUTE_CDATA);
5955
} else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5957
return(XML_ATTRIBUTE_IDREFS);
5958
} else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5960
return(XML_ATTRIBUTE_IDREF);
5961
} else if ((RAW == 'I') && (NXT(1) == 'D')) {
5963
return(XML_ATTRIBUTE_ID);
5964
} else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5966
return(XML_ATTRIBUTE_ENTITY);
5967
} else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5969
return(XML_ATTRIBUTE_ENTITIES);
5970
} else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5972
return(XML_ATTRIBUTE_NMTOKENS);
5973
} else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5975
return(XML_ATTRIBUTE_NMTOKEN);
5977
return(xmlParseEnumeratedType(ctxt, tree));
5981
* xmlParseAttributeListDecl:
5982
* @ctxt: an XML parser context
5984
* Parse the Attribute list definition for an element
5986
* [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5988
* [53] AttDef ::= S Name S AttType S DefaultDecl
5992
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
/*
 * Parse one '<!ATTLIST' declaration: the element name, then one
 * attribute definition (name, type, default) per loop iteration until
 * '>' is reached. Each definition is reported through the SAX
 * attributeDecl callback when one is installed and SAX is enabled.
 *
 * NOTE(review): the embedded numbering has gaps, so some statements of
 * this function (including the conditions guarding several of the
 * cleanup sequences below) are not shown in this listing; these
 * comments describe only the visible lines.
 */
5993
const xmlChar *elemName;
5994
const xmlChar *attrName;
5995
xmlEnumerationPtr tree;
5997
if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
/* Remember the input in use at the start; compared again at the end. */
5998
xmlParserInputPtr input = ctxt->input;
6001
if (!IS_BLANK_CH(CUR)) {
6002
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6003
"Space required after '<!ATTLIST'\n");
6006
elemName = xmlParseName(ctxt);
6007
if (elemName == NULL) {
6008
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6009
"ATTLIST: no name for Element\n");
/* One attribute definition per iteration. */
6014
while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
/* Start position of this iteration, used by the no-progress test below. */
6015
const xmlChar *check = CUR_PTR;
6018
xmlChar *defaultValue = NULL;
6022
attrName = xmlParseName(ctxt);
6023
if (attrName == NULL) {
6024
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6025
"ATTLIST: no name for Attribute\n");
6029
if (!IS_BLANK_CH(CUR)) {
6030
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6031
"Space required after the attribute name\n");
/* tree receives the enumeration list, if the type has one. */
6036
type = xmlParseAttributeType(ctxt, &tree);
6042
if (!IS_BLANK_CH(CUR)) {
6043
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6044
"Space required after the attribute type\n");
6046
xmlFreeEnumeration(tree);
6051
def = xmlParseDefaultDecl(ctxt, &defaultValue);
6053
if (defaultValue != NULL)
6054
xmlFree(defaultValue);
6056
xmlFreeEnumeration(tree);
/* Non-CDATA defaults are space-normalized in place (same buffer in and out). */
6059
if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6060
xmlAttrNormalizeSpace(defaultValue, defaultValue);
6064
if (!IS_BLANK_CH(CUR)) {
6065
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6066
"Space required after the attribute default value\n");
6067
if (defaultValue != NULL)
6068
xmlFree(defaultValue);
6070
xmlFreeEnumeration(tree);
/* Nothing was consumed during this iteration: report it and clean up. */
6075
if (check == CUR_PTR) {
6076
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6077
"in xmlParseAttributeListDecl\n");
6078
if (defaultValue != NULL)
6079
xmlFree(defaultValue);
6081
xmlFreeEnumeration(tree);
/*
 * Hand the definition to the SAX layer. tree is passed to the callback
 * and is only freed here when the callback is not invoked; presumably
 * the callback takes ownership of it -- confirm against the handler.
 */
6084
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6085
(ctxt->sax->attributeDecl != NULL))
6086
ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6087
type, def, defaultValue, tree);
6088
else if (tree != NULL)
6089
xmlFreeEnumeration(tree);
/*
 * In SAX2 mode, a default value that is neither #IMPLIED nor #REQUIRED
 * is also recorded through xmlAddDefAttrs().
 */
6091
if ((ctxt->sax2) && (defaultValue != NULL) &&
6092
(def != XML_ATTRIBUTE_IMPLIED) &&
6093
(def != XML_ATTRIBUTE_REQUIRED)) {
6094
xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6097
xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6099
if (defaultValue != NULL)
6100
xmlFree(defaultValue);
/* The declaration must end in the same input it started in. */
6104
if (input != ctxt->input) {
6105
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6106
"Attribute list declaration doesn't start and stop in the same entity\n",
6115
* xmlParseElementMixedContentDecl:
6116
* @ctxt: an XML parser context
6117
* @inputchk: the input used for the current entity, needed for boundary checks
6119
* parse the declaration for a Mixed Element content
6120
* The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6122
* [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6123
* '(' S? '#PCDATA' S? ')'
6125
* [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6127
* [ VC: No Duplicate Types ]
6128
* The same name must not appear more than once in a single
6129
* mixed-content declaration.
6131
* returns: the list of the xmlElementContentPtr describing the element choices
6133
xmlElementContentPtr
6134
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
/*
 * Parse a Mixed content model, which must begin with '#PCDATA', and
 * build the corresponding xmlElementContent tree: a single PCDATA node,
 * or a chain of OR nodes when '|' alternatives follow.
 *
 * inputchk is compared with ctxt->input->id when validating, to check
 * that the group closes in the input it was opened in.
 *
 * NOTE(review): the embedded numbering has gaps, so some statements of
 * this function are not shown in this listing; these comments describe
 * only the visible lines.
 */
6135
xmlElementContentPtr ret = NULL, cur = NULL, n;
6136
const xmlChar *elem = NULL;
6139
if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6144
if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6145
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6146
"Element content declaration doesn't start and stop in the same entity\n",
6150
ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6154
ret->ocur = XML_ELEMENT_CONTENT_MULT;
6159
if ((RAW == '(') || (RAW == '|')) {
6160
ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6161
if (ret == NULL) return(NULL);
/* One '|' alternative per iteration. */
6163
while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
/*
 * NOTE(review): ret is overwritten and NULL returned on failure while
 * cur still refers to the node built earlier; no release of cur is
 * visible before this return -- looks like a leak on allocation
 * failure, confirm.
 */
6166
ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6167
if (ret == NULL) return(NULL);
/*
 * NOTE(review): same pattern here; returning NULL when n cannot be
 * allocated does not visibly release the tree held in ret -- confirm.
 */
6173
n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6174
if (n == NULL) return(NULL);
6175
n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6184
elem = xmlParseName(ctxt);
6186
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6187
"xmlParseElementMixedContentDecl : Name expected\n");
6188
xmlFreeDocElementContent(ctxt->myDoc, cur);
/* A list of alternatives has to be closed by ')*'. */
6194
if ((RAW == ')') && (NXT(1) == '*')) {
/* The last parsed name becomes the second child of the current node. */
6196
cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6197
XML_ELEMENT_CONTENT_ELEMENT);
6198
if (cur->c2 != NULL)
6199
cur->c2->parent = cur;
6202
ret->ocur = XML_ELEMENT_CONTENT_MULT;
6203
if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6204
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6205
"Element content declaration doesn't start and stop in the same entity\n",
/* The tree built so far is dropped before the error is reported. */
6210
xmlFreeDocElementContent(ctxt->myDoc, ret);
6211
xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
/*
 * Presumably the branch taken when the input does not start with
 * '#PCDATA'; the else line itself is not shown -- confirm.
 */
6216
xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6222
* xmlParseElementChildrenContentDeclPriv:
6223
* @ctxt: an XML parser context
6224
* @inputchk: the input used for the current entity, needed for boundary checks
6225
* @depth: the level of recursion
6227
* parse the declaration for the children (element-only) content of an element
6228
* The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6231
* [47] children ::= (choice | seq) ('?' | '*' | '+')?
6233
* [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6235
* [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6237
* [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6239
* [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6240
* TODO Parameter-entity replacement text must be properly nested
6241
* with parenthesized groups. That is to say, if either of the
6242
* opening or closing parentheses in a choice, seq, or Mixed
6243
* construct is contained in the replacement text for a parameter
6244
* entity, both must be contained in the same replacement text. For
6245
* interoperability, if a parameter-entity reference appears in a
6246
* choice, seq, or Mixed construct, its replacement text should not
6247
* be empty, and neither the first nor last non-blank character of
6248
* the replacement text should be a connector (| or ,).
6250
* Returns the tree of xmlElementContentPtr describing the element
6253
static xmlElementContentPtr
6254
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6256
/*
 * Recursive parser for a children content model (a choice or sequence
 * group). The first content particle is parsed, then one separator and
 * one particle per loop iteration until ')' is reached; finally the
 * occurrence indicator following the group is folded into the result.
 *
 * Local roles as used below: ret is the root of the tree being built,
 * cur the node most recently linked in, last the particle just parsed
 * and op the operator node (SEQ or OR) created for a separator.
 *
 * NOTE(review): the embedded numbering has gaps, so some statements of
 * this function (including the end of its signature) are not shown in
 * this listing; these comments describe only the visible lines.
 */
xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6257
const xmlChar *elem;
/*
 * Nesting deeper than 128 is refused unless XML_PARSE_HUGE is set; the
 * second half of this condition is not shown in this listing.
 */
6260
if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6262
xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6263
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
/* A nested group is checked against the input it is opened in. */
6270
int inputid = ctxt->input->id;
6272
/* Recurse on first child */
6275
cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6280
elem = xmlParseName(ctxt);
6282
xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6285
cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6287
xmlErrMemory(ctxt, NULL);
/* Occurrence indicator of the first particle. */
6292
cur->ocur = XML_ELEMENT_CONTENT_OPT;
6294
} else if (RAW == '*') {
6295
cur->ocur = XML_ELEMENT_CONTENT_MULT;
6297
} else if (RAW == '+') {
6298
cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6301
cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6307
while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6309
* Each loop we parse one separator and one element.
/* type records the first separator seen; later ones must be the same. */
6312
if (type == 0) type = CUR;
6315
* Detect "Name | Name , Name" error
6317
else if (type != CUR) {
6318
xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6319
"xmlParseElementChildrenContentDecl : '%c' expected\n",
/*
 * last is released on its own only when it is a different node from
 * ret, which is released right after.
 */
6321
if ((last != NULL) && (last != ret))
6322
xmlFreeDocElementContent(ctxt->myDoc, last);
6324
xmlFreeDocElementContent(ctxt->myDoc, ret);
6329
op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6331
if ((last != NULL) && (last != ret))
6332
xmlFreeDocElementContent(ctxt->myDoc, last);
6333
xmlFreeDocElementContent(ctxt->myDoc, ret);
6351
} else if (RAW == '|') {
6352
if (type == 0) type = CUR;
6355
* Detect "Name , Name | Name" error
6357
else if (type != CUR) {
6358
xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6359
"xmlParseElementChildrenContentDecl : '%c' expected\n",
6361
if ((last != NULL) && (last != ret))
6362
xmlFreeDocElementContent(ctxt->myDoc, last);
6364
xmlFreeDocElementContent(ctxt->myDoc, ret);
6369
op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6371
if ((last != NULL) && (last != ret))
6372
xmlFreeDocElementContent(ctxt->myDoc, last);
6374
xmlFreeDocElementContent(ctxt->myDoc, ret);
6393
xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6394
if ((last != NULL) && (last != ret))
6395
xmlFreeDocElementContent(ctxt->myDoc, last);
6397
xmlFreeDocElementContent(ctxt->myDoc, ret);
6404
int inputid = ctxt->input->id;
6405
/* Recurse on second child */
6408
last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6412
elem = xmlParseName(ctxt);
6414
xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6416
xmlFreeDocElementContent(ctxt->myDoc, ret);
6419
last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6422
xmlFreeDocElementContent(ctxt->myDoc, ret);
/* Occurrence indicator of the particle just parsed. */
6426
last->ocur = XML_ELEMENT_CONTENT_OPT;
6428
} else if (RAW == '*') {
6429
last->ocur = XML_ELEMENT_CONTENT_MULT;
6431
} else if (RAW == '+') {
6432
last->ocur = XML_ELEMENT_CONTENT_PLUS;
6435
last->ocur = XML_ELEMENT_CONTENT_ONCE;
6441
if ((cur != NULL) && (last != NULL)) {
/* When validating, the group must close in the input given by inputchk. */
6446
if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6447
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6448
"Element content declaration doesn't start and stop in the same entity\n",
/*
 * Fold the occurrence indicator that follows the group into ret->ocur.
 * NOTE(review): the first test of this if / else-if chain is not shown
 * in this listing; the later branches test '*' and '+'.
 */
6454
if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6455
(ret->ocur == XML_ELEMENT_CONTENT_MULT))
6456
ret->ocur = XML_ELEMENT_CONTENT_MULT;
6458
ret->ocur = XML_ELEMENT_CONTENT_OPT;
6461
} else if (RAW == '*') {
6463
ret->ocur = XML_ELEMENT_CONTENT_MULT;
6466
* Some normalization:
6467
* (a | b* | c?)* == (a | b | c)*
/* For OR nodes, children marked OPT or MULT are reset to ONCE. */
6469
while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6470
if ((cur->c1 != NULL) &&
6471
((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6472
(cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6473
cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6474
if ((cur->c2 != NULL) &&
6475
((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6476
(cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6477
cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6482
} else if (RAW == '+') {
6486
if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6487
(ret->ocur == XML_ELEMENT_CONTENT_MULT))
6488
ret->ocur = XML_ELEMENT_CONTENT_MULT;
6490
ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6492
* Some normalization:
6493
* (a | b*)+ == (a | b)*
6494
* (a | b?)+ == (a | b)*
6496
while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6497
if ((cur->c1 != NULL) &&
6498
((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6499
(cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6500
cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6503
if ((cur->c2 != NULL) &&
6504
((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6505
(cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6506
cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6512
ret->ocur = XML_ELEMENT_CONTENT_MULT;
6520
* xmlParseElementChildrenContentDecl:
6521
* @ctxt: an XML parser context
6522
* @inputchk: the input used for the current entity, needed for boundary checks
6524
* parse the declaration for the children (element-only) content of an element
6525
* The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6527
* [47] children ::= (choice | seq) ('?' | '*' | '+')?
6529
* [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6531
* [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6533
* [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6535
* [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6536
* TODO Parameter-entity replacement text must be properly nested
6537
* with parenthesized groups. That is to say, if either of the
6538
* opening or closing parentheses in a choice, seq, or Mixed
6539
* construct is contained in the replacement text for a parameter
6540
* entity, both must be contained in the same replacement text. For
6541
* interoperability, if a parameter-entity reference appears in a
6542
* choice, seq, or Mixed construct, its replacement text should not
6543
* be empty, and neither the first nor last non-blank character of
6544
* the replacement text should be a connector (| or ,).
6546
* Returns the tree of xmlElementContentPtr describing the element
6549
xmlElementContentPtr
6550
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
/*
 * Public entry point kept with its two-argument signature; all the work
 * is done by xmlParseElementChildrenContentDeclPriv(), called here with
 * 1 as its third argument (documented there as the recursion depth).
 */
6551
/* stub left for API/ABI compat */
6552
return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6556
* xmlParseElementContentDecl:
6557
* @ctxt: an XML parser context
6558
* @name: the name of the element being defined.
6559
* @result: the Element Content pointer will be stored here if any
6561
* parse the declaration for an Element content either Mixed or Children,
6562
* the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6564
* [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6566
* returns: the type of element content XML_ELEMENT_TYPE_xxx
6570
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6571
xmlElementContentPtr *result) {
/*
 * Parse a parenthesized content specification and pick the parser to
 * use: Mixed when the content starts with '#PCDATA', children otherwise.
 *
 * NOTE(review): the embedded numbering has gaps, so some statements of
 * this function (including how @result is filled and what is returned)
 * are not shown in this listing; these comments describe only the
 * visible lines.
 */
6573
xmlElementContentPtr tree = NULL;
/* Input id at entry, handed to the sub-parsers for their boundary checks. */
6574
int inputid = ctxt->input->id;
6580
xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6581
"xmlParseElementContentDecl : %s '(' expected\n", name);
6586
if (ctxt->instate == XML_PARSER_EOF)
6589
if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6590
tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6591
res = XML_ELEMENT_TYPE_MIXED;
/* Children content: start the recursive parser at depth 1. */
6593
tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6594
res = XML_ELEMENT_TYPE_ELEMENT;
6602
* xmlParseElementDecl:
6603
* @ctxt: an XML parser context
6605
* parse an Element declaration.
6607
* [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6609
* [ VC: Unique Element Type Declaration ]
6610
* No element type may be declared more than once
6612
* Returns the type of the element, or -1 in case of error
6615
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
/*
 * Parse one '<!ELEMENT' declaration: the element name followed by a
 * content specification ('EMPTY', 'ANY' or a parenthesized model), and
 * report it through the SAX elementDecl callback when one is installed
 * and SAX is enabled.
 *
 * NOTE(review): the embedded numbering has gaps, so some statements of
 * this function are not shown in this listing; these comments describe
 * only the visible lines.
 */
6616
const xmlChar *name;
6618
xmlElementContentPtr content = NULL;
6620
/* GROW; done in the caller */
6621
if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
/* Remember the input in use at the start; compared again near the end. */
6622
xmlParserInputPtr input = ctxt->input;
6625
if (!IS_BLANK_CH(CUR)) {
6626
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6627
"Space required after 'ELEMENT'\n");
6630
name = xmlParseName(ctxt);
6632
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6633
"xmlParseElementDecl: no name for Element\n");
/*
 * Same loop condition as the one labelled "Pop-up of finished entities"
 * further down; the loop body is not shown in this listing.
 */
6636
while ((RAW == 0) && (ctxt->inputNr > 1))
6638
if (!IS_BLANK_CH(CUR)) {
6639
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6640
"Space required after the element name\n");
6643
if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6646
* Element must always be empty.
6648
ret = XML_ELEMENT_TYPE_EMPTY;
6649
} else if ((RAW == 'A') && (NXT(1) == 'N') &&
6653
* Element is a generic container.
6655
ret = XML_ELEMENT_TYPE_ANY;
6656
} else if (RAW == '(') {
/* content receives the parsed content model tree. */
6657
ret = xmlParseElementContentDecl(ctxt, name, &content);
6660
* [ WFC: PEs in Internal Subset ] error handling.
/* A '%' seen while not external and on the only input is reported specifically. */
6662
if ((RAW == '%') && (ctxt->external == 0) &&
6663
(ctxt->inputNr == 1)) {
6664
xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6665
"PEReference: forbidden within markup decl in internal subset\n");
6667
xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6668
"xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6675
* Pop-up of finished entities.
6677
while ((RAW == 0) && (ctxt->inputNr > 1))
/* Error path: the content model is released here. */
6682
xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6683
if (content != NULL) {
6684
xmlFreeDocElementContent(ctxt->myDoc, content);
/* The declaration must end in the same input it started in. */
6687
if (input != ctxt->input) {
6688
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6689
"Element declaration doesn't start and stop in the same entity\n");
/*
 * content->parent is cleared before the callback and inspected after
 * it: when it is still NULL the tree is freed here, otherwise it is
 * left alone (see the existing note below about xmlAddElementDecl).
 */
6693
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6694
(ctxt->sax->elementDecl != NULL)) {
6695
if (content != NULL)
6696
content->parent = NULL;
6697
ctxt->sax->elementDecl(ctxt->userData, name, ret,
6699
if ((content != NULL) && (content->parent == NULL)) {
6701
* this is a trick: if xmlAddElementDecl is called,
6702
* instead of copying the full tree it is plugged directly
6703
* if called from the parser. Avoid duplicating the
6704
* interfaces or change the API/ABI
6706
xmlFreeDocElementContent(ctxt->myDoc, content);
/* No callback was invoked: the tree is released here. */
6708
} else if (content != NULL) {
6709
xmlFreeDocElementContent(ctxt->myDoc, content);
6717
* xmlParseConditionalSections
6718
* @ctxt: an XML parser context
6720
* [61] conditionalSect ::= includeSect | ignoreSect
6721
* [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6722
* [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6723
* [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6724
* [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6728
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
/*
 * Parse a conditional section. For the INCLUDE keyword the enclosed
 * declarations are parsed (nested sections recurse into this function);
 * for the IGNORE keyword the content is skipped while nested sections
 * are tracked with a depth counter. Any other keyword is an error.
 *
 * The input id captured at entry is compared with the current one at
 * several points to report sections whose markup spans several entities.
 *
 * NOTE(review): the embedded numbering has gaps, so some statements of
 * this function are not shown in this listing; these comments describe
 * only the visible lines.
 */
6729
int id = ctxt->input->id;
6733
if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6737
xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6739
if (ctxt->input->id != id) {
6740
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6741
"All markup of the conditional section is not in the same entity\n",
/* Optional trace output, enabled by xmlParserDebugEntities. */
6746
if (xmlParserDebugEntities) {
6747
if ((ctxt->input != NULL) && (ctxt->input->filename))
6748
xmlGenericError(xmlGenericErrorContext,
6749
"%s(%d): ", ctxt->input->filename,
6751
xmlGenericError(xmlGenericErrorContext,
6752
"Entering INCLUDE Conditional Section\n");
/* Parse declarations until the closing marker, end of data or parser EOF state. */
6755
while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6756
(NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
/* Position and consumed count, used by the no-progress test below. */
6757
const xmlChar *check = CUR_PTR;
6758
unsigned int cons = ctxt->input->consumed;
6760
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6761
xmlParseConditionalSections(ctxt);
6762
} else if (IS_BLANK_CH(CUR)) {
6764
} else if (RAW == '%') {
6765
xmlParsePEReference(ctxt);
6767
xmlParseMarkupDecl(ctxt);
6770
* Pop-up of finished entities.
6772
while ((RAW == 0) && (ctxt->inputNr > 1))
/* Neither the pointer nor the consumed count moved: report an error. */
6775
if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6776
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6780
if (xmlParserDebugEntities) {
6781
if ((ctxt->input != NULL) && (ctxt->input->filename))
6782
xmlGenericError(xmlGenericErrorContext,
6783
"%s(%d): ", ctxt->input->filename,
6785
xmlGenericError(xmlGenericErrorContext,
6786
"Leaving INCLUDE Conditional Section\n");
6789
} else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6791
xmlParserInputState instate;
6797
xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6799
if (ctxt->input->id != id) {
6800
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6801
"All markup of the conditional section is not in the same entity\n",
6806
if (xmlParserDebugEntities) {
6807
if ((ctxt->input != NULL) && (ctxt->input->filename))
6808
xmlGenericError(xmlGenericErrorContext,
6809
"%s(%d): ", ctxt->input->filename,
6811
xmlGenericError(xmlGenericErrorContext,
6812
"Entering IGNORE Conditional Section\n");
6816
* Parse up to the end of the conditional section
6817
* But disable SAX event generating DTD building in the meantime
/*
 * Save disableSAX and instate, restored after the skip loop. SAX is
 * left enabled when ctxt->recovery is set.
 */
6819
state = ctxt->disableSAX;
6820
instate = ctxt->instate;
6821
if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6822
ctxt->instate = XML_PARSER_IGNORE;
/* Skip loop: runs while depth is non-negative and data remains. */
6824
while (((depth >= 0) && (RAW != 0)) &&
6825
(ctxt->instate != XML_PARSER_EOF)) {
6826
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
/* A closing marker lowers depth; it is only skipped here while depth stays >= 0. */
6831
if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6832
if (--depth >= 0) SKIP(3);
6839
ctxt->disableSAX = state;
6840
ctxt->instate = instate;
6842
if (xmlParserDebugEntities) {
6843
if ((ctxt->input != NULL) && (ctxt->input->filename))
6844
xmlGenericError(xmlGenericErrorContext,
6845
"%s(%d): ", ctxt->input->filename,
6847
xmlGenericError(xmlGenericErrorContext,
6848
"Leaving IGNORE Conditional Section\n");
6852
xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6859
xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
/* Final boundary check against the input id captured at entry. */
6861
if (ctxt->input->id != id) {
6862
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6863
"All markup of the conditional section is not in the same entity\n",
6871
* xmlParseMarkupDecl:
6872
* @ctxt: an XML parser context
6874
* parse Markup declarations
6876
* [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6877
* NotationDecl | PI | Comment
6879
* [ VC: Proper Declaration/PE Nesting ]
6880
* Parameter-entity replacement text must be properly nested with
6881
* markup declarations. That is to say, if either the first character
6882
* or the last character of a markup declaration (markupdecl above) is
6883
* contained in the replacement text for a parameter-entity reference,
6884
* both must be contained in the same replacement text.
6886
* [ WFC: PEs in Internal Subset ]
6887
* In the internal DTD subset, parameter-entity references can occur
6888
* only where markup declarations can occur, not within markup declarations.
6889
* (This does not apply to references that occur in external parameter
6890
* entities or to the external subset.)
6893
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
/*
 * Dispatch on the characters at the current position to the parser for
 * the matching markup declaration (element, entity, attribute list,
 * notation, comment), then handle PE references and conditional
 * sections, and finally set the parser state to XML_PARSER_DTD.
 *
 * NOTE(review): the embedded numbering has gaps, so most of the tests
 * selecting each branch are not shown in this listing; these comments
 * describe only the visible lines.
 */
6896
if (NXT(1) == '!') {
6900
xmlParseElementDecl(ctxt);
6901
else if (NXT(3) == 'N')
6902
xmlParseEntityDecl(ctxt);
6905
xmlParseAttributeListDecl(ctxt);
6908
xmlParseNotationDecl(ctxt);
6911
xmlParseComment(ctxt);
6914
/* there is an error but it will be detected later */
6917
} else if (NXT(1) == '?') {
6922
* This is only for internal subset. On external entities,
6923
* the replacement is done before parsing stage
/* PE references are parsed here only when not external and on the only input. */
6925
if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6926
xmlParsePEReference(ctxt);
6929
* Conditional sections are allowed from entities included
6930
* by PE References in the internal subset.
/* Not external but more than one input on the stack. */
6932
if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6933
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6934
xmlParseConditionalSections(ctxt);
6938
ctxt->instate = XML_PARSER_DTD;
6943
* @ctxt: an XML parser context
6945
* parse an XML declaration header for external entities
6947
* [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6951
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
/*
 * Parse the text declaration at the start of an external entity:
 * '<?xml', an optional VersionInfo, the encoding declaration and the
 * closing '?>'. The version string is stored on the current input.
 *
 * NOTE(review): the embedded numbering has gaps, so some statements of
 * this function are not shown in this listing; these comments describe
 * only the visible lines.
 */
6953
const xmlChar *encoding;
6956
* We know that '<?xml' is here.
/* '<?xml' has to be followed by a blank character. */
6958
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6961
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6965
if (!IS_BLANK_CH(CUR)) {
6966
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6967
"Space needed after '<?xml'\n");
6972
* We may have the VersionInfo here.
/* Without a VersionInfo, a copy of XML_DEFAULT_VERSION is used. */
6974
version = xmlParseVersionInfo(ctxt);
6975
if (version == NULL)
6976
version = xmlCharStrdup(XML_DEFAULT_VERSION);
6978
if (!IS_BLANK_CH(CUR)) {
6979
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6980
"Space needed here\n");
6983
ctxt->input->version = version;
6986
* We must have the encoding declaration
6988
encoding = xmlParseEncodingDecl(ctxt);
6989
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6991
* The XML REC instructs us to stop parsing right here
/* A missing encoding is only reported when no other error is recorded. */
6995
if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6996
xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6997
"Missing encoding in text declaration\n");
/*
 * Terminator: '?>' is the accepted form; a lone '>' and anything else
 * are both reported as XML_ERR_XMLDECL_NOT_FINISHED, the latter being
 * followed by MOVETO_ENDTAG.
 */
7001
if ((RAW == '?') && (NXT(1) == '>')) {
7003
} else if (RAW == '>') {
7004
/* Deprecated old WD ... */
7005
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7008
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7009
MOVETO_ENDTAG(CUR_PTR);
7015
* xmlParseExternalSubset:
7016
* @ctxt: an XML parser context
7017
* @ExternalID: the external identifier
7018
* @SystemID: the system identifier (or URL)
7020
* parse Markup declarations from an external subset
7022
* [30] extSubset ::= textDecl? extSubsetDecl
7024
* [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7027
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7028
const xmlChar *SystemID) {
/*
 * Parse the external DTD subset: detect the encoding when none is set,
 * parse an optional text declaration, make sure a document and its
 * internal subset node exist, then loop over markup declarations,
 * conditional sections, PE references and blanks.
 *
 * NOTE(review): the embedded numbering has gaps, so some statements of
 * this function are not shown in this listing; these comments describe
 * only the visible lines.
 */
7029
xmlDetectSAX2(ctxt);
/* Encoding detection needs at least 4 bytes of pending input. */
7032
if ((ctxt->encoding == NULL) &&
7033
(ctxt->input->end - ctxt->input->cur >= 4)) {
7035
xmlCharEncoding enc;
/* start is filled by lines not shown in this listing. */
7041
enc = xmlDetectCharEncoding(start, 4);
7042
if (enc != XML_CHAR_ENCODING_NONE)
7043
xmlSwitchEncoding(ctxt, enc);
7046
if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7047
xmlParseTextDecl(ctxt);
7048
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7050
* The XML REC instructs us to stop parsing right here
7052
ctxt->instate = XML_PARSER_EOF;
/* Create a "1.0" document when the context has none yet. */
7056
if (ctxt->myDoc == NULL) {
7057
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7058
if (ctxt->myDoc == NULL) {
7059
xmlErrMemory(ctxt, "New Doc failed");
7062
ctxt->myDoc->properties = XML_DOC_INTERNAL;
/* Create the internal subset node when the document has none. */
7064
if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7065
xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7067
ctxt->instate = XML_PARSER_DTD;
/* Main loop: runs while the input looks like '<?', '<!', '%' or a blank. */
7069
while (((RAW == '<') && (NXT(1) == '?')) ||
7070
((RAW == '<') && (NXT(1) == '!')) ||
7071
(RAW == '%') || IS_BLANK_CH(CUR)) {
/* Position and consumed count, used by the no-progress test below. */
7072
const xmlChar *check = CUR_PTR;
7073
unsigned int cons = ctxt->input->consumed;
7076
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7077
xmlParseConditionalSections(ctxt);
7078
} else if (IS_BLANK_CH(CUR)) {
7080
} else if (RAW == '%') {
7081
xmlParsePEReference(ctxt);
7083
xmlParseMarkupDecl(ctxt);
7086
* Pop-up of finished entities.
7088
while ((RAW == 0) && (ctxt->inputNr > 1))
/* Neither the pointer nor the consumed count moved: report an error. */
7091
if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7092
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
/* NOTE(review): the condition guarding this final error is not shown. */
7098
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7104
* xmlParseReference:
7105
* @ctxt: an XML parser context
7107
* parse and handle entity references in content, depending on the SAX
7108
* interface, this may end up in a call to characters() if this is a
7109
* CharRef, a predefined entity, if there is no reference() callback.
7110
* or if the parser was asked to switch to that mode.
7112
* [67] Reference ::= EntityRef | CharRef
7115
xmlParseReference(xmlParserCtxtPtr ctxt) {
7119
xmlNodePtr list = NULL;
7120
xmlParserErrors ret = XML_ERR_OK;
7127
* Simple case of a CharRef
7129
if (NXT(1) == '#') {
7133
int value = xmlParseCharRef(ctxt);
7137
if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7139
* So we are using non-UTF-8 buffers
7140
* Check that the char fit on 8bits, if not
7141
* generate a CharRef.
7143
if (value <= 0xFF) {
7146
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7147
(!ctxt->disableSAX))
7148
ctxt->sax->characters(ctxt->userData, out, 1);
7150
if ((hex == 'x') || (hex == 'X'))
7151
snprintf((char *)out, sizeof(out), "#x%X", value);
7153
snprintf((char *)out, sizeof(out), "#%d", value);
7154
if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7155
(!ctxt->disableSAX))
7156
ctxt->sax->reference(ctxt->userData, out);
7160
* Just encode the value in UTF-8
7162
COPY_BUF(0 ,out, i, value);
7164
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7165
(!ctxt->disableSAX))
7166
ctxt->sax->characters(ctxt->userData, out, i);
7172
* We are seeing an entity reference
7174
ent = xmlParseEntityRef(ctxt);
7175
if (ent == NULL) return;
7176
if (!ctxt->wellFormed)
7178
was_checked = ent->checked;
7180
/* special case of predefined entities */
7181
if ((ent->name == NULL) ||
7182
(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7184
if (val == NULL) return;
7186
* inline the entity.
7188
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7189
(!ctxt->disableSAX))
7190
ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7195
* The first reference to the entity trigger a parsing phase
7196
* where the ent->children is filled with the result from
7198
* Note: external parsed entities will not be loaded, it is not
7199
* required for a non-validating parser, unless the parsing option
7200
* of validating, or substituting entities were given. Doing so is
7201
* far more secure as the parser will only process data coming from
7202
* the document entity by default.
7204
if ((ent->checked == 0) &&
7205
((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7206
(ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7207
unsigned long oldnbent = ctxt->nbentities;
7210
* This is a bit hackish but this seems the best
7211
* way to make sure both SAX and DOM entity support
7215
if (ctxt->userData == ctxt)
7218
user_data = ctxt->userData;
7221
* Check that this entity is well formed
7222
* 4.3.2: An internal general parsed entity is well-formed
7223
* if its replacement text matches the production labeled
7226
if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7228
ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7232
} else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7234
ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7235
user_data, ctxt->depth, ent->URI,
7236
ent->ExternalID, &list);
7239
ret = XML_ERR_ENTITY_PE_INTERNAL;
7240
xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7241
"invalid entity type found\n", NULL);
7245
* Store the number of entities needing parsing for this entity
7246
* content and do checkings
7248
ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7249
if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7251
if (ret == XML_ERR_ENTITY_LOOP) {
7252
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7253
xmlFreeNodeList(list);
7256
if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7257
xmlFreeNodeList(list);
7261
if ((ret == XML_ERR_OK) && (list != NULL)) {
7262
if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7263
(ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7264
(ent->children == NULL)) {
7265
ent->children = list;
7266
if (ctxt->replaceEntities) {
7268
* Prune it directly in the generated document
7269
* except for single text nodes.
7271
if (((list->type == XML_TEXT_NODE) &&
7272
(list->next == NULL)) ||
7273
(ctxt->parseMode == XML_PARSE_READER)) {
7274
list->parent = (xmlNodePtr) ent;
7279
while (list != NULL) {
7280
list->parent = (xmlNodePtr) ctxt->node;
7281
list->doc = ctxt->myDoc;
7282
if (list->next == NULL)
7286
list = ent->children;
7287
#ifdef LIBXML_LEGACY_ENABLED
7288
if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7289
xmlAddEntityReference(ent, list, NULL);
7290
#endif /* LIBXML_LEGACY_ENABLED */
7294
while (list != NULL) {
7295
list->parent = (xmlNodePtr) ent;
7296
xmlSetTreeDoc(list, ent->doc);
7297
if (list->next == NULL)
7303
xmlFreeNodeList(list);
7306
} else if ((ret != XML_ERR_OK) &&
7307
(ret != XML_WAR_UNDECLARED_ENTITY)) {
7308
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7309
"Entity '%s' failed to parse\n", ent->name);
7310
} else if (list != NULL) {
7311
xmlFreeNodeList(list);
7314
if (ent->checked == 0)
7316
} else if (ent->checked != 1) {
7317
ctxt->nbentities += ent->checked / 2;
7321
* Now that the entity content has been gathered
7322
* provide it to the application, this can take different forms based
7323
* on the parsing modes.
7325
if (ent->children == NULL) {
7327
* Probably running in SAX mode and the callbacks don't
7328
* build the entity content. So unless we already went
7329
* through parsing for first checking go through the entity
7330
* content to generate callbacks associated to the entity
7332
if (was_checked != 0) {
7335
* This is a bit hackish but this seems the best
7336
* way to make sure both SAX and DOM entity support
7339
if (ctxt->userData == ctxt)
7342
user_data = ctxt->userData;
7344
if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7346
ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7347
ent->content, user_data, NULL);
7349
} else if (ent->etype ==
7350
XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7352
ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7353
ctxt->sax, user_data, ctxt->depth,
7354
ent->URI, ent->ExternalID, NULL);
7357
ret = XML_ERR_ENTITY_PE_INTERNAL;
7358
xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7359
"invalid entity type found\n", NULL);
7361
if (ret == XML_ERR_ENTITY_LOOP) {
7362
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7366
if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7367
(ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7369
* Entity reference callback comes second, it's somewhat
7370
* superfluous but a compatibility to historical behaviour
7372
ctxt->sax->reference(ctxt->userData, ent->name);
7378
* If we didn't get any children for the entity being built
7380
if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7381
(ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7385
ctxt->sax->reference(ctxt->userData, ent->name);
7389
if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7391
* There is a problem on the handling of _private for entities
7392
* (bug 155816): Should we copy the content of the field from
7393
* the entity (possibly overwriting some value set by the user
7394
* when a copy is created), should we leave it alone, or should
7395
* we try to take care of different situations? The problem
7396
* is exacerbated by the usage of this field by the xmlReader.
7397
* To fix this bug, we look at _private on the created node
7398
* and, if it's NULL, we copy in whatever was in the entity.
7399
* If it's not NULL we leave it alone. This is somewhat of a
7400
* hack - maybe we should have further tests to determine
7403
if ((ctxt->node != NULL) && (ent->children != NULL)) {
7405
* Seems we are generating the DOM content, do
7406
* a simple tree copy for all references except the first
7407
* In the first occurrence list contains the replacement.
7409
if (((list == NULL) && (ent->owner == 0)) ||
7410
(ctxt->parseMode == XML_PARSE_READER)) {
7411
xmlNodePtr nw = NULL, cur, firstChild = NULL;
7414
* We are copying here, make sure there is no abuse
7416
ctxt->sizeentcopy += ent->length;
7417
if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7421
* when operating on a reader, the entities definitions
7422
* are always owning the entities subtree.
7423
if (ctxt->parseMode == XML_PARSE_READER)
7427
cur = ent->children;
7428
while (cur != NULL) {
7429
nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7431
if (nw->_private == NULL)
7432
nw->_private = cur->_private;
7433
if (firstChild == NULL){
7436
nw = xmlAddChild(ctxt->node, nw);
7438
if (cur == ent->last) {
7440
* needed to detect some strange empty
7441
* node cases in the reader tests
7443
if ((ctxt->parseMode == XML_PARSE_READER) &&
7445
(nw->type == XML_ELEMENT_NODE) &&
7446
(nw->children == NULL))
7453
#ifdef LIBXML_LEGACY_ENABLED
7454
if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7455
xmlAddEntityReference(ent, firstChild, nw);
7456
#endif /* LIBXML_LEGACY_ENABLED */
7457
} else if ((list == NULL) || (ctxt->inputNr > 0)) {
7458
xmlNodePtr nw = NULL, cur, next, last,
7462
* We are copying here, make sure there is no abuse
7464
ctxt->sizeentcopy += ent->length;
7465
if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7469
* Copy the entity child list and make it the new
7470
* entity child list. The goal is to make sure any
7471
* ID or REF referenced will be the one from the
7472
* document content and not the entity copy.
7474
cur = ent->children;
7475
ent->children = NULL;
7478
while (cur != NULL) {
7482
nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7484
if (nw->_private == NULL)
7485
nw->_private = cur->_private;
7486
if (firstChild == NULL){
7489
xmlAddChild((xmlNodePtr) ent, nw);
7490
xmlAddChild(ctxt->node, cur);
7496
if (ent->owner == 0)
7498
#ifdef LIBXML_LEGACY_ENABLED
7499
if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7500
xmlAddEntityReference(ent, firstChild, nw);
7501
#endif /* LIBXML_LEGACY_ENABLED */
7503
const xmlChar *nbktext;
7506
* the name change is to avoid coalescing of the
7507
* node with a possible previous text one which
7508
* would make ent->children a dangling pointer
7510
nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7512
if (ent->children->type == XML_TEXT_NODE)
7513
ent->children->name = nbktext;
7514
if ((ent->last != ent->children) &&
7515
(ent->last->type == XML_TEXT_NODE))
7516
ent->last->name = nbktext;
7517
xmlAddChildList(ctxt->node, ent->children);
7521
* This is to avoid a nasty side effect, see
7522
* characters() in SAX.c
7532
* xmlParseEntityRef:
7533
* @ctxt: an XML parser context
7535
* parse ENTITY references declarations
7537
* [68] EntityRef ::= '&' Name ';'
7539
* [ WFC: Entity Declared ]
7540
* In a document without any DTD, a document with only an internal DTD
7541
* subset which contains no parameter entity references, or a document
7542
* with "standalone='yes'", the Name given in the entity reference
7543
* must match that in an entity declaration, except that well-formed
7544
* documents need not declare any of the following entities: amp, lt,
7545
* gt, apos, quot. The declaration of a parameter entity must precede
7546
* any reference to it. Similarly, the declaration of a general entity
7547
* must precede any reference to it which appears in a default value in an
7548
* attribute-list declaration. Note that if entities are declared in the
7549
* external subset or in external parameter entities, a non-validating
7550
* processor is not obligated to read and process their declarations;
7551
* for such documents, the rule that an entity must be declared is a
7552
* well-formedness constraint only if standalone='yes'.
7554
* [ WFC: Parsed Entity ]
7555
* An entity reference must not contain the name of an unparsed entity
7557
* Returns the xmlEntityPtr if found, or NULL otherwise.
7560
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7561
const xmlChar *name;
7562
xmlEntityPtr ent = NULL;
7565
if (ctxt->instate == XML_PARSER_EOF)
7571
name = xmlParseName(ctxt);
7573
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7574
"xmlParseEntityRef: no name\n");
7578
xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7584
* Predefined entities override any extra definition
7586
if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7587
ent = xmlGetPredefinedEntity(name);
7593
* Increase the number of entity references parsed
7598
* Ask first SAX for entity resolution, otherwise try the
7599
* entities which may have been stored in the parser context.
7601
if (ctxt->sax != NULL) {
7602
if (ctxt->sax->getEntity != NULL)
7603
ent = ctxt->sax->getEntity(ctxt->userData, name);
7604
if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7605
(ctxt->options & XML_PARSE_OLDSAX))
7606
ent = xmlGetPredefinedEntity(name);
7607
if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7608
(ctxt->userData==ctxt)) {
7609
ent = xmlSAX2GetEntity(ctxt, name);
7612
if (ctxt->instate == XML_PARSER_EOF)
7615
* [ WFC: Entity Declared ]
7616
* In a document without any DTD, a document with only an
7617
* internal DTD subset which contains no parameter entity
7618
* references, or a document with "standalone='yes'", the
7619
* Name given in the entity reference must match that in an
7620
* entity declaration, except that well-formed documents
7621
* need not declare any of the following entities: amp, lt,
7623
* The declaration of a parameter entity must precede any
7625
* Similarly, the declaration of a general entity must
7626
* precede any reference to it which appears in a default
7627
* value in an attribute-list declaration. Note that if
7628
* entities are declared in the external subset or in
7629
* external parameter entities, a non-validating processor
7630
* is not obligated to read and process their declarations;
7631
* for such documents, the rule that an entity must be
7632
* declared is a well-formedness constraint only if
7636
if ((ctxt->standalone == 1) ||
7637
((ctxt->hasExternalSubset == 0) &&
7638
(ctxt->hasPErefs == 0))) {
7639
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7640
"Entity '%s' not defined\n", name);
7642
xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7643
"Entity '%s' not defined\n", name);
7644
if ((ctxt->inSubset == 0) &&
7645
(ctxt->sax != NULL) &&
7646
(ctxt->sax->reference != NULL)) {
7647
ctxt->sax->reference(ctxt->userData, name);
7654
* [ WFC: Parsed Entity ]
7655
* An entity reference must not contain the name of an
7658
else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7659
xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7660
"Entity reference to unparsed entity %s\n", name);
7664
* [ WFC: No External Entity References ]
7665
* Attribute values cannot contain direct or indirect
7666
* entity references to external entities.
7668
else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7669
(ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7670
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7671
"Attribute references external entity '%s'\n", name);
7674
* [ WFC: No < in Attribute Values ]
7675
* The replacement text of any entity referred to directly or
7676
* indirectly in an attribute value (other than "<") must
7679
else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7681
(ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7682
if ((ent->checked & 1) || ((ent->checked == 0) &&
7683
(ent->content != NULL) &&(xmlStrchr(ent->content, '<')))) {
7684
xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7685
"'<' in entity '%s' is not allowed in attributes values\n", name);
7690
* Internal check, no parameter entities here ...
7693
switch (ent->etype) {
7694
case XML_INTERNAL_PARAMETER_ENTITY:
7695
case XML_EXTERNAL_PARAMETER_ENTITY:
7696
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7697
"Attempt to reference the parameter entity '%s'\n",
7706
* [ WFC: No Recursion ]
7707
* A parsed entity must not contain a recursive reference
7708
* to itself, either directly or indirectly.
7709
* Done somewhere else
7715
* xmlParseStringEntityRef:
7716
* @ctxt: an XML parser context
7717
* @str: a pointer to an index in the string
7719
* parse ENTITY references declarations, but this version parses it from
7722
* [68] EntityRef ::= '&' Name ';'
7724
* [ WFC: Entity Declared ]
7725
* In a document without any DTD, a document with only an internal DTD
7726
* subset which contains no parameter entity references, or a document
7727
* with "standalone='yes'", the Name given in the entity reference
7728
* must match that in an entity declaration, except that well-formed
7729
* documents need not declare any of the following entities: amp, lt,
7730
* gt, apos, quot. The declaration of a parameter entity must precede
7731
* any reference to it. Similarly, the declaration of a general entity
7732
* must precede any reference to it which appears in a default value in an
7733
* attribute-list declaration. Note that if entities are declared in the
7734
* external subset or in external parameter entities, a non-validating
7735
* processor is not obligated to read and process their declarations;
7736
* for such documents, the rule that an entity must be declared is a
7737
* well-formedness constraint only if standalone='yes'.
7739
* [ WFC: Parsed Entity ]
7740
* An entity reference must not contain the name of an unparsed entity
7742
* Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7743
* is updated to the current location in the string.
7746
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7750
xmlEntityPtr ent = NULL;
7752
if ((str == NULL) || (*str == NULL))
7760
name = xmlParseStringName(ctxt, &ptr);
7762
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7763
"xmlParseStringEntityRef: no name\n");
7768
xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7777
* Predefined entities override any extra definition
7779
if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7780
ent = xmlGetPredefinedEntity(name);
7789
* Increase the number of entity references parsed
7794
* Ask first SAX for entity resolution, otherwise try the
7795
* entities which may have been stored in the parser context.
7797
if (ctxt->sax != NULL) {
7798
if (ctxt->sax->getEntity != NULL)
7799
ent = ctxt->sax->getEntity(ctxt->userData, name);
7800
if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7801
ent = xmlGetPredefinedEntity(name);
7802
if ((ent == NULL) && (ctxt->userData==ctxt)) {
7803
ent = xmlSAX2GetEntity(ctxt, name);
7806
if (ctxt->instate == XML_PARSER_EOF) {
7812
* [ WFC: Entity Declared ]
7813
* In a document without any DTD, a document with only an
7814
* internal DTD subset which contains no parameter entity
7815
* references, or a document with "standalone='yes'", the
7816
* Name given in the entity reference must match that in an
7817
* entity declaration, except that well-formed documents
7818
* need not declare any of the following entities: amp, lt,
7820
* The declaration of a parameter entity must precede any
7822
* Similarly, the declaration of a general entity must
7823
* precede any reference to it which appears in a default
7824
* value in an attribute-list declaration. Note that if
7825
* entities are declared in the external subset or in
7826
* external parameter entities, a non-validating processor
7827
* is not obligated to read and process their declarations;
7828
* for such documents, the rule that an entity must be
7829
* declared is a well-formedness constraint only if
7833
if ((ctxt->standalone == 1) ||
7834
((ctxt->hasExternalSubset == 0) &&
7835
(ctxt->hasPErefs == 0))) {
7836
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7837
"Entity '%s' not defined\n", name);
7839
xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7840
"Entity '%s' not defined\n",
7843
/* TODO ? check regressions ctxt->valid = 0; */
7847
* [ WFC: Parsed Entity ]
7848
* An entity reference must not contain the name of an
7851
else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7852
xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7853
"Entity reference to unparsed entity %s\n", name);
7857
* [ WFC: No External Entity References ]
7858
* Attribute values cannot contain direct or indirect
7859
* entity references to external entities.
7861
else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7862
(ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7863
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7864
"Attribute references external entity '%s'\n", name);
7867
* [ WFC: No < in Attribute Values ]
7868
* The replacement text of any entity referred to directly or
7869
* indirectly in an attribute value (other than "<") must
7872
else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7873
(ent != NULL) && (ent->content != NULL) &&
7874
(ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7875
(xmlStrchr(ent->content, '<'))) {
7876
xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7877
"'<' in entity '%s' is not allowed in attributes values\n",
7882
* Internal check, no parameter entities here ...
7885
switch (ent->etype) {
7886
case XML_INTERNAL_PARAMETER_ENTITY:
7887
case XML_EXTERNAL_PARAMETER_ENTITY:
7888
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7889
"Attempt to reference the parameter entity '%s'\n",
7898
* [ WFC: No Recursion ]
7899
* A parsed entity must not contain a recursive reference
7900
* to itself, either directly or indirectly.
7901
* Done somewhere else
7910
* xmlParsePEReference:
7911
* @ctxt: an XML parser context
7913
* parse PEReference declarations
7914
* The entity content is handled directly by pushing its content as
7915
* a new input stream.
7917
* [69] PEReference ::= '%' Name ';'
7919
* [ WFC: No Recursion ]
7920
* A parsed entity must not contain a recursive
7921
* reference to itself, either directly or indirectly.
7923
* [ WFC: Entity Declared ]
7924
* In a document without any DTD, a document with only an internal DTD
7925
* subset which contains no parameter entity references, or a document
7926
* with "standalone='yes'", ... ... The declaration of a parameter
7927
* entity must precede any reference to it...
7929
* [ VC: Entity Declared ]
7930
* In a document with an external subset or external parameter entities
7931
* with "standalone='no'", ... ... The declaration of a parameter entity
7932
* must precede any reference to it...
7935
* Parameter-entity references may only appear in the DTD.
7936
* NOTE: misleading but this is handled.
7939
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7941
const xmlChar *name;
7942
xmlEntityPtr entity = NULL;
7943
xmlParserInputPtr input;
7948
name = xmlParseName(ctxt);
7950
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7951
"xmlParsePEReference: no name\n");
7955
xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7962
* Increase the number of entity references parsed
7967
* Request the entity from SAX
7969
if ((ctxt->sax != NULL) &&
7970
(ctxt->sax->getParameterEntity != NULL))
7971
entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7972
if (ctxt->instate == XML_PARSER_EOF)
7974
if (entity == NULL) {
7976
* [ WFC: Entity Declared ]
7977
* In a document without any DTD, a document with only an
7978
* internal DTD subset which contains no parameter entity
7979
* references, or a document with "standalone='yes'", ...
7980
* ... The declaration of a parameter entity must precede
7981
* any reference to it...
7983
if ((ctxt->standalone == 1) ||
7984
((ctxt->hasExternalSubset == 0) &&
7985
(ctxt->hasPErefs == 0))) {
7986
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7987
"PEReference: %%%s; not found\n",
7991
* [ VC: Entity Declared ]
7992
* In a document with an external subset or external
7993
* parameter entities with "standalone='no'", ...
7994
* ... The declaration of a parameter entity must
7995
* precede any reference to it...
7997
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7998
"PEReference: %%%s; not found\n",
8004
* Internal checking in case the entity quest barfed
8006
if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8007
(entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8008
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8009
"Internal: %%%s; is not a parameter entity\n",
8011
} else if (ctxt->input->free != deallocblankswrapper) {
8012
input = xmlNewBlanksWrapperInputStream(ctxt, entity);
8013
if (xmlPushInput(ctxt, input) < 0)
8018
* handle the extra spaces added before and after
8019
* c.f. http://www.w3.org/TR/REC-xml#as-PE
8021
input = xmlNewEntityInputStream(ctxt, entity);
8022
if (xmlPushInput(ctxt, input) < 0)
8024
if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8025
(CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8026
(IS_BLANK_CH(NXT(5)))) {
8027
xmlParseTextDecl(ctxt);
8029
XML_ERR_UNSUPPORTED_ENCODING) {
8031
* The XML REC instructs us to stop parsing
8034
ctxt->instate = XML_PARSER_EOF;
8040
ctxt->hasPErefs = 1;
8044
* xmlLoadEntityContent:
8045
* @ctxt: an XML parser context
8046
* @entity: an unloaded system entity
8048
* Load the original content of the given system entity from the
8049
* ExternalID/SystemID given. This is to be used for Included in Literal
8050
* http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8052
* Returns 0 in case of success and -1 in case of failure
8055
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8056
xmlParserInputPtr input;
8061
if ((ctxt == NULL) || (entity == NULL) ||
8062
((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8063
(entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8064
(entity->content != NULL)) {
8065
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8066
"xmlLoadEntityContent parameter error");
8070
if (xmlParserDebugEntities)
8071
xmlGenericError(xmlGenericErrorContext,
8072
"Reading %s entity content input\n", entity->name);
8074
buf = xmlBufferCreate();
8076
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8077
"xmlLoadEntityContent parameter error");
8081
input = xmlNewEntityInputStream(ctxt, entity);
8082
if (input == NULL) {
8083
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8084
"xmlLoadEntityContent input error");
8090
* Push the entity as the current input, read char by char
8091
* saving to the buffer until the end of the entity or an error
8093
if (xmlPushInput(ctxt, input) < 0) {
8100
while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8102
xmlBufferAdd(buf, ctxt->input->cur, l);
8103
if (count++ > XML_PARSER_CHUNK_SIZE) {
8106
if (ctxt->instate == XML_PARSER_EOF) {
8116
if (ctxt->instate == XML_PARSER_EOF) {
8124
if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8126
} else if (!IS_CHAR(c)) {
8127
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8128
"xmlLoadEntityContent: invalid char value %d\n",
8133
entity->content = buf->content;
8134
buf->content = NULL;
8141
* xmlParseStringPEReference:
8142
* @ctxt: an XML parser context
8143
* @str: a pointer to an index in the string
8145
* parse PEReference declarations
8147
* [69] PEReference ::= '%' Name ';'
8149
* [ WFC: No Recursion ]
8150
* A parsed entity must not contain a recursive
8151
* reference to itself, either directly or indirectly.
8153
* [ WFC: Entity Declared ]
8154
* In a document without any DTD, a document with only an internal DTD
8155
* subset which contains no parameter entity references, or a document
8156
* with "standalone='yes'", ... ... The declaration of a parameter
8157
* entity must precede any reference to it...
8159
* [ VC: Entity Declared ]
8160
* In a document with an external subset or external parameter entities
8161
* with "standalone='no'", ... ... The declaration of a parameter entity
8162
* must precede any reference to it...
8165
* Parameter-entity references may only appear in the DTD.
8166
* NOTE: misleading but this is handled.
8168
* Returns the string of the entity content.
8169
* str is updated to the current value of the index
8172
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8176
xmlEntityPtr entity = NULL;
8178
if ((str == NULL) || (*str == NULL)) return(NULL);
8184
name = xmlParseStringName(ctxt, &ptr);
8186
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8187
"xmlParseStringPEReference: no name\n");
8193
xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8201
* Increase the number of entity references parsed
8206
* Request the entity from SAX
8208
if ((ctxt->sax != NULL) &&
8209
(ctxt->sax->getParameterEntity != NULL))
8210
entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8211
if (ctxt->instate == XML_PARSER_EOF) {
8215
if (entity == NULL) {
8217
* [ WFC: Entity Declared ]
8218
* In a document without any DTD, a document with only an
8219
* internal DTD subset which contains no parameter entity
8220
* references, or a document with "standalone='yes'", ...
8221
* ... The declaration of a parameter entity must precede
8222
* any reference to it...
8224
if ((ctxt->standalone == 1) ||
8225
((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8226
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8227
"PEReference: %%%s; not found\n", name);
8230
* [ VC: Entity Declared ]
8231
* In a document with an external subset or external
8232
* parameter entities with "standalone='no'", ...
8233
* ... The declaration of a parameter entity must
8234
* precede any reference to it...
8236
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8237
"PEReference: %%%s; not found\n",
8243
* Internal checking in case the entity quest barfed
8245
if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8246
(entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8247
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8248
"%%%s; is not a parameter entity\n",
8252
ctxt->hasPErefs = 1;
8259
* xmlParseDocTypeDecl:
8260
* @ctxt: an XML parser context
8262
* parse a DOCTYPE declaration
8264
* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8265
* ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8267
* [ VC: Root Element Type ]
8268
* The Name in the document type declaration must match the element
8269
* type of the root element.
8273
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8274
const xmlChar *name = NULL;
8275
xmlChar *ExternalID = NULL;
8276
xmlChar *URI = NULL;
8279
* We know that '<!DOCTYPE' has been detected.
8286
* Parse the DOCTYPE name.
8288
name = xmlParseName(ctxt);
8290
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8291
"xmlParseDocTypeDecl : no DOCTYPE name !\n");
8293
ctxt->intSubName = name;
8298
* Check for SystemID and ExternalID
8300
URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8302
if ((URI != NULL) || (ExternalID != NULL)) {
8303
ctxt->hasExternalSubset = 1;
8305
ctxt->extSubURI = URI;
8306
ctxt->extSubSystem = ExternalID;
8311
* Create and update the internal subset.
8313
if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8314
(!ctxt->disableSAX))
8315
ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8316
if (ctxt->instate == XML_PARSER_EOF)
8320
* Is there any internal subset declarations ?
8321
* they are handled separately in xmlParseInternalSubset()
8327
* We should be at the end of the DOCTYPE declaration.
8330
xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8336
* xmlParseInternalSubset:
8337
* @ctxt: an XML parser context
8339
* parse the internal subset declaration
8341
* [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8345
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8347
* Is there any DTD definition ?
8350
ctxt->instate = XML_PARSER_DTD;
8353
* Parse the succession of Markup declarations and
8355
* Subsequence (markupdecl | PEReference | S)*
8357
while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
8358
const xmlChar *check = CUR_PTR;
8359
unsigned int cons = ctxt->input->consumed;
8362
xmlParseMarkupDecl(ctxt);
8363
xmlParsePEReference(ctxt);
8366
* Pop-up of finished entities.
8368
while ((RAW == 0) && (ctxt->inputNr > 1))
8371
if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8372
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8373
"xmlParseInternalSubset: error detected in Markup declaration\n");
8384
* We should be at the end of the DOCTYPE declaration.
8387
xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8392
#ifdef LIBXML_SAX1_ENABLED
8394
* xmlParseAttribute:
8395
* @ctxt: an XML parser context
8396
* @value: a xmlChar ** used to store the value of the attribute
8398
* parse an attribute
8400
* [41] Attribute ::= Name Eq AttValue
8402
* [ WFC: No External Entity References ]
8403
* Attribute values cannot contain direct or indirect entity references
8404
* to external entities.
8406
* [ WFC: No < in Attribute Values ]
8407
* The replacement text of any entity referred to directly or indirectly in
8408
* an attribute value (other than "<") must not contain a <.
8410
* [ VC: Attribute Value Type ]
8411
* The attribute must have been declared; the value must be of the type
8414
* [25] Eq ::= S? '=' S?
8418
* [NS 11] Attribute ::= QName Eq AttValue
8420
* Also the case QName == xmlns:??? is handled independently as a namespace
8423
* Returns the attribute name, and the value in *value.
8427
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8428
const xmlChar *name;
8433
name = xmlParseName(ctxt);
8435
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8436
"error parsing attribute name\n");
8447
val = xmlParseAttValue(ctxt);
8448
ctxt->instate = XML_PARSER_CONTENT;
8450
xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8451
"Specification mandate value for attribute %s\n", name);
8456
* Check that xml:lang conforms to the specification
8457
* No more registered as an error, just generate a warning now
8458
* since this was deprecated in XML second edition
8460
if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8461
if (!xmlCheckLanguageID(val)) {
8462
xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8463
"Malformed value for xml:lang : %s\n",
8469
* Check that xml:space conforms to the specification
8471
if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8472
if (xmlStrEqual(val, BAD_CAST "default"))
8474
else if (xmlStrEqual(val, BAD_CAST "preserve"))
8477
xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8478
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8489
* @ctxt: an XML parser context
8491
* parse a start of tag either for rule element or
8492
* EmptyElement. In both cases we don't parse the tag closing chars.
8494
* [40] STag ::= '<' Name (S Attribute)* S? '>'
8496
* [ WFC: Unique Att Spec ]
8497
* No attribute name may appear more than once in the same start-tag or
8498
* empty-element tag.
8500
* [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8502
* [ WFC: Unique Att Spec ]
8503
* No attribute name may appear more than once in the same start-tag or
8504
* empty-element tag.
8508
* [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8510
* [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8512
* Returns the element name parsed
8516
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8517
const xmlChar *name;
8518
const xmlChar *attname;
8520
const xmlChar **atts = ctxt->atts;
8522
int maxatts = ctxt->maxatts;
8525
if (RAW != '<') return(NULL);
8528
name = xmlParseName(ctxt);
8530
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8531
"xmlParseStartTag: invalid element name\n");
8536
* Now parse the attributes, it ends up with the ending
8543
while (((RAW != '>') &&
8544
((RAW != '/') || (NXT(1) != '>')) &&
8545
(IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8546
const xmlChar *q = CUR_PTR;
8547
unsigned int cons = ctxt->input->consumed;
8549
attname = xmlParseAttribute(ctxt, &attvalue);
8550
if ((attname != NULL) && (attvalue != NULL)) {
8552
* [ WFC: Unique Att Spec ]
8553
* No attribute name may appear more than once in the same
8554
* start-tag or empty-element tag.
8556
for (i = 0; i < nbatts;i += 2) {
8557
if (xmlStrEqual(atts[i], attname)) {
8558
xmlErrAttributeDup(ctxt, NULL, attname);
8564
* Add the pair to atts
8567
maxatts = 22; /* allow for 10 attrs by default */
8568
atts = (const xmlChar **)
8569
xmlMalloc(maxatts * sizeof(xmlChar *));
8571
xmlErrMemory(ctxt, NULL);
8572
if (attvalue != NULL)
8577
ctxt->maxatts = maxatts;
8578
} else if (nbatts + 4 > maxatts) {
8582
n = (const xmlChar **) xmlRealloc((void *) atts,
8583
maxatts * sizeof(const xmlChar *));
8585
xmlErrMemory(ctxt, NULL);
8586
if (attvalue != NULL)
8592
ctxt->maxatts = maxatts;
8594
atts[nbatts++] = attname;
8595
atts[nbatts++] = attvalue;
8596
atts[nbatts] = NULL;
8597
atts[nbatts + 1] = NULL;
8599
if (attvalue != NULL)
8606
if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8608
if (!IS_BLANK_CH(RAW)) {
8609
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8610
"attributes construct error\n");
8613
if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8614
(attname == NULL) && (attvalue == NULL)) {
8615
xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8616
"xmlParseStartTag: problem parsing attributes\n");
8624
* SAX: Start of Element !
8626
if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8627
(!ctxt->disableSAX)) {
8629
ctxt->sax->startElement(ctxt->userData, name, atts);
8631
ctxt->sax->startElement(ctxt->userData, name, NULL);
8635
/* Free only the content strings */
8636
for (i = 1;i < nbatts;i+=2)
8637
if (atts[i] != NULL)
8638
xmlFree((xmlChar *) atts[i]);
8645
* @ctxt: an XML parser context
8646
* @line: line of the start tag
8647
* @nsNr: number of namespaces on the start tag
8649
* parse an end of tag
8651
* [42] ETag ::= '</' Name S? '>'
8655
* [NS 9] ETag ::= '</' QName S? '>'
8659
/*
 * Parse an end tag "</name>" and report it through the SAX1 endElement
 * callback.
 *
 * @ctxt: parser context; ctxt->name is the element name the end tag is
 *        compared against.
 * @line: line number quoted in the tag-mismatch error message.
 *
 * NOTE(review): only part of this function's statements are visible in this
 * excerpt, so the notes below describe the visible statements only.
 */
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8660
const xmlChar *name;
8663
/* The input must be positioned on "</". */
if ((RAW != '<') || (NXT(1) != '/')) {
8664
xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8665
"xmlParseEndTag: '</' not found\n");
8670
/* Parse the tag name and compare it against the currently open element. */
name = xmlParseNameAndCompare(ctxt,ctxt->name);
8673
* We should definitely be at the ending "S? '>'" part
8677
if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8678
xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8683
* [ WFC: Element Type Match ]
8684
* The Name in an element's end-tag must match the element type in the
8688
/*
 * Any result other than the sentinel (xmlChar*)1 is reported as a mismatch;
 * a NULL result is replaced by a placeholder string for the message.
 */
if (name != (xmlChar*)1) {
8689
if (name == NULL) name = BAD_CAST "unparseable";
8690
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8691
"Opening and ending tag mismatch: %s line %d and %s\n",
8692
ctxt->name, line, name);
8698
/* Notify the application unless SAX callbacks are disabled. */
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8699
(!ctxt->disableSAX))
8700
ctxt->sax->endElement(ctxt->userData, ctxt->name);
8709
* @ctxt: an XML parser context
8711
* parse an end of tag
8713
* [42] ETag ::= '</' Name S? '>'
8717
* [NS 9] ETag ::= '</' QName S? '>'
8721
/*
 * Thin wrapper: handles an end tag by delegating to xmlParseEndTag1() with
 * 0 passed as the start-tag line number.
 */
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8722
xmlParseEndTag1(ctxt, 0);
8724
#endif /* LIBXML_SAX1_ENABLED */
8726
/************************************************************************
8728
* SAX 2 specific operations *
8730
************************************************************************/
8734
* @ctxt: an XML parser context
8735
* @prefix: the prefix to lookup
8737
* Lookup the namespace name for the @prefix (which can be NULL)
8738
* The prefix must come from the @ctxt->dict dictionary
8740
* Returns the namespace name or NULL if not bound
8742
/*
 * Look up the namespace name bound to @prefix in ctxt->nsTab.
 * Prefixes are compared by pointer value, not by string content.
 * NOTE(review): the statement taken for the empty-default-namespace case and
 * the final fall-through return are not visible in this excerpt.
 */
static const xmlChar *
8743
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8746
/* The "xml" prefix is answered directly, without consulting the table. */
if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8747
/*
 * nsTab is read as consecutive (prefix, name) pairs, scanned from the last
 * pair down to index 0.
 */
for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8748
if (ctxt->nsTab[i] == prefix) {
8749
/* Special case: NULL prefix whose stored name is the empty string. */
if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8751
return(ctxt->nsTab[i + 1]);
8758
* @ctxt: an XML parser context
8759
* @prefix: pointer to store the prefix part
8761
* parse an XML Namespace QName
8763
* [6] QName ::= (Prefix ':')? LocalPart
8764
* [7] Prefix ::= NCName
8765
* [8] LocalPart ::= NCName
8767
* Returns the Name parsed or NULL
8770
/*
 * Parse a QName at the current input position.
 *
 * @ctxt:   parser context
 * @prefix: output location for the prefix part
 *
 * The visible code first tries xmlParseNCName(); the remaining calls belong
 * to recovery paths that emit XML_NS_ERR_QNAME, rebuild a combined name with
 * xmlBuildQName() and intern it in ctxt->dict via xmlDictLookup().
 * NOTE(review): the conditions guarding each path are not visible in this
 * excerpt, so the notes below are limited to what each statement does.
 */
static const xmlChar *
8771
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8772
const xmlChar *l, *p;
8776
l = xmlParseNCName(ctxt);
8779
/* Alternative parse with the more general Name production, with an error. */
l = xmlParseName(ctxt);
8781
xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8782
"Failed to parse QName '%s'\n", l, NULL, NULL);
8792
/* Here p holds the part already parsed; l is parsed again as an NCName. */
l = xmlParseNCName(ctxt);
8796
xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8797
"Failed to parse QName '%s:'\n", p, NULL, NULL);
8798
l = xmlParseNmtoken(ctxt);
8800
/* Build a combined name from l (or an empty string) and p. */
tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8802
tmp = xmlBuildQName(l, p, NULL, 0);
8805
/* Intern the rebuilt name, then release the temporary buffer. */
p = xmlDictLookup(ctxt->dict, tmp, -1);
8806
if (tmp != NULL) xmlFree(tmp);
8813
xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8814
"Failed to parse QName '%s:%s:'\n", p, l, NULL);
8816
tmp = (xmlChar *) xmlParseName(ctxt);
8818
tmp = xmlBuildQName(tmp, l, NULL, 0);
8819
l = xmlDictLookup(ctxt->dict, tmp, -1);
8820
if (tmp != NULL) xmlFree(tmp);
8824
tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8825
l = xmlDictLookup(ctxt->dict, tmp, -1);
8826
if (tmp != NULL) xmlFree(tmp);
8837
* xmlParseQNameAndCompare:
8838
* @ctxt: an XML parser context
8839
* @name: the localname
8840
* @prefix: the prefix, if any.
8842
* parse an XML name and compare it for a match
8843
* (specialized for endtag parsing)
8845
* Returns NULL for an illegal name, (xmlChar*) 1 for success
8846
* and the name for mismatch
8849
/*
 * Parse a QName in an end tag and compare it with the expected
 * @prefix / @name pair.
 *
 * @ctxt:   parser context
 * @name:   expected local name
 * @prefix: expected prefix; when NULL the work is delegated to
 *          xmlParseNameAndCompare()
 *
 * Returns (const xmlChar*) 1 on the two success paths visible below.
 * NOTE(review): the loop bodies and the final mismatch return are not
 * visible in this excerpt.
 */
static const xmlChar *
8850
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8851
xmlChar const *prefix) {
8855
const xmlChar *prefix2;
8857
if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8860
/* Fast path: compare the raw input bytes against the expected strings. */
in = ctxt->input->cur;
8863
while (*in != 0 && *in == *cmp) {
8867
/* The first comparison string was fully matched and a ':' follows. */
if ((*cmp == 0) && (*in == ':')) {
8870
while (*in != 0 && *in == *cmp) {
8874
/* Full match followed by '>' or a blank: commit the new input position. */
if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8876
ctxt->input->cur = in;
8877
return((const xmlChar*) 1);
8881
* all strings coms from the dictionary, equality can be done directly
8883
/* Slow path: parse the QName normally and compare the pointers. */
ret = xmlParseQName (ctxt, &prefix2);
8884
if ((ret == name) && (prefix == prefix2))
8885
return((const xmlChar*) 1);
8890
* xmlParseAttValueInternal:
8891
* @ctxt: an XML parser context
8892
* @len: attribute len result
8893
* @alloc: whether the attribute was reallocated as a new string
8894
* @normalize: if 1 then further non-CDATA normalization must be done
8896
* parse a value for an attribute.
8897
* NOTE: if no normalization is needed, the routine will return pointers
8898
* directly from the data buffer.
8900
* 3.3.3 Attribute-Value Normalization:
8901
* Before the value of an attribute is passed to the application or
8902
* checked for validity, the XML processor must normalize it as follows:
8903
* - a character reference is processed by appending the referenced
8904
* character to the attribute value
8905
* - an entity reference is processed by recursively processing the
8906
* replacement text of the entity
8907
* - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8908
* appending #x20 to the normalized value, except that only a single
8909
* #x20 is appended for a "#xD#xA" sequence that is part of an external
8910
* parsed entity or the literal entity value of an internal parsed entity
8911
* - other characters are processed by appending them to the normalized value
8912
* If the declared value is not CDATA, then the XML processor must further
8913
* process the normalized attribute value by discarding any leading and
8914
* trailing space (#x20) characters, and by replacing sequences of space
8915
* (#x20) characters by a single space (#x20) character.
8916
* All attributes for which no declaration has been read should be treated
8917
* by a non-validating parser as if declared CDATA.
8919
* Returns the AttValue parsed or NULL. The value has to be freed by the
8920
* caller if it was copied, this can be detected by val[*len] == 0.
8924
/*
 * Parse a quoted attribute value starting at the current input position.
 *
 * @ctxt:  parser context
 * @len:   output, receives last - start on the path visible near the end
 * @alloc: optional output flag (may be NULL, every write is guarded);
 *         set to 1 next to the xmlStrndup() call and before the hand-off to
 *         xmlParseAttValueComplex(), set to 0 on the remaining path
 *
 * The visible code scans the input directly with the pointers in / start /
 * last / end and jumps to need_complex when the scan stops on a character
 * other than the closing quote.
 * NOTE(review): many statements (the calls that can move the input buffer,
 * the branch conditions, the returns) are not visible in this excerpt.
 */
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8928
const xmlChar *in = NULL, *start, *end, *last;
8929
xmlChar *ret = NULL;
8932
in = (xmlChar *) CUR_PTR;
8933
/* An attribute value must start with a single or double quote. */
if (*in != '"' && *in != '\'') {
8934
xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8937
ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8940
* try to handle in this routine the most common case where no
8941
* allocation of a new string is required and where content is
8945
end = ctxt->input->end;
8948
/*
 * Recurring pattern in this function: remember input->base, and if it is
 * different afterwards, shift the local pointers by the same delta and
 * reload end. The operation in between is not visible in this excerpt.
 */
const xmlChar *oldbase = ctxt->input->base;
8950
if (oldbase != ctxt->input->base) {
8951
long delta = ctxt->input->base - oldbase;
8952
start = start + delta;
8955
end = ctxt->input->end;
8959
* Skip any leading spaces
8961
while ((in < end) && (*in != limit) &&
8962
((*in == 0x20) || (*in == 0x9) ||
8963
(*in == 0xA) || (*in == 0xD))) {
8967
const xmlChar *oldbase = ctxt->input->base;
8969
if (ctxt->instate == XML_PARSER_EOF)
8971
if (oldbase != ctxt->input->base) {
8972
long delta = ctxt->input->base - oldbase;
8973
start = start + delta;
8976
end = ctxt->input->end;
8977
/* Length cap, waived when XML_PARSE_HUGE is set. */
if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8978
((ctxt->options & XML_PARSE_HUGE) == 0)) {
8979
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8980
"AttValue length too long\n");
8985
/*
 * Scan plain characters in the range 0x20..0x7f, stopping at the closing
 * quote, '&' or '<'.
 */
while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8986
(*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8987
/* Leave the loop on two consecutive spaces. */
if ((*in++ == 0x20) && (*in == 0x20)) break;
8989
const xmlChar *oldbase = ctxt->input->base;
8991
if (ctxt->instate == XML_PARSER_EOF)
8993
if (oldbase != ctxt->input->base) {
8994
long delta = ctxt->input->base - oldbase;
8995
start = start + delta;
8998
end = ctxt->input->end;
8999
if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9000
((ctxt->options & XML_PARSE_HUGE) == 0)) {
9001
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9002
"AttValue length too long\n");
9009
* skip the trailing blanks
9011
/*
 * NOTE(review): last[-1] is read before the last > start bound is tested;
 * confirm that start can never be the first byte of the buffer here.
 */
while ((last[-1] == 0x20) && (last > start)) last--;
9012
while ((in < end) && (*in != limit) &&
9013
((*in == 0x20) || (*in == 0x9) ||
9014
(*in == 0xA) || (*in == 0xD))) {
9017
const xmlChar *oldbase = ctxt->input->base;
9019
if (ctxt->instate == XML_PARSER_EOF)
9021
if (oldbase != ctxt->input->base) {
9022
long delta = ctxt->input->base - oldbase;
9023
start = start + delta;
9025
last = last + delta;
9027
end = ctxt->input->end;
9028
if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9029
((ctxt->options & XML_PARSE_HUGE) == 0)) {
9030
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9031
"AttValue length too long\n");
9036
if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9037
((ctxt->options & XML_PARSE_HUGE) == 0)) {
9038
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9039
"AttValue length too long\n");
9042
/* Anything other than the closing quote needs the general routine. */
if (*in != limit) goto need_complex;
9044
while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9045
(*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9048
const xmlChar *oldbase = ctxt->input->base;
9050
if (ctxt->instate == XML_PARSER_EOF)
9052
if (oldbase != ctxt->input->base) {
9053
long delta = ctxt->input->base - oldbase;
9054
start = start + delta;
9057
end = ctxt->input->end;
9058
if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9059
((ctxt->options & XML_PARSE_HUGE) == 0)) {
9060
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9061
"AttValue length too long\n");
9067
if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9068
((ctxt->options & XML_PARSE_HUGE) == 0)) {
9069
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9070
"AttValue length too long\n");
9073
if (*in != limit) goto need_complex;
9077
/* Result is either a pointer into the input buffer or a fresh copy. */
*len = last - start;
9078
ret = (xmlChar *) start;
9080
if (alloc) *alloc = 1;
9081
ret = xmlStrndup(start, last - start);
9084
if (alloc) *alloc = 0;
9087
/* General path: the complex routine's result is reported as allocated. */
if (alloc) *alloc = 1;
9088
return xmlParseAttValueComplex(ctxt, len, normalize);
9092
* xmlParseAttribute2:
9093
* @ctxt: an XML parser context
9094
* @pref: the element prefix
9095
* @elem: the element name
9096
* @prefix: a xmlChar ** used to store the value of the attribute prefix
9097
* @value: a xmlChar ** used to store the value of the attribute
9098
* @len: an int * to save the length of the attribute
9099
* @alloc: an int * to indicate if the attribute was allocated
9101
* parse an attribute in the new SAX2 framework.
9103
* Returns the attribute name, and the value in *value.
9106
/*
 * Parse one attribute (QName, then value) on the SAX2 path.
 *
 * @ctxt:   parser context
 * @pref:   prefix of the owning element, used for the attsSpecial lookup
 * @elem:   name of the owning element, used for the attsSpecial lookup
 * @prefix: output, receives the attribute prefix from xmlParseQName()
 * @value:  output for the attribute value (the assignment itself is not
 *          visible in this excerpt)
 * @len:    passed through to xmlParseAttValueInternal()
 * @alloc:  passed through to xmlParseAttValueInternal()
 *
 * After the value is parsed, attributes carrying the "xml" prefix get extra
 * checks for xml:lang (pedantic mode only) and xml:space.
 * NOTE(review): several statements, including the returns, are not visible
 * in this excerpt.
 */
static const xmlChar *
9107
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9108
const xmlChar * pref, const xmlChar * elem,
9109
const xmlChar ** prefix, xmlChar ** value,
9110
int *len, int *alloc)
9112
const xmlChar *name;
9113
xmlChar *val, *internal_val = NULL;
9118
name = xmlParseQName(ctxt, prefix);
9120
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9121
"error parsing attribute name\n");
9126
* get the type if needed
9128
/* The hash stores the type as a pointer-sized integer keyed on four names. */
if (ctxt->attsSpecial != NULL) {
9131
type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
9132
pref, elem, *prefix, name);
9144
val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9147
* Sometimes a second normalisation pass for spaces is needed
9148
* but that only happens if charrefs or entities refernces
9149
* have been used in the attribute value, i.e. the attribute
9150
* value have been extracted in an allocated string already.
9153
const xmlChar *val2;
9155
/* Adopt the normalized string only when a different one is returned. */
val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9156
if ((val2 != NULL) && (val2 != val)) {
9158
val = (xmlChar *) val2;
9162
ctxt->instate = XML_PARSER_CONTENT;
9164
xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9165
"Specification mandate value for attribute %s\n",
9170
/* Extra checks for attributes carrying the "xml" prefix. */
if (*prefix == ctxt->str_xml) {
9172
* Check that xml:lang conforms to the specification
9173
* No more registered as an error, just generate a warning now
9174
* since this was deprecated in XML second edition
9176
/* The value is copied because val is used with an explicit length here. */
if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9177
internal_val = xmlStrndup(val, *len);
9178
if (!xmlCheckLanguageID(internal_val)) {
9179
xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9180
"Malformed value for xml:lang : %s\n",
9181
internal_val, NULL);
9186
* Check that xml:space conforms to the specification
9188
if (xmlStrEqual(name, BAD_CAST "space")) {
9189
internal_val = xmlStrndup(val, *len);
9190
if (xmlStrEqual(internal_val, BAD_CAST "default"))
9192
else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9195
xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9196
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9197
internal_val, NULL);
9201
/* Release the temporary copy made for the checks above. */
xmlFree(internal_val);
9209
* xmlParseStartTag2:
9210
* @ctxt: an XML parser context
9212
* parse a start of tag either for rule element or
9213
* EmptyElement. In both cases we don't parse the tag closing chars.
9214
* This routine is called when running SAX2 parsing
9216
* [40] STag ::= '<' Name (S Attribute)* S? '>'
9218
* [ WFC: Unique Att Spec ]
9219
* No attribute name may appear more than once in the same start-tag or
9220
* empty-element tag.
9222
* [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9224
* [ WFC: Unique Att Spec ]
9225
* No attribute name may appear more than once in the same start-tag or
9226
* empty-element tag.
9230
* [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9232
* [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9234
* Returns the element name parsed
9237
/*
 * Parse a start tag (or the opening part of an empty-element tag) on the
 * SAX2 path and issue the startElementNs callback.
 *
 * @ctxt: parser context
 * @pref: output for the element prefix (the assignment is not visible here)
 * @URI:  output for the element namespace (the assignment is not visible here)
 * @tlen: output, receives the byte distance from the recorded tag start to
 *        the position reached after the element QName
 *
 * Phases visible in the code below:
 *   1. record input base / offset / line / column of the tag start;
 *   2. parse the element QName;
 *   3. attribute loop: "xmlns" and "xmlns:*" declarations are validated and
 *      pushed with nsPush(); other attributes are appended to atts using
 *      five slots per attribute;
 *   4. defaulted attributes from ctxt->attsDefault;
 *   5. namespace resolution and duplicate checks over atts;
 *   6. startElementNs callback, then release of allocated values.
 * Whenever ctxt->input->base differs from the recorded base the code jumps
 * to base_changed, which rewinds the input to the recorded tag start.
 *
 * NOTE(review): many statements (gotos, breaks, labels, returns, some
 * declarations) are not visible in this excerpt.
 */
static const xmlChar *
9238
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9239
const xmlChar **URI, int *tlen) {
9240
const xmlChar *localname;
9241
const xmlChar *prefix;
9242
const xmlChar *attname;
9243
const xmlChar *aprefix;
9244
const xmlChar *nsname;
9246
const xmlChar **atts = ctxt->atts;
9247
int maxatts = ctxt->maxatts;
9248
int nratts, nbatts, nbdef;
9249
int i, j, nbNs, attval, oldline, oldcol;
9250
const xmlChar *base;
9252
int nsNr = ctxt->nsNr;
9254
if (RAW != '<') return(NULL);
9258
* NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9259
* point since the attribute values may be stored as pointers to
9260
* the buffer and calling SHRINK would destroy them !
9261
* The Shrinking is only possible once the full set of attribute
9262
* callbacks have been done.
9266
/*
 * Snapshot of the tag start; cur is an offset from base so that it stays
 * meaningful if the buffer moves. Used by the rewind code near the end.
 */
base = ctxt->input->base;
9267
cur = ctxt->input->cur - ctxt->input->base;
9268
oldline = ctxt->input->line;
9269
oldcol = ctxt->input->col;
9275
/* Forget any namespaces added during an earlier parse of this element. */
9278
localname = xmlParseQName(ctxt, &prefix);
9279
if (localname == NULL) {
9280
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9281
"StartTag: invalid element name\n");
9284
*tlen = ctxt->input->cur - ctxt->input->base - cur;
9287
* Now parse the attributes, it ends up with the ending
9293
if (ctxt->input->base != base) goto base_changed;
9295
/*
 * Attribute loop: runs until '>' or "/>" is reached, the current byte is
 * not a valid byte character, or the parser entered the EOF state.
 */
while (((RAW != '>') &&
9296
((RAW != '/') || (NXT(1) != '>')) &&
9297
(IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9298
const xmlChar *q = CUR_PTR;
9299
unsigned int cons = ctxt->input->consumed;
9300
int len = -1, alloc = 0;
9302
attname = xmlParseAttribute2(ctxt, prefix, localname,
9303
&aprefix, &attvalue, &len, &alloc);
9304
if (ctxt->input->base != base) {
9305
if ((attvalue != NULL) && (alloc != 0))
9310
if ((attname != NULL) && (attvalue != NULL)) {
9311
if (len < 0) len = xmlStrlen(attvalue);
9312
/*
 * Unprefixed "xmlns": default namespace declaration. The value is interned
 * in ctxt->dict so it can be compared by pointer below.
 */
if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9313
const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9317
uri = xmlParseURI((const char *) URL);
9319
xmlNsErr(ctxt, XML_WAR_NS_URI,
9320
"xmlns: '%s' is not a valid URI\n",
9323
if (uri->scheme == NULL) {
9324
xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9325
"xmlns: URI %s is not absolute\n",
9330
if (URL == ctxt->str_xml_ns) {
9331
if (attname != ctxt->str_xml) {
9332
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9333
"xml namespace URI cannot be the default namespace\n",
9336
goto skip_default_ns;
9340
BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9341
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9342
"reuse of the xmlns namespace name is forbidden\n",
9344
goto skip_default_ns;
9348
* check that it's not a defined namespace
9350
/* Search the nbNs pairs pushed for this tag for a NULL-prefix entry. */
for (j = 1;j <= nbNs;j++)
9351
if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9354
xmlErrAttributeDup(ctxt, NULL, attname);
9356
if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9358
if (alloc != 0) xmlFree(attvalue);
9362
/* "xmlns:NAME": prefixed declaration; attname is the declared prefix. */
if (aprefix == ctxt->str_xmlns) {
9363
const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9366
if (attname == ctxt->str_xml) {
9367
if (URL != ctxt->str_xml_ns) {
9368
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9369
"xml namespace prefix mapped to wrong URI\n",
9373
* Do not keep a namespace definition node
9377
if (URL == ctxt->str_xml_ns) {
9378
if (attname != ctxt->str_xml) {
9379
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9380
"xml namespace URI mapped to wrong prefix\n",
9385
if (attname == ctxt->str_xmlns) {
9386
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9387
"redefinition of the xmlns prefix is forbidden\n",
9393
BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9394
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9395
"reuse of the xmlns namespace name is forbidden\n",
9399
if ((URL == NULL) || (URL[0] == 0)) {
9400
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9401
"xmlns:%s: Empty XML namespace is not allowed\n",
9402
attname, NULL, NULL);
9405
uri = xmlParseURI((const char *) URL);
9407
xmlNsErr(ctxt, XML_WAR_NS_URI,
9408
"xmlns:%s: '%s' is not a valid URI\n",
9409
attname, URL, NULL);
9411
if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9412
xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9413
"xmlns:%s: URI %s is not absolute\n",
9414
attname, URL, NULL);
9421
* check that it's not a defined namespace
9423
/* Same search as above, this time for an entry with this prefix. */
for (j = 1;j <= nbNs;j++)
9424
if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9427
xmlErrAttributeDup(ctxt, aprefix, attname);
9429
if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9431
if (alloc != 0) xmlFree(attvalue);
9433
if (ctxt->input->base != base) goto base_changed;
9438
* Add the pair to atts
9440
/* Ordinary attribute: make sure five more slots are available. */
if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9441
if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9442
if (attvalue[len] == 0)
9446
maxatts = ctxt->maxatts;
9449
/* Remember whether this value was allocated, for the cleanup loops below. */
ctxt->attallocs[nratts++] = alloc;
9450
atts[nbatts++] = attname;
9451
atts[nbatts++] = aprefix;
9452
atts[nbatts++] = NULL; /* the URI will be fetched later */
9453
atts[nbatts++] = attvalue;
9455
/*
 * NOTE(review): a statement between the two value stores may be missing
 * from this excerpt; as shown, both slots receive attvalue.
 */
atts[nbatts++] = attvalue;
9457
* tag if some deallocation is needed
9459
if (alloc != 0) attval = 1;
9461
if ((attvalue != NULL) && (attvalue[len] == 0))
9468
if (ctxt->instate == XML_PARSER_EOF)
9470
if (ctxt->input->base != base) goto base_changed;
9471
if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9473
if (!IS_BLANK_CH(RAW)) {
9474
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9475
"attributes construct error\n");
9479
/* No input was consumed and nothing was parsed: report an internal error. */
if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9480
(attname == NULL) && (attvalue == NULL)) {
9481
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9482
"xmlParseStartTag: problem parsing attributes\n");
9486
if (ctxt->input->base != base) goto base_changed;
9490
* The attributes defaulting
9492
/*
 * Defaulted attributes for this element. defaults->values is read in groups
 * of five entries per attribute: [0] name, [1] prefix, [2] and [3] copied
 * into the value slots, [4] tested for the standalone check.
 */
if (ctxt->attsDefault != NULL) {
9493
xmlDefAttrsPtr defaults;
9495
defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9496
if (defaults != NULL) {
9497
for (i = 0;i < defaults->nbAttrs;i++) {
9498
attname = defaults->values[5 * i];
9499
aprefix = defaults->values[5 * i + 1];
9502
* special work for namespaces defaulted defs
9504
if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9506
* check that it's not a defined namespace
9508
for (j = 1;j <= nbNs;j++)
9509
if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9511
if (j <= nbNs) continue;
9513
nsname = xmlGetNamespace(ctxt, NULL);
9514
if (nsname != defaults->values[5 * i + 2]) {
9515
if (nsPush(ctxt, NULL,
9516
defaults->values[5 * i + 2]) > 0)
9519
} else if (aprefix == ctxt->str_xmlns) {
9521
* check that it's not a defined namespace
9523
for (j = 1;j <= nbNs;j++)
9524
if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9526
if (j <= nbNs) continue;
9528
nsname = xmlGetNamespace(ctxt, attname);
9529
/*
 * NOTE(review): this compares against defaults->values[2], the value of the
 * FIRST defaulted attribute, while the nsPush() just below and the parallel
 * default-namespace branch above both use defaults->values[5 * i + 2].
 * This looks like a missing "5 * i +" - confirm and fix.
 */
if (nsname != defaults->values[2]) {
9530
if (nsPush(ctxt, attname,
9531
defaults->values[5 * i + 2]) > 0)
9536
* check that it's not a defined attribute
9538
for (j = 0;j < nbatts;j+=5) {
9539
if ((attname == atts[j]) && (aprefix == atts[j+1]))
9542
if (j < nbatts) continue;
9544
if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9545
if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9548
maxatts = ctxt->maxatts;
9551
atts[nbatts++] = attname;
9552
atts[nbatts++] = aprefix;
9553
if (aprefix == NULL)
9554
atts[nbatts++] = NULL;
9556
atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9557
atts[nbatts++] = defaults->values[5 * i + 2];
9558
atts[nbatts++] = defaults->values[5 * i + 3];
9559
if ((ctxt->standalone == 1) &&
9560
(defaults->values[5 * i + 4] != NULL)) {
9561
xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9562
"standalone: attribute %s on %s defaulted from external subset\n",
9563
attname, localname);
9572
* The attributes checkings
9574
/*
 * Walk atts in groups of five: resolve the namespace of each prefixed
 * attribute into slot 2 and compare with all earlier attributes.
 */
for (i = 0; i < nbatts;i += 5) {
9576
* The default namespace does not apply to attribute names.
9578
if (atts[i + 1] != NULL) {
9579
nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9580
if (nsname == NULL) {
9581
xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9582
"Namespace prefix %s for %s on %s is not defined\n",
9583
atts[i + 1], atts[i], localname);
9585
atts[i + 2] = nsname;
9589
* [ WFC: Unique Att Spec ]
9590
* No attribute name may appear more than once in the same
9591
* start-tag or empty-element tag.
9592
* As extended by the Namespace in XML REC.
9594
/* Same local name: duplicate if the prefix or the resolved namespace match. */
for (j = 0; j < i;j += 5) {
9595
if (atts[i] == atts[j]) {
9596
if (atts[i+1] == atts[j+1]) {
9597
xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9600
if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9601
xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9602
"Namespaced Attribute %s in '%s' redefined\n",
9603
atts[i], nsname, NULL);
9610
/* Resolve the namespace of the element itself. */
nsname = xmlGetNamespace(ctxt, prefix);
9611
if ((prefix != NULL) && (nsname == NULL)) {
9612
xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9613
"Namespace prefix %s on %s is not defined\n",
9614
prefix, localname, NULL);
9620
* SAX: Start of Element !
9622
/*
 * Two call forms: one passes the nbNs pairs at the end of nsTab, the other
 * passes no namespace declarations (the selecting condition is not visible
 * in this excerpt).
 */
if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9623
(!ctxt->disableSAX)) {
9625
ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9626
nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9627
nbatts / 5, nbdef, atts);
9629
ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9630
nsname, 0, NULL, nbatts / 5, nbdef, atts);
9634
* Free up attribute allocated strings if needed
9637
/* Slot 3 of each group is the value; free it when flagged in attallocs. */
for (i = 3,j = 0; j < nratts;i += 5,j++)
9638
if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9639
xmlFree((xmlChar *) atts[i]);
9646
* the attribute strings are valid iif the base didn't changed
9649
/*
 * Presumably the base_changed path (the label is not visible here): same
 * cleanup, then rewind the input to the recorded tag start.
 */
for (i = 3,j = 0; j < nratts;i += 5,j++)
9650
if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9651
xmlFree((xmlChar *) atts[i]);
9653
ctxt->input->cur = ctxt->input->base + cur;
9654
ctxt->input->line = oldline;
9655
ctxt->input->col = oldcol;
9656
if (ctxt->wellFormed == 1) {
9664
* @ctxt: an XML parser context
9665
* @line: line of the start tag
9666
* @nsNr: number of namespaces on the start tag
9668
* parse an end of tag
9670
* [42] ETag ::= '</' Name S? '>'
9674
* [NS 9] ETag ::= '</' QName S? '>'
9678
/*
 * Parse an end tag on the SAX2 path and report it through endElementNs.
 *
 * @ctxt:   parser context; ctxt->name is the element name compared against
 * @prefix: element prefix, forwarded to xmlParseQNameAndCompare() and to
 *          the callback
 * @URI:    element namespace, forwarded to the callback
 * @line:   start-tag line for the mismatch message; when 0 and ctxt->node is
 *          set, ctxt->node->line is used instead
 * @nsNr:   not referenced by the statements visible in this excerpt
 * @tlen:   byte length used for the direct comparison against ctxt->name
 *
 * NOTE(review): several statements are not visible in this excerpt.
 */
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9679
const xmlChar *URI, int line, int nsNr, int tlen) {
9680
const xmlChar *name;
9683
if ((RAW != '<') || (NXT(1) != '/')) {
9684
xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9689
/*
 * Fast path: the next tlen input bytes equal the start of ctxt->name. When
 * '>' follows immediately it is consumed as well.
 */
if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9690
if (ctxt->input->cur[tlen] == '>') {
9691
ctxt->input->cur += tlen + 1;
9694
ctxt->input->cur += tlen;
9698
/* Otherwise parse the name; which call is used is decided on hidden lines. */
name = xmlParseNameAndCompare(ctxt, ctxt->name);
9700
name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9704
* We should definitely be at the ending "S? '>'" part
9707
if (ctxt->instate == XML_PARSER_EOF)
9710
if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9711
xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9716
* [ WFC: Element Type Match ]
9717
* The Name in an element's end-tag must match the element type in the
9721
/* (xmlChar*)1 is the success sentinel; anything else is a mismatch. */
if (name != (xmlChar*)1) {
9722
if (name == NULL) name = BAD_CAST "unparseable";
9723
if ((line == 0) && (ctxt->node != NULL))
9724
line = ctxt->node->line;
9725
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9726
"Opening and ending tag mismatch: %s line %d and %s\n",
9727
ctxt->name, line, name);
9734
/* Notify the application unless SAX callbacks are disabled. */
if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9735
(!ctxt->disableSAX))
9736
ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9746
* @ctxt: an XML parser context
9748
* Parse escaped pure raw content.
9750
* [18] CDSect ::= CDStart CData CDEnd
9752
* [19] CDStart ::= '<![CDATA['
9754
* [20] CData ::= (Char* - (Char* ']]>' Char*))
9756
* [21] CDEnd ::= ']]>'
9759
/*
 * Parse a CDATA section and hand its content to the application.
 *
 * The content is accumulated in a heap buffer that starts at
 * XML_PARSER_BUFFER_SIZE bytes and is enlarged with xmlRealloc(); it is
 * delivered through sax->cdataBlock, or through sax->characters when no
 * cdataBlock handler is set.
 * NOTE(review): the statements that read characters, update size and free
 * the buffer are not visible in this excerpt.
 */
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9760
xmlChar *buf = NULL;
9762
int size = XML_PARSER_BUFFER_SIZE;
9768
/* Check 2.6.0 was NXT(0) not RAW */
9769
if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9774
ctxt->instate = XML_PARSER_CDATA_SECTION;
9777
xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9778
ctxt->instate = XML_PARSER_CONTENT;
9784
xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9785
ctxt->instate = XML_PARSER_CONTENT;
9790
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9792
xmlErrMemory(ctxt, NULL);
9795
/*
 * r, s and cur form a three-character window; the loop ends when the window
 * reads "]]>" or cur is not a valid character.
 */
while (IS_CHAR(cur) &&
9796
((r != ']') || (s != ']') || (cur != '>'))) {
9797
/* Enlarge the buffer when fewer than 5 spare bytes remain. */
if (len + 5 >= size) {
9800
/* Size cap, waived when XML_PARSE_HUGE is set. */
if ((size > XML_MAX_TEXT_LENGTH) &&
9801
((ctxt->options & XML_PARSE_HUGE) == 0)) {
9802
xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9803
"CData section too big found", NULL);
9807
/* The result goes to tmp, so buf is not overwritten by this call. */
tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9810
xmlErrMemory(ctxt, NULL);
9816
COPY_BUF(rl,buf,len,r);
9824
if (ctxt->instate == XML_PARSER_EOF) {
9834
ctxt->instate = XML_PARSER_CONTENT;
9836
xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9837
"CData section not finished\n%.50s\n", buf);
9844
* OK the buffer is to be consumed as cdata.
9846
/* Prefer the dedicated CDATA callback, else deliver as plain characters. */
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9847
if (ctxt->sax->cdataBlock != NULL)
9848
ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9849
else if (ctxt->sax->characters != NULL)
9850
ctxt->sax->characters(ctxt->userData, buf, len);
9857
* @ctxt: an XML parser context
9861
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9865
/*
 * Parse element content until "</", end of input, or the parser EOF state.
 *
 * Each iteration dispatches on the characters at the current position:
 * processing instruction, CDATA section, comment, sub-element, reference,
 * or character data. If an iteration consumes no input the parser reports
 * an internal error and switches to XML_PARSER_EOF so the loop ends.
 * NOTE(review): some statements (e.g. the processing-instruction call) are
 * not visible in this excerpt.
 */
xmlParseContent(xmlParserCtxtPtr ctxt) {
9867
while ((RAW != 0) &&
9868
((RAW != '<') || (NXT(1) != '/')) &&
9869
(ctxt->instate != XML_PARSER_EOF)) {
9870
/* Position markers used by the no-progress check at the loop bottom. */
const xmlChar *test = CUR_PTR;
9871
unsigned int cons = ctxt->input->consumed;
9872
const xmlChar *cur = ctxt->input->cur;
9875
* First case : a Processing Instruction.
9877
if ((*cur == '<') && (cur[1] == '?')) {
9882
* Second case : a CDSection
9884
/* 2.6.0 test was *cur not RAW */
9885
else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9886
xmlParseCDSect(ctxt);
9890
* Third case : a comment
9892
else if ((*cur == '<') && (NXT(1) == '!') &&
9893
(NXT(2) == '-') && (NXT(3) == '-')) {
9894
xmlParseComment(ctxt);
9895
ctxt->instate = XML_PARSER_CONTENT;
9899
* Fourth case : a sub-element.
9901
else if (*cur == '<') {
9902
xmlParseElement(ctxt);
9906
* Fifth case : a reference. If if has not been resolved,
9907
* parsing returns it's Name, create the node
9910
else if (*cur == '&') {
9911
xmlParseReference(ctxt);
9915
* Last case, text. Note that References are handled directly.
9918
xmlParseCharData(ctxt, 0);
9923
* Pop-up of finished entities.
9925
while ((RAW == 0) && (ctxt->inputNr > 1))
9929
/* No progress since the top of the iteration: stop to avoid looping. */
if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9930
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9931
"detected an error in element content\n");
9932
ctxt->instate = XML_PARSER_EOF;
9940
* @ctxt: an XML parser context
9942
* parse an XML element, this is highly recursive
9944
* [39] element ::= EmptyElemTag | STag content ETag
9946
* [ WFC: Element Type Match ]
9947
* The Name in an element's end-tag must match the element type in the
9953
/*
 * Parse one element: start tag, then either an empty-element close or
 * content followed by the end tag.
 *
 * Visible steps:
 *   - depth guard against xmlParserMaxDepth (waived with XML_PARSE_HUGE);
 *   - optional recording of the start position in node_info;
 *   - push of an xml:space state derived from the current one;
 *   - start tag via xmlParseStartTag2() or xmlParseStartTag();
 *   - namePush() of the element name and, when validating, a root check;
 *   - "/>" handling with an immediate end-element callback;
 *   - content via xmlParseContent(), then xmlParseEndTag2() or
 *     xmlParseEndTag1().
 * Every exit visible here pops the namespaces added since entry (nsPop) and,
 * when record_info is set and ret is non-NULL, stores the end position.
 * NOTE(review): many statements (branch conditions, returns, pops of the
 * name/space stacks) are not visible in this excerpt.
 */
xmlParseElement(xmlParserCtxtPtr ctxt) {
9954
const xmlChar *name;
9955
const xmlChar *prefix = NULL;
9956
const xmlChar *URI = NULL;
9957
xmlParserNodeInfo node_info;
9960
/* Namespace stack height on entry, used to pop this element's bindings. */
int nsNr = ctxt->nsNr;
9962
if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9963
((ctxt->options & XML_PARSE_HUGE) == 0)) {
9964
xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9965
"Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9967
ctxt->instate = XML_PARSER_EOF;
9971
/* Capture start position */
9972
if (ctxt->record_info) {
9973
node_info.begin_pos = ctxt->input->consumed +
9974
(CUR_PTR - ctxt->input->base);
9975
node_info.begin_line = ctxt->input->line;
9978
/* Push -1 when the stack is empty or its top is -2, else copy the top. */
if (ctxt->spaceNr == 0)
9979
spacePush(ctxt, -1);
9980
else if (*ctxt->space == -2)
9981
spacePush(ctxt, -1);
9983
spacePush(ctxt, *ctxt->space);
9985
/* Start-tag line, kept for later error messages. */
line = ctxt->input->line;
9986
#ifdef LIBXML_SAX1_ENABLED
9988
#endif /* LIBXML_SAX1_ENABLED */
9989
name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9990
#ifdef LIBXML_SAX1_ENABLED
9992
name = xmlParseStartTag(ctxt);
9993
#endif /* LIBXML_SAX1_ENABLED */
9994
if (ctxt->instate == XML_PARSER_EOF)
10000
namePush(ctxt, name);
10003
#ifdef LIBXML_VALID_ENABLED
10005
* [ VC: Root Element Type ]
10006
* The Name in the document type declaration must match the element
10007
* type of the root element.
10009
if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10010
ctxt->node && (ctxt->node == ctxt->myDoc->children))
10011
ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10012
#endif /* LIBXML_VALID_ENABLED */
10015
* Check for an Empty Element.
10017
/* "/>": the element ends here, so the end callback is issued directly. */
if ((RAW == '/') && (NXT(1) == '>')) {
10020
if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10021
(!ctxt->disableSAX))
10022
ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10023
#ifdef LIBXML_SAX1_ENABLED
10025
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10026
(!ctxt->disableSAX))
10027
ctxt->sax->endElement(ctxt->userData, name);
10028
#endif /* LIBXML_SAX1_ENABLED */
10032
if (nsNr != ctxt->nsNr)
10033
nsPop(ctxt, ctxt->nsNr - nsNr);
10034
if ( ret != NULL && ctxt->record_info ) {
10035
node_info.end_pos = ctxt->input->consumed +
10036
(CUR_PTR - ctxt->input->base);
10037
node_info.end_line = ctxt->input->line;
10038
node_info.node = ret;
10039
xmlParserAddNodeInfo(ctxt, &node_info);
10046
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10047
"Couldn't find end of Start Tag %s line %d\n",
10051
* end of parsing of this node.
10056
if (nsNr != ctxt->nsNr)
10057
nsPop(ctxt, ctxt->nsNr - nsNr);
10060
* Capture end position and add node
10062
if ( ret != NULL && ctxt->record_info ) {
10063
node_info.end_pos = ctxt->input->consumed +
10064
(CUR_PTR - ctxt->input->base);
10065
node_info.end_line = ctxt->input->line;
10066
node_info.node = ret;
10067
xmlParserAddNodeInfo(ctxt, &node_info);
10073
* Parse the content of the element:
10075
xmlParseContent(ctxt);
10076
if (ctxt->instate == XML_PARSER_EOF)
10078
/* Content ended on a non-byte character: the element was never closed. */
if (!IS_BYTE_CHAR(RAW)) {
10079
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10080
"Premature end of data in tag %s line %d\n",
10084
* end of parsing of this node.
10089
if (nsNr != ctxt->nsNr)
10090
nsPop(ctxt, ctxt->nsNr - nsNr);
10095
* parse the end of tag: '</' should be here.
10098
/* The fifth argument is the number of nsTab slots added by this element. */
xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
10101
#ifdef LIBXML_SAX1_ENABLED
10103
xmlParseEndTag1(ctxt, line);
10104
#endif /* LIBXML_SAX1_ENABLED */
10107
* Capture end position and add node
10109
if ( ret != NULL && ctxt->record_info ) {
10110
node_info.end_pos = ctxt->input->consumed +
10111
(CUR_PTR - ctxt->input->base);
10112
node_info.end_line = ctxt->input->line;
10113
node_info.node = ret;
10114
xmlParserAddNodeInfo(ctxt, &node_info);
10119
* xmlParseVersionNum:
10120
* @ctxt: an XML parser context
10122
* parse the XML version value.
10124
* [26] VersionNum ::= '1.' [0-9]+
10126
* In practice allow [0-9].[0-9]+ at that level
10128
* Returns the string giving the XML version number, or NULL
10131
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
/*
 * Builds the version number in a heap buffer: buf comes from
 * xmlMallocAtomic() and is enlarged via xmlRealloc() into tmp once
 * len + 1 reaches size. xmlErrMemory() is called next to both
 * allocation sites. Only the ASCII digits 0 to 9 are tested for in the
 * checks visible here.
 * NOTE(review): this excerpt omits several statements (the declarations
 * of size, len, cur and tmp, the tests guarding xmlErrMemory(), the
 * return paths); confirm the details against the complete function.
 */
10132
xmlChar *buf = NULL;
10137
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10139
xmlErrMemory(ctxt, NULL);
10143
if (!((cur >= '0') && (cur <= '9'))) {
10157
while ((cur >= '0') && (cur <= '9')) {
10158
if (len + 1 >= size) {
10162
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10165
xmlErrMemory(ctxt, NULL);
10179
* xmlParseVersionInfo:
10180
* @ctxt: an XML parser context
10182
* parse the XML version.
10184
* [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10186
* [25] Eq ::= S? '=' S?
10188
* Returns the version string, e.g. "1.0"
10192
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
/*
 * Proceeds only when the input at CUR_PTR spells the keyword version
 * (CMP7). XML_ERR_EQUAL_REQUIRED is raised on the path that follows the
 * keyword. The value is then read with xmlParseVersionNum() in two
 * parallel branches (the second one is entered on a single quote), and
 * each branch can raise XML_ERR_STRING_NOT_CLOSED.
 * XML_ERR_STRING_NOT_STARTED is raised after the quote branches,
 * presumably as their fallback.
 * NOTE(review): the conditions guarding these error calls and the
 * return statement are not visible in this excerpt.
 */
10193
xmlChar *version = NULL;
10195
if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10199
xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10206
version = xmlParseVersionNum(ctxt);
10208
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10211
} else if (RAW == '\''){
10213
version = xmlParseVersionNum(ctxt);
10215
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10219
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10227
* @ctxt: an XML parser context
10229
* parse the XML encoding name
10231
* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10233
* Returns the encoding name value or NULL
10236
xmlParseEncName(xmlParserCtxtPtr ctxt) {
/*
 * The first character must be an ASCII letter (a-z or A-Z); in that
 * case buf is allocated with xmlMallocAtomic(). The loop that follows
 * accepts ASCII letters, digits, a dot and an underscore (its last
 * alternative is cut off in this excerpt) and enlarges the buffer
 * through xmlRealloc() into tmp once len + 1 reaches size.
 * xmlErrMemory() is called next to both allocation sites and
 * XML_ERR_ENCODING_NAME is raised near the end of the function.
 * NOTE(review): the declarations, the tests guarding the error calls
 * and the return paths are not visible here.
 */
10237
xmlChar *buf = NULL;
10243
if (((cur >= 'a') && (cur <= 'z')) ||
10244
((cur >= 'A') && (cur <= 'Z'))) {
10245
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10247
xmlErrMemory(ctxt, NULL);
10254
while (((cur >= 'a') && (cur <= 'z')) ||
10255
((cur >= 'A') && (cur <= 'Z')) ||
10256
((cur >= '0') && (cur <= '9')) ||
10257
(cur == '.') || (cur == '_') ||
10259
if (len + 1 >= size) {
10263
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10265
xmlErrMemory(ctxt, NULL);
10282
xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10288
* xmlParseEncodingDecl:
10289
* @ctxt: an XML parser context
10291
* parse the XML encoding declaration
10293
* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10295
* this sets up the conversion filters.
10297
* Returns the encoding value or NULL
10301
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
/*
 * Proceeds only when the input at CUR_PTR spells the keyword encoding
 * (CMP8), then reads the name with xmlParseEncName() in one of two
 * quote branches. What happens to the parsed name afterwards:
 *  - UTF-16 or UTF16 (compared with xmlStrcasecmp):
 *    XML_ERR_INVALID_ENCODING is raised when ctxt->encoding is NULL and
 *    the input buffer exists but has no encoder; the name then replaces
 *    ctxt->encoding and the previous value is freed.
 *  - UTF-8 or UTF8 (compared with xmlStrcasecmp): the name replaces
 *    ctxt->encoding and the previous value is freed.
 *  - any other non-NULL name: it replaces ctxt->input->encoding, and
 *    the handler returned by xmlFindCharEncodingHandler() is passed to
 *    xmlSwitchToEncoding(); XML_ERR_UNSUPPORTED_ENCODING is raised
 *    after that test, presumably when no handler was found.
 * NOTE(review): the statement controlled by the XML_PARSE_IGNORE_ENC
 * test is not visible in this excerpt. Every other visible path hands
 * the encoding string over to the context, so confirm that an early
 * exit on that test releases the string instead of leaking it.
 */
10302
xmlChar *encoding = NULL;
10305
if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10309
xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10316
encoding = xmlParseEncName(ctxt);
10318
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10321
} else if (RAW == '\''){
10323
encoding = xmlParseEncName(ctxt);
10325
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10329
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10333
* Non standard parsing, allowing the user to ignore encoding
10335
if (ctxt->options & XML_PARSE_IGNORE_ENC)
10339
* UTF-16 encoding stwich has already taken place at this stage,
10340
* more over the little-endian/big-endian selection is already done
10342
if ((encoding != NULL) &&
10343
((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10344
(!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10346
* If no encoding was passed to the parser, that we are
10347
* using UTF-16 and no decoder is present i.e. the
10348
* document is apparently UTF-8 compatible, then raise an
10349
* encoding mismatch fatal error
10351
if ((ctxt->encoding == NULL) &&
10352
(ctxt->input->buf != NULL) &&
10353
(ctxt->input->buf->encoder == NULL)) {
10354
xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10355
"Document labelled UTF-16 but has UTF-8 content\n");
10357
if (ctxt->encoding != NULL)
10358
xmlFree((xmlChar *) ctxt->encoding);
10359
ctxt->encoding = encoding;
10362
* UTF-8 encoding is handled natively
10364
else if ((encoding != NULL) &&
10365
((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10366
(!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10367
if (ctxt->encoding != NULL)
10368
xmlFree((xmlChar *) ctxt->encoding);
10369
ctxt->encoding = encoding;
10371
else if (encoding != NULL) {
10372
xmlCharEncodingHandlerPtr handler;
10374
if (ctxt->input->encoding != NULL)
10375
xmlFree((xmlChar *) ctxt->input->encoding);
10376
ctxt->input->encoding = encoding;
10378
handler = xmlFindCharEncodingHandler((const char *) encoding);
10379
if (handler != NULL) {
10380
xmlSwitchToEncoding(ctxt, handler);
10382
xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10383
"Unsupported encoding %s\n", encoding);
10393
* @ctxt: an XML parser context
10395
* parse the XML standalone declaration
10397
* [32] SDDecl ::= S 'standalone' Eq
10398
* (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10400
* [ VC: Standalone Document Declaration ]
10401
* TODO The standalone document declaration must have the value "no"
10402
* if any external markup declarations contain declarations of:
10403
* - attributes with default values, if elements to which these
10404
* attributes apply appear in the document without specifications
10405
* of values for these attributes, or
10406
* - entities (other than amp, lt, gt, apos, quot), if references
10407
* to those entities appear in the document, or
10408
* - attributes with values subject to normalization, where the
10409
* attribute appears in the document with a value which will change
10410
* as a result of normalization, or
10411
* - element types with element content, if white space occurs directly
10412
* within any instance of those types.
10415
* 1 if standalone="yes"
10416
* 0 if standalone="no"
10417
* -2 if standalone attribute is missing or invalid
10418
* (A standalone value of -2 means that the XML declaration was found,
10419
* but no value was specified for the standalone attribute).
10423
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
/*
 * standalone starts at -2. The function proceeds only when the input at
 * CUR_PTR spells the keyword standalone (CMP10). After
 * XML_ERR_EQUAL_REQUIRED is raised the current value of standalone is
 * returned at once. The value is then matched in two parallel branches
 * (the second one is entered on a double quote): the letters n, o are
 * tested through RAW and NXT(1), and the yes test starts with y, e.
 * XML_ERR_STANDALONE_VALUE, XML_ERR_STRING_NOT_CLOSED and
 * XML_ERR_STRING_NOT_STARTED are raised on the failure paths.
 * NOTE(review): the assignments that give standalone its 0 or 1 value,
 * the cursor advancement and the guards of the error calls are not
 * visible in this excerpt.
 */
10424
int standalone = -2;
10427
if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10431
xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10432
return(standalone);
10438
if ((RAW == 'n') && (NXT(1) == 'o')) {
10441
} else if ((RAW == 'y') && (NXT(1) == 'e') &&
10446
xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10449
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10452
} else if (RAW == '"'){
10454
if ((RAW == 'n') && (NXT(1) == 'o')) {
10457
} else if ((RAW == 'y') && (NXT(1) == 'e') &&
10462
xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10465
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10469
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10472
return(standalone);
10477
* @ctxt: an XML parser context
10479
* parse an XML declaration header
10481
* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10485
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
/*
 * Visible sequence of this function:
 *  1. ctxt->input->standalone is preset to -2.
 *  2. A blank is required at the current position, otherwise
 *     XML_ERR_SPACE_REQUIRED is raised.
 *  3. xmlParseVersionInfo() supplies the version. NULL raises
 *     XML_ERR_VERSION_MISSING. A version different from
 *     XML_DEFAULT_VERSION is fatal under XML_PARSE_OLD10; otherwise a
 *     version whose first two characters are 1 and a dot only triggers
 *     xmlWarningMsg(), and the remaining fatal call presumably covers
 *     every other version. The version then replaces ctxt->version and
 *     the previous value is freed.
 *  4. xmlParseEncodingDecl() is called and its return value is not
 *     used; ctxt->errNo is compared with XML_ERR_UNSUPPORTED_ENCODING
 *     right after the call.
 *  5. The missing-blank check that precedes the standalone declaration
 *     only runs when ctxt->input->encoding is non-NULL.
 *  6. ctxt->input->standalone receives the result of xmlParseSDDecl().
 *  7. The declaration must end on a question mark followed by a
 *     greater-than sign. A lone greater-than sign raises
 *     XML_ERR_XMLDECL_NOT_FINISHED; the same error is raised again on a
 *     further path that also calls MOVETO_ENDTAG(CUR_PTR).
 * NOTE(review): the declaration of version, the early returns and the
 * cursor advancement are not visible in this excerpt.
 */
10489
* This value for standalone indicates that the document has an
10490
* XML declaration but it does not have a standalone attribute.
10491
* It will be overwritten later if a standalone attribute is found.
10493
ctxt->input->standalone = -2;
10496
* We know that '<?xml' is here.
10500
if (!IS_BLANK_CH(RAW)) {
10501
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10502
"Blank needed after '<?xml'\n");
10507
* We must have the VersionInfo here.
10509
version = xmlParseVersionInfo(ctxt);
10510
if (version == NULL) {
10511
xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10513
if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10515
* Changed here for XML-1.0 5th edition
10517
if (ctxt->options & XML_PARSE_OLD10) {
10518
xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10519
"Unsupported version '%s'\n",
10522
if ((version[0] == '1') && ((version[1] == '.'))) {
10523
xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10524
"Unsupported version '%s'\n",
10527
xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10528
"Unsupported version '%s'\n",
10533
if (ctxt->version != NULL)
10534
xmlFree((void *) ctxt->version);
10535
ctxt->version = version;
10539
* We may have the encoding declaration
10541
if (!IS_BLANK_CH(RAW)) {
10542
if ((RAW == '?') && (NXT(1) == '>')) {
10546
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10548
xmlParseEncodingDecl(ctxt);
10549
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10551
* The XML REC instructs us to stop parsing right here
10557
* We may have the standalone status.
10559
if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10560
if ((RAW == '?') && (NXT(1) == '>')) {
10564
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10568
* We can grow the input buffer freely at that point
10573
ctxt->input->standalone = xmlParseSDDecl(ctxt);
10576
if ((RAW == '?') && (NXT(1) == '>')) {
10578
} else if (RAW == '>') {
10579
/* Deprecated old WD ... */
10580
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10583
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10584
MOVETO_ENDTAG(CUR_PTR);
10591
* @ctxt: an XML parser context
10593
* parse an XML Misc* optional field.
10595
* [27] Misc ::= Comment | PI | S
10599
xmlParseMisc(xmlParserCtxtPtr ctxt) {
/*
 * Loops while the parser state is not XML_PARSER_EOF and the input
 * starts with a processing-instruction opener (less-than sign followed
 * by a question mark), a comment opener (CMP4 on the four comment
 * characters) or a blank. Inside the loop the same three cases are
 * dispatched in that order; xmlParseComment() handles the comment case.
 * NOTE(review): the statements executed for the processing-instruction
 * and blank cases are not visible in this excerpt.
 */
10600
while ((ctxt->instate != XML_PARSER_EOF) &&
10601
(((RAW == '<') && (NXT(1) == '?')) ||
10602
(CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10603
IS_BLANK_CH(CUR))) {
10604
if ((RAW == '<') && (NXT(1) == '?')) {
10606
} else if (IS_BLANK_CH(CUR)) {
10609
xmlParseComment(ctxt);
10614
* xmlParseDocument:
10615
* @ctxt: an XML parser context
10617
* parse an XML document (and build a tree if using the standard SAX
10620
* [1] document ::= prolog element Misc*
10622
* [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10624
* Returns 0, -1 in case of error. the parser context is augmented
10625
* as a result of the parsing.
10629
xmlParseDocument(xmlParserCtxtPtr ctxt) {
/*
 * Visible sequence of this function:
 *  1. A NULL ctxt or NULL ctxt->input is tested for first.
 *  2. xmlDetectSAX2() is called, then the SAX setDocumentLocator
 *     callback when present.
 *  3. When ctxt->encoding is NULL and at least 4 bytes are available,
 *     xmlDetectCharEncoding() examines 4 bytes and xmlSwitchEncoding()
 *     is called for any result other than XML_CHAR_ENCODING_NONE.
 *  4. When the input starts with the XML declaration opener followed by
 *     a blank, xmlParseXMLDecl() runs, ctxt->errNo is compared with
 *     XML_ERR_UNSUPPORTED_ENCODING and ctxt->standalone is copied from
 *     ctxt->input->standalone. Another path stores a copy of
 *     XML_DEFAULT_VERSION in ctxt->version.
 *  5. The SAX startDocument callback is invoked unless SAX is disabled.
 *  6. xmlParseMisc(); then, on a DOCTYPE keyword, xmlParseDocTypeDecl(),
 *     xmlParseInternalSubset(), the SAX externalSubset callback,
 *     xmlCleanSpecialAttr() and another xmlParseMisc(). ctxt->inSubset
 *     goes 1, 2, 0 along the way.
 *  7. xmlParseElement() runs in state XML_PARSER_CONTENT, followed by
 *     state XML_PARSER_EPILOG and a final xmlParseMisc().
 *  8. The state is set to XML_PARSER_EOF and the SAX endDocument
 *     callback is invoked.
 *  9. A document whose version equals SAX_COMPAT_MODE is freed and
 *     ctxt->myDoc reset to NULL; a well-formed document gets the
 *     XML_DOC_* property flags.
 * NOTE(review): no NULL test of the xmlCharStrdup() result is visible
 * here; confirm how an allocation failure is handled. Many guards and
 * all return statements are missing from this excerpt.
 */
10631
xmlCharEncoding enc;
10635
if ((ctxt == NULL) || (ctxt->input == NULL))
10641
* SAX: detecting the level.
10643
xmlDetectSAX2(ctxt);
10646
* SAX: beginning of the document processing.
10648
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10649
ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10650
if (ctxt->instate == XML_PARSER_EOF)
10653
if ((ctxt->encoding == NULL) &&
10654
((ctxt->input->end - ctxt->input->cur) >= 4)) {
10656
* Get the 4 first bytes and decode the charset
10657
* if enc != XML_CHAR_ENCODING_NONE
10658
* plug some encoding conversion routines.
10664
enc = xmlDetectCharEncoding(&start[0], 4);
10665
if (enc != XML_CHAR_ENCODING_NONE) {
10666
xmlSwitchEncoding(ctxt, enc);
10672
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10676
* Check for the XMLDecl in the Prolog.
10677
* do not GROW here to avoid the detected encoder to decode more
10678
* than just the first line, unless the amount of data is really
10679
* too small to hold "<?xml version="1.0" encoding="foo"
10681
if ((ctxt->input->end - ctxt->input->cur) < 35) {
10684
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10687
* Note that we will switch encoding on the fly.
10689
xmlParseXMLDecl(ctxt);
10690
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10692
* The XML REC instructs us to stop parsing right here
10696
ctxt->standalone = ctxt->input->standalone;
10699
ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10701
if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10702
ctxt->sax->startDocument(ctxt->userData);
10703
if (ctxt->instate == XML_PARSER_EOF)
10707
* The Misc part of the Prolog
10710
xmlParseMisc(ctxt);
10713
* Then possibly doc type declaration(s) and more Misc
10714
* (doctypedecl Misc*)?
10717
if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10719
ctxt->inSubset = 1;
10720
xmlParseDocTypeDecl(ctxt);
10722
ctxt->instate = XML_PARSER_DTD;
10723
xmlParseInternalSubset(ctxt);
10724
if (ctxt->instate == XML_PARSER_EOF)
10729
* Create and update the external subset.
10731
ctxt->inSubset = 2;
10732
if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10733
(!ctxt->disableSAX))
10734
ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10735
ctxt->extSubSystem, ctxt->extSubURI);
10736
if (ctxt->instate == XML_PARSER_EOF)
10738
ctxt->inSubset = 0;
10740
xmlCleanSpecialAttr(ctxt);
10742
ctxt->instate = XML_PARSER_PROLOG;
10743
xmlParseMisc(ctxt);
10747
* Time to start parsing the tree itself
10751
xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10752
"Start tag expected, '<' not found\n");
10754
ctxt->instate = XML_PARSER_CONTENT;
10755
xmlParseElement(ctxt);
10756
ctxt->instate = XML_PARSER_EPILOG;
10760
* The Misc part at the end
10762
xmlParseMisc(ctxt);
10765
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10767
ctxt->instate = XML_PARSER_EOF;
10771
* SAX: end of the document processing.
10773
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10774
ctxt->sax->endDocument(ctxt->userData);
10777
* Remove locally kept entity definitions if the tree was not built
10779
if ((ctxt->myDoc != NULL) &&
10780
(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10781
xmlFreeDoc(ctxt->myDoc);
10782
ctxt->myDoc = NULL;
10785
if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10786
ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10788
ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10789
if (ctxt->nsWellFormed)
10790
ctxt->myDoc->properties |= XML_DOC_NSVALID;
10791
if (ctxt->options & XML_PARSE_OLD10)
10792
ctxt->myDoc->properties |= XML_DOC_OLD10;
10794
if (! ctxt->wellFormed) {
10802
* xmlParseExtParsedEnt:
10803
* @ctxt: an XML parser context
10805
* parse a general parsed entity
10806
* An external general parsed entity is well-formed if it matches the
10807
* production labeled extParsedEnt.
10809
* [78] extParsedEnt ::= TextDecl? content
10811
* Returns 0, -1 in case of error. the parser context is augmented
10812
* as a result of the parsing.
10816
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
/*
 * Visible sequence of this function:
 *  1. A NULL ctxt or NULL ctxt->input is tested for first.
 *  2. xmlDefaultSAXHandlerInit() and xmlDetectSAX2() are called, then
 *     the SAX setDocumentLocator callback when present.
 *  3. When at least 4 bytes are available, xmlDetectCharEncoding()
 *     examines 4 bytes and xmlSwitchEncoding() is called for any result
 *     other than XML_CHAR_ENCODING_NONE.
 *  4. When the input starts with the XML declaration opener followed by
 *     a blank, xmlParseXMLDecl() runs and ctxt->errNo is compared with
 *     XML_ERR_UNSUPPORTED_ENCODING. Another path stores a copy of
 *     XML_DEFAULT_VERSION in ctxt->version.
 *  5. The SAX startDocument callback is invoked unless SAX is disabled.
 *  6. Validation and subset loading are switched off (validate and
 *     loadsubset set to 0) and xmlParseContent() runs in state
 *     XML_PARSER_CONTENT.
 *  7. Leftover input raises XML_ERR_NOT_WELL_BALANCED when it starts
 *     with an end-tag opener, XML_ERR_EXTRA_CONTENT when RAW is any
 *     other non-zero character.
 *  8. The SAX endDocument callback is invoked; -1 is returned when the
 *     context is not well formed.
 * NOTE(review): no NULL test of the xmlCharStrdup() result is visible
 * here; confirm how an allocation failure is handled. Several guards
 * and the remaining return statements are missing from this excerpt.
 */
10818
xmlCharEncoding enc;
10820
if ((ctxt == NULL) || (ctxt->input == NULL))
10823
xmlDefaultSAXHandlerInit();
10825
xmlDetectSAX2(ctxt);
10830
* SAX: beginning of the document processing.
10832
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10833
ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10836
* Get the 4 first bytes and decode the charset
10837
* if enc != XML_CHAR_ENCODING_NONE
10838
* plug some encoding conversion routines.
10840
if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10845
enc = xmlDetectCharEncoding(start, 4);
10846
if (enc != XML_CHAR_ENCODING_NONE) {
10847
xmlSwitchEncoding(ctxt, enc);
10853
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10857
* Check for the XMLDecl in the Prolog.
10860
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10863
* Note that we will switch encoding on the fly.
10865
xmlParseXMLDecl(ctxt);
10866
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10868
* The XML REC instructs us to stop parsing right here
10874
ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10876
if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10877
ctxt->sax->startDocument(ctxt->userData);
10878
if (ctxt->instate == XML_PARSER_EOF)
10882
* Doing validity checking on chunk doesn't make sense
10884
ctxt->instate = XML_PARSER_CONTENT;
10885
ctxt->validate = 0;
10886
ctxt->loadsubset = 0;
10889
xmlParseContent(ctxt);
10890
if (ctxt->instate == XML_PARSER_EOF)
10893
if ((RAW == '<') && (NXT(1) == '/')) {
10894
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10895
} else if (RAW != 0) {
10896
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10900
* SAX: end of the document processing.
10902
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10903
ctxt->sax->endDocument(ctxt->userData);
10905
if (! ctxt->wellFormed) return(-1);
10909
#ifdef LIBXML_PUSH_ENABLED
10910
/************************************************************************
10912
* Progressive parsing interfaces *
10914
************************************************************************/
10917
* xmlParseLookupSequence:
10918
* @ctxt: an XML parser context
10919
* @first: the first char to lookup
10920
* @next: the next char to lookup or zero
10921
* @third: the next char to lookup or zero
10923
* Try to find if a sequence (first, next, third) or just (first next) or
10924
* (first) is available in the input stream.
10925
* This function has a side effect of (possibly) incrementing ctxt->checkIndex
10926
* to avoid rescanning sequences of bytes, it DOES change the state of the
10927
* parser, do not use liberally.
10929
* Returns the index to the current parsing point if the full sequence
10930
* is available, -1 otherwise.
10933
xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10934
xmlChar next, xmlChar third) {
/*
 * Searches the buffered input for first, optionally followed by next
 * and third (a zero value means that position is unused).
 *  - Returns -1 when in is NULL or when in->cur lies before in->base.
 *  - The scan starts at the offset of in->cur from in->base, or at
 *    ctxt->checkIndex when that is larger, so bytes examined by an
 *    earlier failed call are not examined again.
 *  - When in->buf is set, buf and len come from xmlBufContent() and
 *    xmlBufUse() on in->buf->buffer; len is then shortened by 2 or 1 so
 *    that reading buf[base + 2] or buf[base + 1] stays below the
 *    original length.
 *  - On a match ctxt->checkIndex is reset to 0 and the distance from
 *    in->cur to the match is returned.
 *  - Without a match the reached offset is stored in ctxt->checkIndex.
 * NOTE(review): the initialisation of in, the branch taken when in->buf
 * is NULL, the final return and the conditions around the
 * xmlGenericError() trace calls are not visible in this excerpt.
 */
10936
xmlParserInputPtr in;
10937
const xmlChar *buf;
10940
if (in == NULL) return(-1);
10941
base = in->cur - in->base;
10942
if (base < 0) return(-1);
10943
if (ctxt->checkIndex > base)
10944
base = ctxt->checkIndex;
10945
if (in->buf == NULL) {
10949
buf = xmlBufContent(in->buf->buffer);
10950
len = xmlBufUse(in->buf->buffer);
10952
/* take into account the sequence length */
10953
if (third) len -= 2;
10954
else if (next) len --;
10955
for (;base < len;base++) {
10956
if (buf[base] == first) {
10958
if ((buf[base + 1] != next) ||
10959
(buf[base + 2] != third)) continue;
10960
} else if (next != 0) {
10961
if (buf[base + 1] != next) continue;
10963
ctxt->checkIndex = 0;
10966
xmlGenericError(xmlGenericErrorContext,
10967
"PP: lookup '%c' found at %d\n",
10969
else if (third == 0)
10970
xmlGenericError(xmlGenericErrorContext,
10971
"PP: lookup '%c%c' found at %d\n",
10972
first, next, base);
10974
xmlGenericError(xmlGenericErrorContext,
10975
"PP: lookup '%c%c%c' found at %d\n",
10976
first, next, third, base);
10978
return(base - (in->cur - in->base));
10981
ctxt->checkIndex = base;
10984
xmlGenericError(xmlGenericErrorContext,
10985
"PP: lookup '%c' failed\n", first);
10986
else if (third == 0)
10987
xmlGenericError(xmlGenericErrorContext,
10988
"PP: lookup '%c%c' failed\n", first, next);
10990
xmlGenericError(xmlGenericErrorContext,
10991
"PP: lookup '%c%c%c' failed\n", first, next, third);
10997
* xmlParseGetLasts:
10998
* @ctxt: an XML parser context
10999
* @lastlt: pointer to store the last '<' from the input
11000
* @lastgt: pointer to store the last '>' from the input
11002
* Lookup the last < and > in the current chunk
11005
xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11006
const xmlChar **lastgt) {
/*
 * Reports an internal error through xmlGenericError() when ctxt, lastlt
 * or lastgt is NULL. The search itself only runs when ctxt->progressive
 * is non-zero and exactly one input is stacked (ctxt->inputNr == 1):
 *  - tmp walks backwards towards ctxt->input->base looking for a
 *    less-than sign;
 *  - a forward walk bounded by ctxt->input->end then looks for a
 *    greater-than sign, stepping over text enclosed in single or double
 *    quotes so that a greater-than sign inside a quoted value is not
 *    taken for the end of a tag;
 *  - a second backward walk looks for a greater-than sign.
 * NOTE(review): the stores into *lastlt and *lastgt, the starting
 * position of each walk and the non-progressive branch are not visible
 * in this excerpt.
 */
11007
const xmlChar *tmp;
11009
if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11010
xmlGenericError(xmlGenericErrorContext,
11011
"Internal error: xmlParseGetLasts\n");
11014
if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
11015
tmp = ctxt->input->end;
11017
while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11018
if (tmp < ctxt->input->base) {
11024
while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11025
if (*tmp == '\'') {
11027
while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11028
if (tmp < ctxt->input->end) tmp++;
11029
} else if (*tmp == '"') {
11031
while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11032
if (tmp < ctxt->input->end) tmp++;
11036
if (tmp < ctxt->input->end)
11041
while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11042
if (tmp >= ctxt->input->base)
11054
* xmlCheckCdataPush:
11055
* @utf: pointer to the block of characters
11056
* @len: length of the block in bytes
11058
* Check that the block of characters is okay as CData content [20]
11060
* Returns the number of bytes to pass if okay, a negative index where an
11061
* UTF-8 error occurred otherwise
11064
xmlCheckCdataPush(const xmlChar *utf, int len) {
/*
 * Walks the len bytes at utf one UTF-8 sequence at a time.
 *  - The lead byte selects the sequence length: top bit clear means 1
 *    byte, 110xxxxx means 2, 1110xxxx means 3, 11110xxx means 4; any
 *    other lead byte falls into the final else branch.
 *  - When a multi-byte sequence would extend past len, ix (the number
 *    of bytes accepted so far) is returned.
 *  - Every continuation byte must match 10xxxxxx, i.e.
 *    (byte & 0xc0) == 0x80.
 *  - The decoded code point of a multi-byte sequence is tested with
 *    xmlIsCharQ(); among the 1-byte codes, 0xA, 0xD and 0x9 are tested
 *    for explicitly.
 * NOTE(review): the declarations, the load of c, the increments of ix
 * and the error returns are not visible in this excerpt.
 */
11069
if ((utf == NULL) || (len <= 0))
11072
for (ix = 0; ix < len;) { /* scan is bounded by len, not by a terminator */
11074
if ((c & 0x80) == 0x00) { /* 1-byte code, top bit clear (starts with 0) */
11077
else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11081
} else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11082
if (ix + 2 > len) return(ix);
11083
if ((utf[ix+1] & 0xc0 ) != 0x80)
11085
codepoint = (utf[ix] & 0x1f) << 6;
11086
codepoint |= utf[ix+1] & 0x3f;
11087
if (!xmlIsCharQ(codepoint))
11090
} else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11091
if (ix + 3 > len) return(ix);
11092
if (((utf[ix+1] & 0xc0) != 0x80) ||
11093
((utf[ix+2] & 0xc0) != 0x80))
11095
codepoint = (utf[ix] & 0xf) << 12;
11096
codepoint |= (utf[ix+1] & 0x3f) << 6;
11097
codepoint |= utf[ix+2] & 0x3f;
11098
if (!xmlIsCharQ(codepoint))
11101
} else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11102
if (ix + 4 > len) return(ix);
11103
if (((utf[ix+1] & 0xc0) != 0x80) ||
11104
((utf[ix+2] & 0xc0) != 0x80) ||
11105
((utf[ix+3] & 0xc0) != 0x80))
11107
codepoint = (utf[ix] & 0x7) << 18;
11108
codepoint |= (utf[ix+1] & 0x3f) << 12;
11109
codepoint |= (utf[ix+2] & 0x3f) << 6;
11110
codepoint |= utf[ix+3] & 0x3f;
11111
if (!xmlIsCharQ(codepoint))
11114
} else /* unknown encoding */
11121
* xmlParseTryOrFinish:
11122
* @ctxt: an XML parser context
11123
* @terminate: last chunk indicator
11125
* Try to progress on parsing
11127
* Returns zero if no parsing was possible
11130
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11134
const xmlChar *lastlt, *lastgt;
11136
if (ctxt->input == NULL)
11140
switch (ctxt->instate) {
11141
case XML_PARSER_EOF:
11142
xmlGenericError(xmlGenericErrorContext,
11143
"PP: try EOF\n"); break;
11144
case XML_PARSER_START:
11145
xmlGenericError(xmlGenericErrorContext,
11146
"PP: try START\n"); break;
11147
case XML_PARSER_MISC:
11148
xmlGenericError(xmlGenericErrorContext,
11149
"PP: try MISC\n");break;
11150
case XML_PARSER_COMMENT:
11151
xmlGenericError(xmlGenericErrorContext,
11152
"PP: try COMMENT\n");break;
11153
case XML_PARSER_PROLOG:
11154
xmlGenericError(xmlGenericErrorContext,
11155
"PP: try PROLOG\n");break;
11156
case XML_PARSER_START_TAG:
11157
xmlGenericError(xmlGenericErrorContext,
11158
"PP: try START_TAG\n");break;
11159
case XML_PARSER_CONTENT:
11160
xmlGenericError(xmlGenericErrorContext,
11161
"PP: try CONTENT\n");break;
11162
case XML_PARSER_CDATA_SECTION:
11163
xmlGenericError(xmlGenericErrorContext,
11164
"PP: try CDATA_SECTION\n");break;
11165
case XML_PARSER_END_TAG:
11166
xmlGenericError(xmlGenericErrorContext,
11167
"PP: try END_TAG\n");break;
11168
case XML_PARSER_ENTITY_DECL:
11169
xmlGenericError(xmlGenericErrorContext,
11170
"PP: try ENTITY_DECL\n");break;
11171
case XML_PARSER_ENTITY_VALUE:
11172
xmlGenericError(xmlGenericErrorContext,
11173
"PP: try ENTITY_VALUE\n");break;
11174
case XML_PARSER_ATTRIBUTE_VALUE:
11175
xmlGenericError(xmlGenericErrorContext,
11176
"PP: try ATTRIBUTE_VALUE\n");break;
11177
case XML_PARSER_DTD:
11178
xmlGenericError(xmlGenericErrorContext,
11179
"PP: try DTD\n");break;
11180
case XML_PARSER_EPILOG:
11181
xmlGenericError(xmlGenericErrorContext,
11182
"PP: try EPILOG\n");break;
11183
case XML_PARSER_PI:
11184
xmlGenericError(xmlGenericErrorContext,
11185
"PP: try PI\n");break;
11186
case XML_PARSER_IGNORE:
11187
xmlGenericError(xmlGenericErrorContext,
11188
"PP: try IGNORE\n");break;
11192
if ((ctxt->input != NULL) &&
11193
(ctxt->input->cur - ctxt->input->base > 4096)) {
11195
ctxt->checkIndex = 0;
11197
xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11199
while (ctxt->instate != XML_PARSER_EOF) {
11200
if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11205
* Pop-up of finished entities.
11207
while ((RAW == 0) && (ctxt->inputNr > 1))
11210
if (ctxt->input == NULL) break;
11211
if (ctxt->input->buf == NULL)
11212
avail = ctxt->input->length -
11213
(ctxt->input->cur - ctxt->input->base);
11216
* If we are operating on converted input, try to flush
11217
* remaining chars to avoid them stalling in the non-converted
11218
* buffer. But do not do this in document start where
11219
* encoding="..." may not have been read and we work on a
11220
* guessed encoding.
11222
if ((ctxt->instate != XML_PARSER_START) &&
11223
(ctxt->input->buf->raw != NULL) &&
11224
(xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11225
size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11227
size_t current = ctxt->input->cur - ctxt->input->base;
11229
xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11230
xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11233
avail = xmlBufUse(ctxt->input->buf->buffer) -
11234
(ctxt->input->cur - ctxt->input->base);
11238
switch (ctxt->instate) {
11239
case XML_PARSER_EOF:
11241
* Document parsing is done !
11244
case XML_PARSER_START:
11245
if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11247
xmlCharEncoding enc;
11250
* Very first chars read from the document flow.
11256
* Get the 4 first bytes and decode the charset
11257
* if enc != XML_CHAR_ENCODING_NONE
11258
* plug some encoding conversion routines,
11259
* else xmlSwitchEncoding will set to (default)
11266
enc = xmlDetectCharEncoding(start, 4);
11267
xmlSwitchEncoding(ctxt, enc);
11273
cur = ctxt->input->cur[0];
11274
next = ctxt->input->cur[1];
11276
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11277
ctxt->sax->setDocumentLocator(ctxt->userData,
11278
&xmlDefaultSAXLocator);
11279
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11280
ctxt->instate = XML_PARSER_EOF;
11282
xmlGenericError(xmlGenericErrorContext,
11283
"PP: entering EOF\n");
11285
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11286
ctxt->sax->endDocument(ctxt->userData);
11289
if ((cur == '<') && (next == '?')) {
11290
/* PI or XML decl */
11291
if (avail < 5) return(ret);
11292
if ((!terminate) &&
11293
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11295
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11296
ctxt->sax->setDocumentLocator(ctxt->userData,
11297
&xmlDefaultSAXLocator);
11298
if ((ctxt->input->cur[2] == 'x') &&
11299
(ctxt->input->cur[3] == 'm') &&
11300
(ctxt->input->cur[4] == 'l') &&
11301
(IS_BLANK_CH(ctxt->input->cur[5]))) {
11304
xmlGenericError(xmlGenericErrorContext,
11305
"PP: Parsing XML Decl\n");
11307
xmlParseXMLDecl(ctxt);
11308
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11310
* The XML REC instructs us to stop parsing right
11313
ctxt->instate = XML_PARSER_EOF;
11316
ctxt->standalone = ctxt->input->standalone;
11317
if ((ctxt->encoding == NULL) &&
11318
(ctxt->input->encoding != NULL))
11319
ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11320
if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11321
(!ctxt->disableSAX))
11322
ctxt->sax->startDocument(ctxt->userData);
11323
ctxt->instate = XML_PARSER_MISC;
11325
xmlGenericError(xmlGenericErrorContext,
11326
"PP: entering MISC\n");
11329
ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11330
if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11331
(!ctxt->disableSAX))
11332
ctxt->sax->startDocument(ctxt->userData);
11333
ctxt->instate = XML_PARSER_MISC;
11335
xmlGenericError(xmlGenericErrorContext,
11336
"PP: entering MISC\n");
11340
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11341
ctxt->sax->setDocumentLocator(ctxt->userData,
11342
&xmlDefaultSAXLocator);
11343
ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11344
if (ctxt->version == NULL) {
11345
xmlErrMemory(ctxt, NULL);
11348
if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11349
(!ctxt->disableSAX))
11350
ctxt->sax->startDocument(ctxt->userData);
11351
ctxt->instate = XML_PARSER_MISC;
11353
xmlGenericError(xmlGenericErrorContext,
11354
"PP: entering MISC\n");
11358
case XML_PARSER_START_TAG: {
11359
const xmlChar *name;
11360
const xmlChar *prefix = NULL;
11361
const xmlChar *URI = NULL;
11362
int nsNr = ctxt->nsNr;
11364
if ((avail < 2) && (ctxt->inputNr == 1))
11366
cur = ctxt->input->cur[0];
11368
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11369
ctxt->instate = XML_PARSER_EOF;
11370
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11371
ctxt->sax->endDocument(ctxt->userData);
11375
if (ctxt->progressive) {
11376
/* > can be found unescaped in attribute values */
11377
if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11379
} else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11383
if (ctxt->spaceNr == 0)
11384
spacePush(ctxt, -1);
11385
else if (*ctxt->space == -2)
11386
spacePush(ctxt, -1);
11388
spacePush(ctxt, *ctxt->space);
11389
#ifdef LIBXML_SAX1_ENABLED
11391
#endif /* LIBXML_SAX1_ENABLED */
11392
name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11393
#ifdef LIBXML_SAX1_ENABLED
11395
name = xmlParseStartTag(ctxt);
11396
#endif /* LIBXML_SAX1_ENABLED */
11397
if (ctxt->instate == XML_PARSER_EOF)
11399
if (name == NULL) {
11401
ctxt->instate = XML_PARSER_EOF;
11402
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11403
ctxt->sax->endDocument(ctxt->userData);
11406
#ifdef LIBXML_VALID_ENABLED
11408
* [ VC: Root Element Type ]
11409
* The Name in the document type declaration must match
11410
* the element type of the root element.
11412
if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11413
ctxt->node && (ctxt->node == ctxt->myDoc->children))
11414
ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11415
#endif /* LIBXML_VALID_ENABLED */
11418
* Check for an Empty Element.
11420
if ((RAW == '/') && (NXT(1) == '>')) {
11424
if ((ctxt->sax != NULL) &&
11425
(ctxt->sax->endElementNs != NULL) &&
11426
(!ctxt->disableSAX))
11427
ctxt->sax->endElementNs(ctxt->userData, name,
11429
if (ctxt->nsNr - nsNr > 0)
11430
nsPop(ctxt, ctxt->nsNr - nsNr);
11431
#ifdef LIBXML_SAX1_ENABLED
11433
if ((ctxt->sax != NULL) &&
11434
(ctxt->sax->endElement != NULL) &&
11435
(!ctxt->disableSAX))
11436
ctxt->sax->endElement(ctxt->userData, name);
11437
#endif /* LIBXML_SAX1_ENABLED */
11439
if (ctxt->instate == XML_PARSER_EOF)
11442
if (ctxt->nameNr == 0) {
11443
ctxt->instate = XML_PARSER_EPILOG;
11445
ctxt->instate = XML_PARSER_CONTENT;
11447
ctxt->progressive = 1;
11453
xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11454
"Couldn't find end of Start Tag %s\n",
11460
nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11461
#ifdef LIBXML_SAX1_ENABLED
11463
namePush(ctxt, name);
11464
#endif /* LIBXML_SAX1_ENABLED */
11466
ctxt->instate = XML_PARSER_CONTENT;
11467
ctxt->progressive = 1;
11470
case XML_PARSER_CONTENT: {
11471
const xmlChar *test;
11473
if ((avail < 2) && (ctxt->inputNr == 1))
11475
cur = ctxt->input->cur[0];
11476
next = ctxt->input->cur[1];
11479
cons = ctxt->input->consumed;
11480
if ((cur == '<') && (next == '/')) {
11481
ctxt->instate = XML_PARSER_END_TAG;
11483
} else if ((cur == '<') && (next == '?')) {
11484
if ((!terminate) &&
11485
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11486
ctxt->progressive = XML_PARSER_PI;
11490
ctxt->instate = XML_PARSER_CONTENT;
11491
ctxt->progressive = 1;
11492
} else if ((cur == '<') && (next != '!')) {
11493
ctxt->instate = XML_PARSER_START_TAG;
11495
} else if ((cur == '<') && (next == '!') &&
11496
(ctxt->input->cur[2] == '-') &&
11497
(ctxt->input->cur[3] == '-')) {
11502
ctxt->input->cur += 4;
11503
term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11504
ctxt->input->cur -= 4;
11505
if ((!terminate) && (term < 0)) {
11506
ctxt->progressive = XML_PARSER_COMMENT;
11509
xmlParseComment(ctxt);
11510
ctxt->instate = XML_PARSER_CONTENT;
11511
ctxt->progressive = 1;
11512
} else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11513
(ctxt->input->cur[2] == '[') &&
11514
(ctxt->input->cur[3] == 'C') &&
11515
(ctxt->input->cur[4] == 'D') &&
11516
(ctxt->input->cur[5] == 'A') &&
11517
(ctxt->input->cur[6] == 'T') &&
11518
(ctxt->input->cur[7] == 'A') &&
11519
(ctxt->input->cur[8] == '[')) {
11521
ctxt->instate = XML_PARSER_CDATA_SECTION;
11523
} else if ((cur == '<') && (next == '!') &&
11526
} else if (cur == '&') {
11527
if ((!terminate) &&
11528
(xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11530
xmlParseReference(ctxt);
11532
/* TODO Avoid the extra copy, handle directly !!! */
11534
* Goal of the following test is:
11535
* - minimize calls to the SAX 'character' callback
11536
* when they are mergeable
11537
* - handle a problem for isBlank when we only parse
11538
* a sequence of blank chars and the next one is
11539
* not available to check against '<' presence.
11540
* - tries to homogenize the differences in SAX
11541
* callbacks between the push and pull versions
11544
if ((ctxt->inputNr == 1) &&
11545
(avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11547
if (ctxt->progressive) {
11548
if ((lastlt == NULL) ||
11549
(ctxt->input->cur > lastlt))
11551
} else if (xmlParseLookupSequence(ctxt,
11557
ctxt->checkIndex = 0;
11558
xmlParseCharData(ctxt, 0);
11561
* Pop-up of finished entities.
11563
while ((RAW == 0) && (ctxt->inputNr > 1))
11565
if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11566
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11567
"detected an error in element content\n");
11568
ctxt->instate = XML_PARSER_EOF;
11573
case XML_PARSER_END_TAG:
11577
if (ctxt->progressive) {
11578
/* > can be found unescaped in attribute values */
11579
if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11581
} else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11586
xmlParseEndTag2(ctxt,
11587
(void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11588
(void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11589
(int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11592
#ifdef LIBXML_SAX1_ENABLED
11594
xmlParseEndTag1(ctxt, 0);
11595
#endif /* LIBXML_SAX1_ENABLED */
11596
if (ctxt->instate == XML_PARSER_EOF) {
11598
} else if (ctxt->nameNr == 0) {
11599
ctxt->instate = XML_PARSER_EPILOG;
11601
ctxt->instate = XML_PARSER_CONTENT;
11604
case XML_PARSER_CDATA_SECTION: {
11606
* The Push mode need to have the SAX callback for
11607
* cdataBlock merge back contiguous callbacks.
11611
base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11613
if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11616
tmp = xmlCheckCdataPush(ctxt->input->cur,
11617
XML_PARSER_BIG_BUFFER_SIZE);
11620
ctxt->input->cur += tmp;
11621
goto encoding_error;
11623
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11624
if (ctxt->sax->cdataBlock != NULL)
11625
ctxt->sax->cdataBlock(ctxt->userData,
11626
ctxt->input->cur, tmp);
11627
else if (ctxt->sax->characters != NULL)
11628
ctxt->sax->characters(ctxt->userData,
11629
ctxt->input->cur, tmp);
11631
if (ctxt->instate == XML_PARSER_EOF)
11634
ctxt->checkIndex = 0;
11640
tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11641
if ((tmp < 0) || (tmp != base)) {
11643
ctxt->input->cur += tmp;
11644
goto encoding_error;
11646
if ((ctxt->sax != NULL) && (base == 0) &&
11647
(ctxt->sax->cdataBlock != NULL) &&
11648
(!ctxt->disableSAX)) {
11650
* Special case to provide identical behaviour
11651
* between pull and push parsers on enpty CDATA
11654
if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11655
(!strncmp((const char *)&ctxt->input->cur[-9],
11657
ctxt->sax->cdataBlock(ctxt->userData,
11659
} else if ((ctxt->sax != NULL) && (base > 0) &&
11660
(!ctxt->disableSAX)) {
11661
if (ctxt->sax->cdataBlock != NULL)
11662
ctxt->sax->cdataBlock(ctxt->userData,
11663
ctxt->input->cur, base);
11664
else if (ctxt->sax->characters != NULL)
11665
ctxt->sax->characters(ctxt->userData,
11666
ctxt->input->cur, base);
11668
if (ctxt->instate == XML_PARSER_EOF)
11671
ctxt->checkIndex = 0;
11672
ctxt->instate = XML_PARSER_CONTENT;
11674
xmlGenericError(xmlGenericErrorContext,
11675
"PP: entering CONTENT\n");
11680
case XML_PARSER_MISC:
11682
if (ctxt->input->buf == NULL)
11683
avail = ctxt->input->length -
11684
(ctxt->input->cur - ctxt->input->base);
11686
avail = xmlBufUse(ctxt->input->buf->buffer) -
11687
(ctxt->input->cur - ctxt->input->base);
11690
cur = ctxt->input->cur[0];
11691
next = ctxt->input->cur[1];
11692
if ((cur == '<') && (next == '?')) {
11693
if ((!terminate) &&
11694
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11695
ctxt->progressive = XML_PARSER_PI;
11699
xmlGenericError(xmlGenericErrorContext,
11700
"PP: Parsing PI\n");
11703
if (ctxt->instate == XML_PARSER_EOF)
11705
ctxt->instate = XML_PARSER_MISC;
11706
ctxt->progressive = 1;
11707
ctxt->checkIndex = 0;
11708
} else if ((cur == '<') && (next == '!') &&
11709
(ctxt->input->cur[2] == '-') &&
11710
(ctxt->input->cur[3] == '-')) {
11711
if ((!terminate) &&
11712
(xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11713
ctxt->progressive = XML_PARSER_COMMENT;
11717
xmlGenericError(xmlGenericErrorContext,
11718
"PP: Parsing Comment\n");
11720
xmlParseComment(ctxt);
11721
if (ctxt->instate == XML_PARSER_EOF)
11723
ctxt->instate = XML_PARSER_MISC;
11724
ctxt->progressive = 1;
11725
ctxt->checkIndex = 0;
11726
} else if ((cur == '<') && (next == '!') &&
11727
(ctxt->input->cur[2] == 'D') &&
11728
(ctxt->input->cur[3] == 'O') &&
11729
(ctxt->input->cur[4] == 'C') &&
11730
(ctxt->input->cur[5] == 'T') &&
11731
(ctxt->input->cur[6] == 'Y') &&
11732
(ctxt->input->cur[7] == 'P') &&
11733
(ctxt->input->cur[8] == 'E')) {
11734
if ((!terminate) &&
11735
(xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11736
ctxt->progressive = XML_PARSER_DTD;
11740
xmlGenericError(xmlGenericErrorContext,
11741
"PP: Parsing internal subset\n");
11743
ctxt->inSubset = 1;
11744
ctxt->progressive = 0;
11745
ctxt->checkIndex = 0;
11746
xmlParseDocTypeDecl(ctxt);
11747
if (ctxt->instate == XML_PARSER_EOF)
11750
ctxt->instate = XML_PARSER_DTD;
11752
xmlGenericError(xmlGenericErrorContext,
11753
"PP: entering DTD\n");
11757
* Create and update the external subset.
11759
ctxt->inSubset = 2;
11760
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11761
(ctxt->sax->externalSubset != NULL))
11762
ctxt->sax->externalSubset(ctxt->userData,
11763
ctxt->intSubName, ctxt->extSubSystem,
11765
ctxt->inSubset = 0;
11766
xmlCleanSpecialAttr(ctxt);
11767
ctxt->instate = XML_PARSER_PROLOG;
11769
xmlGenericError(xmlGenericErrorContext,
11770
"PP: entering PROLOG\n");
11773
} else if ((cur == '<') && (next == '!') &&
11777
ctxt->instate = XML_PARSER_START_TAG;
11778
ctxt->progressive = XML_PARSER_START_TAG;
11779
xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11781
xmlGenericError(xmlGenericErrorContext,
11782
"PP: entering START_TAG\n");
11786
case XML_PARSER_PROLOG:
11788
if (ctxt->input->buf == NULL)
11789
avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11791
avail = xmlBufUse(ctxt->input->buf->buffer) -
11792
(ctxt->input->cur - ctxt->input->base);
11795
cur = ctxt->input->cur[0];
11796
next = ctxt->input->cur[1];
11797
if ((cur == '<') && (next == '?')) {
11798
if ((!terminate) &&
11799
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11800
ctxt->progressive = XML_PARSER_PI;
11804
xmlGenericError(xmlGenericErrorContext,
11805
"PP: Parsing PI\n");
11808
if (ctxt->instate == XML_PARSER_EOF)
11810
ctxt->instate = XML_PARSER_PROLOG;
11811
ctxt->progressive = 1;
11812
} else if ((cur == '<') && (next == '!') &&
11813
(ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11814
if ((!terminate) &&
11815
(xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11816
ctxt->progressive = XML_PARSER_COMMENT;
11820
xmlGenericError(xmlGenericErrorContext,
11821
"PP: Parsing Comment\n");
11823
xmlParseComment(ctxt);
11824
if (ctxt->instate == XML_PARSER_EOF)
11826
ctxt->instate = XML_PARSER_PROLOG;
11827
ctxt->progressive = 1;
11828
} else if ((cur == '<') && (next == '!') &&
11832
ctxt->instate = XML_PARSER_START_TAG;
11833
if (ctxt->progressive == 0)
11834
ctxt->progressive = XML_PARSER_START_TAG;
11835
xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11837
xmlGenericError(xmlGenericErrorContext,
11838
"PP: entering START_TAG\n");
11842
case XML_PARSER_EPILOG:
11844
if (ctxt->input->buf == NULL)
11845
avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11847
avail = xmlBufUse(ctxt->input->buf->buffer) -
11848
(ctxt->input->cur - ctxt->input->base);
11851
cur = ctxt->input->cur[0];
11852
next = ctxt->input->cur[1];
11853
if ((cur == '<') && (next == '?')) {
11854
if ((!terminate) &&
11855
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11856
ctxt->progressive = XML_PARSER_PI;
11860
xmlGenericError(xmlGenericErrorContext,
11861
"PP: Parsing PI\n");
11864
if (ctxt->instate == XML_PARSER_EOF)
11866
ctxt->instate = XML_PARSER_EPILOG;
11867
ctxt->progressive = 1;
11868
} else if ((cur == '<') && (next == '!') &&
11869
(ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11870
if ((!terminate) &&
11871
(xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11872
ctxt->progressive = XML_PARSER_COMMENT;
11876
xmlGenericError(xmlGenericErrorContext,
11877
"PP: Parsing Comment\n");
11879
xmlParseComment(ctxt);
11880
if (ctxt->instate == XML_PARSER_EOF)
11882
ctxt->instate = XML_PARSER_EPILOG;
11883
ctxt->progressive = 1;
11884
} else if ((cur == '<') && (next == '!') &&
11888
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11889
ctxt->instate = XML_PARSER_EOF;
11891
xmlGenericError(xmlGenericErrorContext,
11892
"PP: entering EOF\n");
11894
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11895
ctxt->sax->endDocument(ctxt->userData);
11899
case XML_PARSER_DTD: {
11901
* Sorry but progressive parsing of the internal subset
11902
* is not expected to be supported. We first check that
11903
* the full content of the internal subset is available and
11904
* the parsing is launched only at that point.
11905
* Internal subset ends up with "']' S? '>'" in an unescaped
11906
* section and not in a ']]>' sequence which are conditional
11907
* sections (whoever argued to keep that crap in XML deserve
11908
* a place in hell !).
11915
base = ctxt->input->cur - ctxt->input->base;
11916
if (base < 0) return(0);
11917
if (ctxt->checkIndex > base)
11918
base = ctxt->checkIndex;
11919
buf = xmlBufContent(ctxt->input->buf->buffer);
11920
use = xmlBufUse(ctxt->input->buf->buffer);
11921
for (;(unsigned int) base < use; base++) {
11923
if (buf[base] == quote)
11927
if ((quote == 0) && (buf[base] == '<')) {
11929
/* special handling of comments */
11930
if (((unsigned int) base + 4 < use) &&
11931
(buf[base + 1] == '!') &&
11932
(buf[base + 2] == '-') &&
11933
(buf[base + 3] == '-')) {
11934
for (;(unsigned int) base + 3 < use; base++) {
11935
if ((buf[base] == '-') &&
11936
(buf[base + 1] == '-') &&
11937
(buf[base + 2] == '>')) {
11945
fprintf(stderr, "unfinished comment\n");
11952
if (buf[base] == '"') {
11956
if (buf[base] == '\'') {
11960
if (buf[base] == ']') {
11962
fprintf(stderr, "%c%c%c%c: ", buf[base],
11963
buf[base + 1], buf[base + 2], buf[base + 3]);
11965
if ((unsigned int) base +1 >= use)
11967
if (buf[base + 1] == ']') {
11968
/* conditional crap, skip both ']' ! */
11972
for (i = 1; (unsigned int) base + i < use; i++) {
11973
if (buf[base + i] == '>') {
11975
fprintf(stderr, "found\n");
11977
goto found_end_int_subset;
11979
if (!IS_BLANK_CH(buf[base + i])) {
11981
fprintf(stderr, "not found\n");
11983
goto not_end_of_int_subset;
11987
fprintf(stderr, "end of stream\n");
11992
not_end_of_int_subset:
11993
continue; /* for */
11996
* We didn't found the end of the Internal subset
11999
ctxt->checkIndex = base;
12001
ctxt->checkIndex = 0;
12004
xmlGenericError(xmlGenericErrorContext,
12005
"PP: lookup of int subset end filed\n");
12009
found_end_int_subset:
12010
ctxt->checkIndex = 0;
12011
xmlParseInternalSubset(ctxt);
12012
if (ctxt->instate == XML_PARSER_EOF)
12014
ctxt->inSubset = 2;
12015
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12016
(ctxt->sax->externalSubset != NULL))
12017
ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12018
ctxt->extSubSystem, ctxt->extSubURI);
12019
ctxt->inSubset = 0;
12020
xmlCleanSpecialAttr(ctxt);
12021
if (ctxt->instate == XML_PARSER_EOF)
12023
ctxt->instate = XML_PARSER_PROLOG;
12024
ctxt->checkIndex = 0;
12026
xmlGenericError(xmlGenericErrorContext,
12027
"PP: entering PROLOG\n");
12031
case XML_PARSER_COMMENT:
12032
xmlGenericError(xmlGenericErrorContext,
12033
"PP: internal error, state == COMMENT\n");
12034
ctxt->instate = XML_PARSER_CONTENT;
12036
xmlGenericError(xmlGenericErrorContext,
12037
"PP: entering CONTENT\n");
12040
case XML_PARSER_IGNORE:
12041
xmlGenericError(xmlGenericErrorContext,
12042
"PP: internal error, state == IGNORE");
12043
ctxt->instate = XML_PARSER_DTD;
12045
xmlGenericError(xmlGenericErrorContext,
12046
"PP: entering DTD\n");
12049
case XML_PARSER_PI:
12050
xmlGenericError(xmlGenericErrorContext,
12051
"PP: internal error, state == PI\n");
12052
ctxt->instate = XML_PARSER_CONTENT;
12054
xmlGenericError(xmlGenericErrorContext,
12055
"PP: entering CONTENT\n");
12058
case XML_PARSER_ENTITY_DECL:
12059
xmlGenericError(xmlGenericErrorContext,
12060
"PP: internal error, state == ENTITY_DECL\n");
12061
ctxt->instate = XML_PARSER_DTD;
12063
xmlGenericError(xmlGenericErrorContext,
12064
"PP: entering DTD\n");
12067
case XML_PARSER_ENTITY_VALUE:
12068
xmlGenericError(xmlGenericErrorContext,
12069
"PP: internal error, state == ENTITY_VALUE\n");
12070
ctxt->instate = XML_PARSER_CONTENT;
12072
xmlGenericError(xmlGenericErrorContext,
12073
"PP: entering DTD\n");
12076
case XML_PARSER_ATTRIBUTE_VALUE:
12077
xmlGenericError(xmlGenericErrorContext,
12078
"PP: internal error, state == ATTRIBUTE_VALUE\n");
12079
ctxt->instate = XML_PARSER_START_TAG;
12081
xmlGenericError(xmlGenericErrorContext,
12082
"PP: entering START_TAG\n");
12085
case XML_PARSER_SYSTEM_LITERAL:
12086
xmlGenericError(xmlGenericErrorContext,
12087
"PP: internal error, state == SYSTEM_LITERAL\n");
12088
ctxt->instate = XML_PARSER_START_TAG;
12090
xmlGenericError(xmlGenericErrorContext,
12091
"PP: entering START_TAG\n");
12094
case XML_PARSER_PUBLIC_LITERAL:
12095
xmlGenericError(xmlGenericErrorContext,
12096
"PP: internal error, state == PUBLIC_LITERAL\n");
12097
ctxt->instate = XML_PARSER_START_TAG;
12099
xmlGenericError(xmlGenericErrorContext,
12100
"PP: entering START_TAG\n");
12107
xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12114
snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12115
ctxt->input->cur[0], ctxt->input->cur[1],
12116
ctxt->input->cur[2], ctxt->input->cur[3]);
12117
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12118
"Input is not proper UTF-8, indicate encoding !\n%s",
12119
BAD_CAST buffer, NULL);
12125
* xmlParseCheckTransition:
12126
* @ctxt: an XML parser context
12127
* @chunk: a char array
12128
* @size: the size in bytes of the chunk
12130
* Check depending on the current parser state if the chunk given must be
12131
* processed immediately or whether more data is needed to advance parsing.
12133
* Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12136
/*
 * Purpose: look at the bytes just appended by a push and tell the caller
 * whether they are worth a parse attempt.  The header comment states the
 * contract: -1 on error, 0 if no push is needed, 1 if it is.
 * NOTE(review): the statements guarded by each test below (presumably the
 * return statements) are not visible in this listing.
 */
xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12137
/* Argument validation: NULL context, NULL chunk or negative size. */
if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12139
/*
 * Every state test below asks the same question: does the new data
 * contain a greater-than byte?  memchr() scans exactly size bytes of chunk.
 */
if (ctxt->instate == XML_PARSER_START_TAG) {
12140
if (memchr(chunk, '>', size) != NULL)
12144
/*
 * COMMENT and PI are matched against ctxt->progressive, not ctxt->instate:
 * the MISC/PROLOG/EPILOG cases earlier in this file store XML_PARSER_COMMENT
 * or XML_PARSER_PI in ctxt->progressive right after a failed
 * xmlParseLookupSequence() for the terminator.
 */
if (ctxt->progressive == XML_PARSER_COMMENT) {
12145
if (memchr(chunk, '>', size) != NULL)
12149
if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12150
if (memchr(chunk, '>', size) != NULL)
12154
if (ctxt->progressive == XML_PARSER_PI) {
12155
if (memchr(chunk, '>', size) != NULL)
12159
if (ctxt->instate == XML_PARSER_END_TAG) {
12160
if (memchr(chunk, '>', size) != NULL)
12164
/* A pending DTD may be recorded in either field, so both are checked. */
if ((ctxt->progressive == XML_PARSER_DTD) ||
12165
(ctxt->instate == XML_PARSER_DTD)) {
12166
if (memchr(chunk, '>', size) != NULL)
12175
* @ctxt: an XML parser context
12176
* @chunk: a char array
12177
* @size: the size in bytes of the chunk
12178
* @terminate: last chunk indicator
12180
* Parse a Chunk of memory
12182
* Returns zero if no error, the xmlParserErrors otherwise.
12185
/*
 * Push-parser entry point: append chunk (size bytes) to the context input
 * buffer, then drive xmlParseTryOrFinish() on whatever is buffered.
 * terminate marks the last chunk (see the header comment).
 * Return values visible below: XML_ERR_INTERNAL_ERROR, ctxt->errNo,
 * XML_PARSER_EOF (push-failure path) and XML_ERR_INVALID_ENCODING.
 * NOTE(review): several short lines of this function (local declarations,
 * braces, some guards and returns) are not visible in this listing.
 */
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12189
/* Buffer fill level before this push; 0 means nothing was buffered. */
size_t old_avail = 0;
12193
return(XML_ERR_INTERNAL_ERROR);
12194
/* An earlier fatal error (errNo set and SAX disabled) short-circuits the call. */
if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12195
return(ctxt->errNo);
12196
if (ctxt->instate == XML_PARSER_EOF)
12198
if (ctxt->instate == XML_PARSER_START)
12199
xmlDetectSAX2(ctxt);
12200
/*
 * Detect a non-final chunk whose last byte is CR.
 * NOTE(review): the body of this test is not visible.  Further down a lone
 * CR byte is pushed when end_in_lf == 1, so presumably the trailing CR is
 * withheld here and end_in_lf is set -- confirm.
 */
if ((size > 0) && (chunk != NULL) && (!terminate) &&
12201
(chunk[size - 1] == '\r')) {
12208
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12209
(ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
12210
/*
 * Save the input base and cursor as offsets; they are handed back to
 * xmlBufSetInputBaseCur() after the push (presumably because pushing can
 * relocate the buffer -- confirm).
 */
size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12211
size_t cur = ctxt->input->cur - ctxt->input->base;
12214
old_avail = xmlBufUse(ctxt->input->buf->buffer);
12216
* Specific handling if we autodetected an encoding, we should not
12217
* push more than the first line ... which depend on the encoding
12218
* And only push the rest once the final encoding was detected
12220
if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12221
(ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12222
/*
 * len caps the number of bytes pushed while the encoding is still being
 * settled.  It starts at 45; the encoder-name tests below presumably
 * replace it for UTF-16 and UCS-4 style names (the assignments are not
 * visible in this listing).
 */
unsigned int len = 45;
12224
if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12225
BAD_CAST "UTF-16")) ||
12226
(xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12227
BAD_CAST "UTF16")))
12229
else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12230
BAD_CAST "UCS-4")) ||
12231
(xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12235
/* Raw bytes already consumed count against the cap. */
if (ctxt->input->buf->rawconsumed < len)
12236
len -= ctxt->input->buf->rawconsumed;
12239
* Change size for reading the initial declaration only
12240
* if size is greater than len. Otherwise, memmove in xmlBufferAdd
12241
* will blindly copy extra bytes from memory.
12243
/* remain records how many bytes of the chunk are held back for later. */
if ((unsigned int) size > len) {
12244
remain = size - len;
12250
res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12252
/*
 * NOTE(review): XML_PARSER_EOF is used elsewhere in this function as a
 * ctxt->instate value, yet here it is stored in errNo and returned as the
 * error code -- confirm this is intended rather than an XML_ERR_* constant.
 */
ctxt->errNo = XML_PARSER_EOF;
12253
ctxt->disableSAX = 1;
12254
return (XML_PARSER_EOF);
12256
xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12258
xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12261
/*
 * No data was pushed: when the input buffer has an encoder, a decoded
 * buffer and a raw buffer, run xmlCharEncInput() on it, again saving and
 * restoring base and cursor around the call.
 */
} else if (ctxt->instate != XML_PARSER_EOF) {
12262
if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12263
xmlParserInputBufferPtr in = ctxt->input->buf;
12264
if ((in->encoder != NULL) && (in->buffer != NULL) &&
12265
(in->raw != NULL)) {
12267
size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12268
size_t current = ctxt->input->cur - ctxt->input->base;
12270
nbchars = xmlCharEncInput(in, terminate);
12273
xmlGenericError(xmlGenericErrorContext,
12274
"xmlParseChunk: encoder error\n");
12275
return(XML_ERR_INVALID_ENCODING);
12277
xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12282
/*
 * Parse attempt with terminate forced to 0.
 * NOTE(review): the condition choosing between this call and the guarded
 * call further down is not visible in this listing.
 */
xmlParseTryOrFinish(ctxt, 0);
12284
if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12285
avail = xmlBufUse(ctxt->input->buf->buffer);
12287
* Depending on the current state it may not be such
12288
* a good idea to try parsing if there is nothing in the chunk
12289
* which would be worth doing a parser state transition and we
12290
* need to wait for more data
12292
/*
 * Parse now if this is the last chunk, the buffer exceeds
 * XML_MAX_TEXT_LENGTH, nothing was buffered before or is buffered now, or
 * xmlParseCheckTransition() returns non-zero for the newly added bytes
 * (input->base + old_avail, avail - old_avail bytes).  Its -1 error result
 * is non-zero too and therefore also triggers the attempt.
 */
if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12293
(old_avail == 0) || (avail == 0) ||
12294
(xmlParseCheckTransition(ctxt,
12295
(const char *)&ctxt->input->base[old_avail],
12296
avail - old_avail)))
12297
xmlParseTryOrFinish(ctxt, terminate);
12299
if (ctxt->instate == XML_PARSER_EOF)
12300
return(ctxt->errNo);
12302
/*
 * Unless XML_PARSE_HUGE is set, raise a fatal error and stop when either
 * the distance from cur to end or from base to cur exceeds
 * XML_MAX_LOOKUP_LIMIT.
 */
if ((ctxt->input != NULL) &&
12303
(((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12304
((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12305
((ctxt->options & XML_PARSE_HUGE) == 0)) {
12306
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12307
ctxt->instate = XML_PARSER_EOF;
12309
if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12310
return(ctxt->errNo);
12318
/*
 * When end_in_lf is set, push one CR byte into the input buffer, saving
 * and restoring base and cursor around the push as above.
 */
if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12319
(ctxt->input->buf != NULL)) {
12320
size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12322
size_t current = ctxt->input->cur - ctxt->input->base;
12324
xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12326
xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12331
* Check for termination
12335
/* cur_avail: bytes left after the cursor, from the raw length or the buffer. */
if (ctxt->input != NULL) {
12336
if (ctxt->input->buf == NULL)
12337
cur_avail = ctxt->input->length -
12338
(ctxt->input->cur - ctxt->input->base);
12340
cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12341
(ctxt->input->cur - ctxt->input->base);
12344
/*
 * XML_ERR_DOCUMENT_END is raised when the parser stopped in a state other
 * than EOF or EPILOG, or when it is in EPILOG with bytes still unread.
 */
if ((ctxt->instate != XML_PARSER_EOF) &&
12345
(ctxt->instate != XML_PARSER_EPILOG)) {
12346
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12348
if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12349
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12351
/* Fire endDocument at most once: only if the state is not EOF already. */
if (ctxt->instate != XML_PARSER_EOF) {
12352
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12353
ctxt->sax->endDocument(ctxt->userData);
12355
ctxt->instate = XML_PARSER_EOF;
12357
/*
 * A document that is not well formed returns errNo; the return for the
 * well-formed case is not visible here (the header comment says zero).
 */
if (ctxt->wellFormed == 0)
12358
return((xmlParserErrors) ctxt->errNo);
12363
/************************************************************************
12365
* I/O front end functions to the parser *
12367
************************************************************************/
12370
* xmlCreatePushParserCtxt:
12371
* @sax: a SAX handler
12372
* @user_data: The user data returned on SAX callbacks
12373
* @chunk: a pointer to an array of chars
12374
* @size: number of chars in the array
12375
* @filename: an optional file name or URI
12377
* Create a parser context for using the XML parser in push mode.
12378
* If @chunk is non-NULL and @size is non-zero, the data is used to detect
12379
* the encoding. The remaining characters will be parsed so they
12380
* don't need to be fed in again through xmlParseChunk.
12381
* To allow content encoding detection, @size should be >= 4
12382
* The value of @filename is used for fetching external entities
12383
* and error/warning reports.
12385
* Returns the new parser context or NULL
12389
/*
 * Build a push-mode parser context: allocate an input buffer for the
 * detected encoding, create the context and its pushTab, install a private
 * copy of the caller SAX handler, attach an input stream named after
 * filename, and push the optional initial chunk.
 * NOTE(review): several short lines (return statements, braces, else
 * keywords and possibly a NULL guard on sax) are not visible in this listing.
 */
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12390
const char *chunk, int size, const char *filename) {
12391
xmlParserCtxtPtr ctxt;
12392
xmlParserInputPtr inputStream;
12393
xmlParserInputBufferPtr buf;
12394
xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12397
* plug some encoding conversion routines
12399
/* Encoding detection is attempted only with at least 4 bytes of data. */
if ((chunk != NULL) && (size >= 4))
12400
enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12402
buf = xmlAllocParserInputBuffer(enc);
12403
if (buf == NULL) return(NULL);
12405
ctxt = xmlNewParserCtxt();
12406
if (ctxt == NULL) {
12407
xmlErrMemory(NULL, "creating parser: out of memory\n");
12408
xmlFreeParserInputBuffer(buf);
12411
ctxt->dictNames = 1;
12412
/* pushTab holds three pointer slots per name-stack entry (nameMax * 3). */
ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12413
if (ctxt->pushTab == NULL) {
12414
xmlErrMemory(ctxt, NULL);
12415
xmlFreeParserInputBuffer(buf);
12416
xmlFreeParserCtxt(ctxt);
12420
/*
 * With SAX1 support compiled in, the existing handler is freed only when it
 * is not the shared xmlDefaultSAXHandler; without it the xmlFree below is
 * unconditional.
 */
#ifdef LIBXML_SAX1_ENABLED
12421
if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12422
#endif /* LIBXML_SAX1_ENABLED */
12423
xmlFree(ctxt->sax);
12424
ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12425
if (ctxt->sax == NULL) {
12426
xmlErrMemory(ctxt, NULL);
12427
xmlFreeParserInputBuffer(buf);
12428
xmlFreeParserCtxt(ctxt);
12431
/*
 * Zero the private handler, then copy the caller handler into it: the full
 * xmlSAXHandler when it is tagged XML_SAX2_MAGIC, otherwise only the
 * xmlSAXHandlerV1-sized prefix (the else between the two memcpy calls is
 * not visible in this listing).
 */
memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12432
if (sax->initialized == XML_SAX2_MAGIC)
12433
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12435
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12436
if (user_data != NULL)
12437
ctxt->userData = user_data;
12439
if (filename == NULL) {
12440
ctxt->directory = NULL;
12442
ctxt->directory = xmlParserGetDirectory(filename);
12445
inputStream = xmlNewInputStream(ctxt);
12446
if (inputStream == NULL) {
12447
xmlFreeParserCtxt(ctxt);
12448
xmlFreeParserInputBuffer(buf);
12452
if (filename == NULL)
12453
inputStream->filename = NULL;
12455
inputStream->filename = (char *)
12456
xmlCanonicPath((const xmlChar *) filename);
12457
/*
 * NOTE(review): on this failure path inputStream has not yet been handed
 * to the context (inputPush is further down) and no call releasing it is
 * visible -- check for a leak.
 */
if (inputStream->filename == NULL) {
12458
xmlFreeParserCtxt(ctxt);
12459
xmlFreeParserInputBuffer(buf);
12463
/* Attach the buffer to the stream, then make the stream the current input. */
inputStream->buf = buf;
12464
xmlBufResetInput(inputStream->buf->buffer, inputStream);
12465
inputPush(ctxt, inputStream);
12468
* If the caller didn't provide an initial 'chunk' for determining
12469
* the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12470
* that it can be automatically determined later
12472
if ((size == 0) || (chunk == NULL)) {
12473
ctxt->charset = XML_CHAR_ENCODING_NONE;
12474
} else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12475
size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12476
size_t cur = ctxt->input->cur - ctxt->input->base;
12478
xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12480
xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12482
xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12486
if (enc != XML_CHAR_ENCODING_NONE) {
12487
xmlSwitchEncoding(ctxt, enc);
12492
#endif /* LIBXML_PUSH_ENABLED */
12496
* @ctxt: an XML parser context
12498
* Blocks further parser processing
12501
/*
 * Block further processing on ctxt: set the state to XML_PARSER_EOF, record
 * XML_ERR_USER_STOP as the error, disable SAX callbacks, and repoint the
 * current input at an empty string literal.
 * NOTE(review): any guard on a NULL ctxt is not visible in this listing.
 */
xmlStopParser(xmlParserCtxtPtr ctxt) {
12504
ctxt->instate = XML_PARSER_EOF;
12505
ctxt->errNo = XML_ERR_USER_STOP;
12506
ctxt->disableSAX = 1;
12507
/*
 * NOTE(review): cur and base are moved to the empty string but input->end
 * is not updated in the visible lines -- confirm nothing computes
 * end - cur on this input afterwards.
 */
if (ctxt->input != NULL) {
12508
ctxt->input->cur = BAD_CAST"";
12509
ctxt->input->base = ctxt->input->cur;
12514
* xmlCreateIOParserCtxt:
12515
* @sax: a SAX handler
12516
* @user_data: The user data returned on SAX callbacks
12517
* @ioread: an I/O read function
12518
* @ioclose: an I/O close function
12519
* @ioctx: an I/O handler
12520
* @enc: the charset encoding if known
12522
* Create a parser context for using the XML parser with an existing
12525
* Returns the new parser context or NULL
12528
/*
 * Build a parser context that reads through caller-supplied I/O callbacks:
 * wrap ioread/ioclose/ioctx in an input buffer, create the context, install
 * a private copy of the caller SAX handler, and push an input stream built
 * on that buffer.
 * NOTE(review): several short lines (return statements, braces, else
 * keywords and possibly a NULL guard on sax) are not visible in this listing.
 */
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12529
xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12530
void *ioctx, xmlCharEncoding enc) {
12531
xmlParserCtxtPtr ctxt;
12532
xmlParserInputPtr inputStream;
12533
xmlParserInputBufferPtr buf;
12535
/* A read callback is mandatory; the close callback is optional. */
if (ioread == NULL) return(NULL);
12537
buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12539
/*
 * The statement guarded by this test is not visible; presumably it closes
 * ioctx when buffer creation failed -- confirm.
 */
if (ioclose != NULL)
12544
ctxt = xmlNewParserCtxt();
12545
if (ctxt == NULL) {
12546
xmlFreeParserInputBuffer(buf);
12550
/*
 * With SAX1 support compiled in, the existing handler is freed only when it
 * is not the shared xmlDefaultSAXHandler; without it the xmlFree below is
 * unconditional.
 */
#ifdef LIBXML_SAX1_ENABLED
12551
if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12552
#endif /* LIBXML_SAX1_ENABLED */
12553
xmlFree(ctxt->sax);
12554
ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12555
/*
 * NOTE(review): this failure path and the inputStream == NULL path below
 * free the context but show no xmlFreeParserInputBuffer(buf), unlike the
 * ctxt == NULL path above -- check whether buf leaks here.
 */
if (ctxt->sax == NULL) {
12556
xmlErrMemory(ctxt, NULL);
12557
xmlFreeParserCtxt(ctxt);
12560
/*
 * Zero the private handler, then copy the caller handler into it: the full
 * xmlSAXHandler when it is tagged XML_SAX2_MAGIC, otherwise only the
 * xmlSAXHandlerV1-sized prefix (the else between the two memcpy calls is
 * not visible in this listing).
 */
memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12561
if (sax->initialized == XML_SAX2_MAGIC)
12562
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12564
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12565
if (user_data != NULL)
12566
ctxt->userData = user_data;
12569
inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12570
if (inputStream == NULL) {
12571
xmlFreeParserCtxt(ctxt);
12574
inputPush(ctxt, inputStream);
12579
#ifdef LIBXML_VALID_ENABLED
12580
/************************************************************************
12582
* Front ends when parsing a DTD *
12584
************************************************************************/
12588
* @sax: the SAX handler block or NULL
12589
* @input: an Input Buffer
12590
* @enc: the charset encoding if known
12592
* Load and parse a DTD
12594
* Returns the resulting xmlDtdPtr or NULL in case of error.
12595
* @input will be freed by the function in any case.
12599
/*
 * Parse a DTD read from an existing input buffer and return it as a
 * stand-alone xmlDtdPtr (NULL on failure).  A temporary context and a
 * temporary document are created, the input is parsed as an external
 * subset, and on a well-formed result the subset is detached from the
 * temporary document before that document is freed.
 * NOTE(review): several short lines (return statements, braces, local
 * declarations, the assignment of sax to ctxt->sax) are not visible in
 * this listing.
 */
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12600
xmlCharEncoding enc) {
12601
xmlDtdPtr ret = NULL;
12602
xmlParserCtxtPtr ctxt;
12603
xmlParserInputPtr pinput = NULL;
12609
ctxt = xmlNewParserCtxt();
12610
/* The header comment promises input is freed in any case, so free it here. */
if (ctxt == NULL) {
12611
xmlFreeParserInputBuffer(input);
12615
/* We are loading a DTD */
12616
ctxt->options |= XML_PARSE_DTDLOAD;
12619
* Set-up the SAX context
12622
if (ctxt->sax != NULL)
12623
xmlFree(ctxt->sax);
12625
ctxt->userData = ctxt;
12627
xmlDetectSAX2(ctxt);
12630
* generate a parser input from the I/O handler
12633
pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12634
/*
 * Each cleanup path clears ctxt->sax before xmlFreeParserCtxt() when the
 * caller passed a handler; presumably ctxt->sax points at that handler (set
 * in a line not shown) and must not be freed with the context -- confirm.
 */
if (pinput == NULL) {
12635
if (sax != NULL) ctxt->sax = NULL;
12636
xmlFreeParserInputBuffer(input);
12637
xmlFreeParserCtxt(ctxt);
12642
* plug some encoding conversion routines here.
12644
if (xmlPushInput(ctxt, pinput) < 0) {
12645
if (sax != NULL) ctxt->sax = NULL;
12646
xmlFreeParserCtxt(ctxt);
12649
if (enc != XML_CHAR_ENCODING_NONE) {
12650
xmlSwitchEncoding(ctxt, enc);
12653
/* Reset the stream bookkeeping so base and cur start at the current position. */
pinput->filename = NULL;
12656
pinput->base = ctxt->input->cur;
12657
pinput->cur = ctxt->input->cur;
12658
pinput->free = NULL;
12661
* let's parse that entity knowing it's an external subset.
12663
ctxt->inSubset = 2;
12664
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12665
/*
 * NOTE(review): unlike the matching path in xmlSAXParseDTD, no sax reset or
 * xmlFreeParserCtxt() is visible on this failure path -- check for a leak
 * of the context.
 */
if (ctxt->myDoc == NULL) {
12666
xmlErrMemory(ctxt, "New Doc failed");
12669
ctxt->myDoc->properties = XML_DOC_INTERNAL;
12670
ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12671
BAD_CAST "none", BAD_CAST "none");
12673
/* With no caller-specified encoding, sniff it from 4 available bytes. */
if ((enc == XML_CHAR_ENCODING_NONE) &&
12674
((ctxt->input->end - ctxt->input->cur) >= 4)) {
12676
* Get the 4 first bytes and decode the charset
12677
* if enc != XML_CHAR_ENCODING_NONE
12678
* plug some encoding conversion routines.
12684
enc = xmlDetectCharEncoding(start, 4);
12685
if (enc != XML_CHAR_ENCODING_NONE) {
12686
xmlSwitchEncoding(ctxt, enc);
12690
xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12692
/*
 * On a well-formed parse, take the external subset as the result and clear
 * the document reference to it before xmlFreeDoc() (presumably so the
 * returned DTD survives the free -- confirm).  The body of the loop over
 * its children is not visible in this listing.
 */
if (ctxt->myDoc != NULL) {
12693
if (ctxt->wellFormed) {
12694
ret = ctxt->myDoc->extSubset;
12695
ctxt->myDoc->extSubset = NULL;
12700
tmp = ret->children;
12701
while (tmp != NULL) {
12709
xmlFreeDoc(ctxt->myDoc);
12710
ctxt->myDoc = NULL;
12712
if (sax != NULL) ctxt->sax = NULL;
12713
xmlFreeParserCtxt(ctxt);
12720
* @sax: the SAX handler block
12721
* @ExternalID: a NAME* containing the External ID of the DTD
12722
* @SystemID: a NAME* containing the URL to the DTD
12724
* Load and parse an external subset.
12726
* Returns the resulting xmlDtdPtr or NULL in case of error.
12730
/*
 * Load and parse an external DTD subset identified by ExternalID and/or
 * SystemID and return it as a stand-alone xmlDtdPtr (NULL on failure).
 * The entity is fetched through the SAX resolveEntity callback, parsed in a
 * temporary context and document, and detached from that document when the
 * parse is well formed.
 * NOTE(review): several short lines (return statements, braces, local
 * declarations, the assignment of sax to ctxt->sax) are not visible in
 * this listing.
 */
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12731
const xmlChar *SystemID) {
12732
xmlDtdPtr ret = NULL;
12733
xmlParserCtxtPtr ctxt;
12734
xmlParserInputPtr input = NULL;
12735
xmlCharEncoding enc;
12736
xmlChar* systemIdCanonic;
12738
/* At least one identifier is required. */
if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12740
ctxt = xmlNewParserCtxt();
12741
if (ctxt == NULL) {
12745
/* We are loading a DTD */
12746
ctxt->options |= XML_PARSE_DTDLOAD;
12749
* Set-up the SAX context
12752
if (ctxt->sax != NULL)
12753
xmlFree(ctxt->sax);
12755
ctxt->userData = ctxt;
12759
* Canonicalise the system ID
12761
systemIdCanonic = xmlCanonicPath(SystemID);
12762
/*
 * A NULL result counts as failure only when a SystemID was supplied.
 * NOTE(review): the other cleanup paths in this function clear ctxt->sax
 * before xmlFreeParserCtxt() when the caller passed a handler; no such
 * reset is visible here -- confirm the caller handler is not freed.
 */
if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12763
xmlFreeParserCtxt(ctxt);
12768
* Ask the Entity resolver to load the damn thing
12771
if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12772
input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12774
/* Failure paths release the context and the canonic system ID copy. */
if (input == NULL) {
12775
if (sax != NULL) ctxt->sax = NULL;
12776
xmlFreeParserCtxt(ctxt);
12777
if (systemIdCanonic != NULL)
12778
xmlFree(systemIdCanonic);
12783
* plug some encoding conversion routines here.
12785
if (xmlPushInput(ctxt, input) < 0) {
12786
if (sax != NULL) ctxt->sax = NULL;
12787
xmlFreeParserCtxt(ctxt);
12788
if (systemIdCanonic != NULL)
12789
xmlFree(systemIdCanonic);
12792
/* Sniff the encoding from 4 available bytes. */
if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12793
enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12794
xmlSwitchEncoding(ctxt, enc);
12797
/*
 * systemIdCanonic becomes the input filename when the input has none; the
 * xmlFree below presumably belongs to the alternative branch (its else is
 * not visible in this listing).
 */
if (input->filename == NULL)
12798
input->filename = (char *) systemIdCanonic;
12800
xmlFree(systemIdCanonic);
12803
/* Reset the stream bookkeeping so base and cur start at the current position. */
input->base = ctxt->input->cur;
12804
input->cur = ctxt->input->cur;
12805
input->free = NULL;
12808
* let's parse that entity knowing it's an external subset.
12810
ctxt->inSubset = 2;
12811
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12812
if (ctxt->myDoc == NULL) {
12813
xmlErrMemory(ctxt, "New Doc failed");
12814
if (sax != NULL) ctxt->sax = NULL;
12815
xmlFreeParserCtxt(ctxt);
12818
ctxt->myDoc->properties = XML_DOC_INTERNAL;
12819
ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12820
ExternalID, SystemID);
12821
xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12823
/*
 * On a well-formed parse, take the external subset as the result and clear
 * the document reference to it before xmlFreeDoc() (presumably so the
 * returned DTD survives the free -- confirm).  The body of the loop over
 * its children is not visible in this listing.
 */
if (ctxt->myDoc != NULL) {
12824
if (ctxt->wellFormed) {
12825
ret = ctxt->myDoc->extSubset;
12826
ctxt->myDoc->extSubset = NULL;
12831
tmp = ret->children;
12832
while (tmp != NULL) {
12840
xmlFreeDoc(ctxt->myDoc);
12841
ctxt->myDoc = NULL;
12843
if (sax != NULL) ctxt->sax = NULL;
12844
xmlFreeParserCtxt(ctxt);
12852
* @ExternalID: a NAME* containing the External ID of the DTD
12853
* @SystemID: a NAME* containing the URL to the DTD
12855
* Load and parse an external subset.
12857
* Returns the resulting xmlDtdPtr or NULL in case of error.
12861
/*
 * Convenience wrapper: load and parse an external subset by delegating to
 * xmlSAXParseDTD() with a NULL SAX handler, returning its result unchanged.
 */
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12862
return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12864
#endif /* LIBXML_VALID_ENABLED */
12866
/************************************************************************
12868
* Front ends when parsing an Entity *
12870
************************************************************************/
12873
* xmlParseCtxtExternalEntity:
12874
* @ctx: the existing parsing context
12875
* @URL: the URL for the entity to load
12876
* @ID: the System ID for the entity to load
12877
* @lst: the return value for the set of parsed nodes
12879
* Parse an external general entity within an existing parsing context
12880
* An external general parsed entity is well-formed if it matches the
12881
* production labeled extParsedEnt.
12883
* [78] extParsedEnt ::= TextDecl? content
12885
* Returns 0 if the entity is well formed, -1 in case of args problem and
12886
* the parser error code otherwise
12890
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12891
const xmlChar *ID, xmlNodePtr *lst) {
/*
 * Overview: a child parser context is created for the entity with
 * xmlCreateEntityParserCtxtInternal(), given the SAX handler, dictionary
 * and default-attribute tables of ctx, and run with xmlParseContent()
 * underneath a temporary "pseudoroot" node that belongs to a scratch
 * document (newDoc). Everything borrowed from ctx is detached again
 * before the child context and the scratch document are freed.
 */
12892
xmlParserCtxtPtr ctxt;
12894
xmlNodePtr newRoot;
12895
xmlSAXHandlerPtr oldsax = NULL;
12898
xmlCharEncoding enc;
12900
if (ctx == NULL) return(-1);
/*
 * Recursion guard: a depth above 40 (above 1024 when XML_PARSE_HUGE is
 * set in ctx->options) is reported as an entity loop.
 */
12902
if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12903
(ctx->depth > 1024)) {
12904
return(XML_ERR_ENTITY_LOOP);
12909
if ((URL == NULL) && (ID == NULL))
12911
if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12914
ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
12915
if (ctxt == NULL) {
/*
 * Run the child with the parent's SAX handler. The child's own handler
 * is remembered in oldsax and put back before the child is freed.
 */
12919
oldsax = ctxt->sax;
12920
ctxt->sax = ctx->sax;
12921
xmlDetectSAX2(ctxt);
12922
newDoc = xmlNewDoc(BAD_CAST "1.0");
12923
if (newDoc == NULL) {
12924
xmlFreeParserCtxt(ctxt);
12927
newDoc->properties = XML_DOC_INTERNAL;
/*
 * NOTE(review): ctx->myDoc is dereferenced here, yet it is tested against
 * NULL again a few statements further down -- confirm that the early
 * ctx->myDoc == NULL test above always leaves the function.
 */
12928
if (ctx->myDoc->dict) {
12929
newDoc->dict = ctx->myDoc->dict;
12930
xmlDictReference(newDoc->dict);
/*
 * The DTD subsets are shared with the parent document, not copied; they
 * are reset to NULL on newDoc before each xmlFreeDoc(newDoc) below.
 */
12932
if (ctx->myDoc != NULL) {
12933
newDoc->intSubset = ctx->myDoc->intSubset;
12934
newDoc->extSubset = ctx->myDoc->extSubset;
12936
if (ctx->myDoc->URL != NULL) {
12937
newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12939
newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12940
if (newRoot == NULL) {
12941
ctxt->sax = oldsax;
12942
xmlFreeParserCtxt(ctxt);
12943
newDoc->intSubset = NULL;
12944
newDoc->extSubset = NULL;
12945
xmlFreeDoc(newDoc);
/* The pseudoroot becomes the bottom entry of the child's node stack. */
12948
xmlAddChild((xmlNodePtr) newDoc, newRoot);
12949
nodePush(ctxt, newDoc->children);
12950
if (ctx->myDoc == NULL) {
12951
ctxt->myDoc = newDoc;
12953
ctxt->myDoc = ctx->myDoc;
12954
newDoc->children->doc = ctx->myDoc;
12958
* Get the 4 first bytes and decode the charset
12959
* if enc != XML_CHAR_ENCODING_NONE
12960
* plug some encoding conversion routines.
12963
if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12968
enc = xmlDetectCharEncoding(start, 4);
12969
if (enc != XML_CHAR_ENCODING_NONE) {
12970
xmlSwitchEncoding(ctxt, enc);
12975
* Parse a possible text declaration first
12977
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12978
xmlParseTextDecl(ctxt);
12980
* An XML-1.0 document can't reference an entity not XML-1.0
12982
if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12983
(!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12984
xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12985
"Version mismatch between document and entity\n");
12990
* If the user provided its own SAX callbacks then reuse the
12991
* useData callback field, otherwise the expected setup in a
12992
* DOM builder is to have userData == ctxt
12994
if (ctx->userData == ctx)
12995
ctxt->userData = ctxt;
12997
ctxt->userData = ctx->userData;
13000
* Doing validity checking on chunk doesn't make sense
/* Copy the parent's parsing settings; the child runs one level deeper. */
13002
ctxt->instate = XML_PARSER_CONTENT;
13003
ctxt->validate = ctx->validate;
13004
ctxt->valid = ctx->valid;
13005
ctxt->loadsubset = ctx->loadsubset;
13006
ctxt->depth = ctx->depth + 1;
13007
ctxt->replaceEntities = ctx->replaceEntities;
13008
if (ctxt->validate) {
13009
ctxt->vctxt.error = ctx->vctxt.error;
13010
ctxt->vctxt.warning = ctx->vctxt.warning;
13012
ctxt->vctxt.error = NULL;
13013
ctxt->vctxt.warning = NULL;
13015
ctxt->vctxt.nodeTab = NULL;
13016
ctxt->vctxt.nodeNr = 0;
13017
ctxt->vctxt.nodeMax = 0;
13018
ctxt->vctxt.node = NULL;
/*
 * Drop the child's own dictionary and use the parent's, then refresh the
 * cached "xml" / "xmlns" / XML-namespace strings from it.
 */
13019
if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13020
ctxt->dict = ctx->dict;
13021
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13022
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13023
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13024
ctxt->dictNames = ctx->dictNames;
/*
 * attsDefault/attsSpecial are borrowed from ctx; they are set back to
 * NULL on the child before xmlFreeParserCtxt(ctxt) at the end.
 */
13025
ctxt->attsDefault = ctx->attsDefault;
13026
ctxt->attsSpecial = ctx->attsSpecial;
13027
ctxt->linenumbers = ctx->linenumbers;
13029
xmlParseContent(ctxt);
/* Hand the validation flags back to the parent context. */
13031
ctx->validate = ctxt->validate;
13032
ctx->valid = ctxt->valid;
/*
 * Leftover input after the content is an error: "</" is reported as not
 * well balanced, any other non-zero RAW value as extra content.
 */
13033
if ((RAW == '<') && (NXT(1) == '/')) {
13034
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13035
} else if (RAW != 0) {
13036
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
/* The node stack must have unwound back to the pseudoroot. */
13038
if (ctxt->node != newDoc->children) {
13039
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13042
if (!ctxt->wellFormed) {
13043
if (ctxt->errNo == 0)
13052
* Return the newly created nodeset after unlinking it from
13053
* they pseudo parent.
13055
cur = newDoc->children->children;
13057
while (cur != NULL) {
13058
cur->parent = NULL;
13061
newDoc->children->children = NULL;
/* Detach everything borrowed from ctx before freeing the child. */
13065
ctxt->sax = oldsax;
13067
ctxt->attsDefault = NULL;
13068
ctxt->attsSpecial = NULL;
13069
xmlFreeParserCtxt(ctxt);
13070
newDoc->intSubset = NULL;
13071
newDoc->extSubset = NULL;
13072
xmlFreeDoc(newDoc);
13078
* xmlParseExternalEntityPrivate:
13079
* @doc: the document the chunk pertains to
13080
* @oldctxt: the previous parser context if available
13081
* @sax: the SAX handler block (possibly NULL)
13082
* @user_data: The user data returned on SAX callbacks (possibly NULL)
13083
* @depth: Used for loop detection, use 0
13084
* @URL: the URL for the entity to load
13085
* @ID: the System ID for the entity to load
13086
* @list: the return value for the set of parsed nodes
13088
* Private version of xmlParseExternalEntity()
13090
* Returns 0 if the entity is well formed, -1 in case of args problem and
13091
* the parser error code otherwise
13094
static xmlParserErrors
13095
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13096
xmlSAXHandlerPtr sax,
13097
void *user_data, int depth, const xmlChar *URL,
13098
const xmlChar *ID, xmlNodePtr *list) {
/*
 * Overview: builds a child context for the entity, optionally seeded from
 * oldctxt, parses the entity content under a temporary "pseudoroot" node
 * of a scratch document (newDoc) that shares doc's DTD subsets and
 * dictionary, then frees both the child context and the scratch document.
 * ret starts as XML_ERR_OK and is replaced by an error code when the child
 * context ends up not well formed.
 */
13099
xmlParserCtxtPtr ctxt;
13101
xmlNodePtr newRoot;
13102
xmlSAXHandlerPtr oldsax = NULL;
13103
xmlParserErrors ret = XML_ERR_OK;
13105
xmlCharEncoding enc;
/*
 * Recursion guard: a depth above 40 is reported as an entity loop when
 * there is no parent context or the parent lacks XML_PARSE_HUGE.
 */
13107
if (((depth > 40) &&
13108
((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13110
return(XML_ERR_ENTITY_LOOP);
13115
if ((URL == NULL) && (ID == NULL))
13116
return(XML_ERR_INTERNAL_ERROR);
13118
return(XML_ERR_INTERNAL_ERROR);
13121
ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13122
if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13123
ctxt->userData = ctxt;
/*
 * With a parent context, copy its settings and share its node_seq
 * storage. The node_seq fields are zeroed on the child before every
 * xmlFreeParserCtxt(ctxt) call visible in this function.
 */
13124
if (oldctxt != NULL) {
13125
ctxt->_private = oldctxt->_private;
13126
ctxt->loadsubset = oldctxt->loadsubset;
13127
ctxt->validate = oldctxt->validate;
13128
ctxt->external = oldctxt->external;
13129
ctxt->record_info = oldctxt->record_info;
13130
ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13131
ctxt->node_seq.length = oldctxt->node_seq.length;
13132
ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13135
* Doing validity checking on chunk without context
13136
* doesn't make sense
13138
ctxt->_private = NULL;
13139
ctxt->validate = 0;
13140
ctxt->external = 2;
13141
ctxt->loadsubset = 0;
13144
oldsax = ctxt->sax;
/* Caller-supplied user data replaces the default userData == ctxt. */
13146
if (user_data != NULL)
13147
ctxt->userData = user_data;
13149
xmlDetectSAX2(ctxt);
13150
newDoc = xmlNewDoc(BAD_CAST "1.0");
13151
if (newDoc == NULL) {
13152
ctxt->node_seq.maximum = 0;
13153
ctxt->node_seq.length = 0;
13154
ctxt->node_seq.buffer = NULL;
13155
xmlFreeParserCtxt(ctxt);
13156
return(XML_ERR_INTERNAL_ERROR);
/*
 * Share, rather than copy, the DTD subsets and dictionary of doc;
 * xmlDictReference() is called on the shared dictionary and the subsets
 * are reset to NULL on newDoc before each xmlFreeDoc(newDoc) below.
 */
13158
newDoc->properties = XML_DOC_INTERNAL;
13159
newDoc->intSubset = doc->intSubset;
13160
newDoc->extSubset = doc->extSubset;
13161
newDoc->dict = doc->dict;
13162
xmlDictReference(newDoc->dict);
13164
if (doc->URL != NULL) {
13165
newDoc->URL = xmlStrdup(doc->URL);
13167
newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13168
if (newRoot == NULL) {
13170
ctxt->sax = oldsax;
13171
ctxt->node_seq.maximum = 0;
13172
ctxt->node_seq.length = 0;
13173
ctxt->node_seq.buffer = NULL;
13174
xmlFreeParserCtxt(ctxt);
13175
newDoc->intSubset = NULL;
13176
newDoc->extSubset = NULL;
13177
xmlFreeDoc(newDoc);
13178
return(XML_ERR_INTERNAL_ERROR);
/* The pseudoroot becomes the bottom entry of the child's node stack. */
13180
xmlAddChild((xmlNodePtr) newDoc, newRoot);
13181
nodePush(ctxt, newDoc->children);
/* Point the pseudoroot's doc field at the caller's document. */
13183
newRoot->doc = doc;
13186
* Get the 4 first bytes and decode the charset
13187
* if enc != XML_CHAR_ENCODING_NONE
13188
* plug some encoding conversion routines.
13191
if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13196
enc = xmlDetectCharEncoding(start, 4);
13197
if (enc != XML_CHAR_ENCODING_NONE) {
13198
xmlSwitchEncoding(ctxt, enc);
13203
* Parse a possible text declaration first
13205
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13206
xmlParseTextDecl(ctxt);
13209
ctxt->instate = XML_PARSER_CONTENT;
13210
ctxt->depth = depth;
13212
xmlParseContent(ctxt);
/*
 * Leftover input after the content is an error: "</" is reported as not
 * well balanced, any other non-zero RAW value as extra content.
 */
13214
if ((RAW == '<') && (NXT(1) == '/')) {
13215
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13216
} else if (RAW != 0) {
13217
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
/* The node stack must have unwound back to the pseudoroot. */
13219
if (ctxt->node != newDoc->children) {
13220
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
/* Map a failed parse to an error code; errNo == 0 falls back to a generic one. */
13223
if (!ctxt->wellFormed) {
13224
if (ctxt->errNo == 0)
13225
ret = XML_ERR_INTERNAL_ERROR;
13227
ret = (xmlParserErrors)ctxt->errNo;
13229
if (list != NULL) {
13233
* Return the newly created nodeset after unlinking it from
13234
* they pseudo parent.
13236
cur = newDoc->children->children;
13238
while (cur != NULL) {
13239
cur->parent = NULL;
13242
newDoc->children->children = NULL;
13248
* Record in the parent context the number of entities replacement
13249
* done when parsing that reference.
13251
if (oldctxt != NULL)
13252
oldctxt->nbentities += ctxt->nbentities;
/*
 * NOTE(review): oldctxt is tested against NULL just above, but from here
 * on it is dereferenced without a test (sizeentities, lastError,
 * node_seq). xmlParseExternalEntity() passes NULL for oldctxt -- confirm
 * whether these accesses need a guard.
 */
13255
* Also record the size of the entity parsed
13257
if (ctxt->input != NULL) {
13258
oldctxt->sizeentities += ctxt->input->consumed;
13259
oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13262
* And record the last error if any
13264
if (ctxt->lastError.code != XML_ERR_OK)
13265
xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
/*
 * Copy the node_seq fields back to the parent, then zero them on the
 * child -- presumably so that xmlFreeParserCtxt() leaves the shared
 * storage alone (TODO confirm against xmlFreeParserCtxt).
 */
13268
ctxt->sax = oldsax;
13269
oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13270
oldctxt->node_seq.length = ctxt->node_seq.length;
13271
oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13272
ctxt->node_seq.maximum = 0;
13273
ctxt->node_seq.length = 0;
13274
ctxt->node_seq.buffer = NULL;
13275
xmlFreeParserCtxt(ctxt);
13276
newDoc->intSubset = NULL;
13277
newDoc->extSubset = NULL;
13278
xmlFreeDoc(newDoc);
13283
#ifdef LIBXML_SAX1_ENABLED
13285
* xmlParseExternalEntity:
13286
* @doc: the document the chunk pertains to
13287
* @sax: the SAX handler block (possibly NULL)
13288
* @user_data: The user data returned on SAX callbacks (possibly NULL)
13289
* @depth: Used for loop detection, use 0
13290
* @URL: the URL for the entity to load
13291
* @ID: the System ID for the entity to load
13292
* @lst: the return value for the set of parsed nodes
13294
* Parse an external general entity
13295
* An external general parsed entity is well-formed if it matches the
13296
* production labeled extParsedEnt.
13298
* [78] extParsedEnt ::= TextDecl? content
13300
* Returns 0 if the entity is well formed, -1 in case of args problem and
13301
* the parser error code otherwise
13305
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13306
int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
/*
 * Public wrapper: delegates to xmlParseExternalEntityPrivate(), passing
 * NULL as the parent parser context.
 */
13307
return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13312
* xmlParseBalancedChunkMemory:
13313
* @doc: the document the chunk pertains to
13314
* @sax: the SAX handler block (possibly NULL)
13315
* @user_data: The user data returned on SAX callbacks (possibly NULL)
13316
* @depth: Used for loop detection, use 0
13317
* @string: the input string in UTF8 or ISO-Latin (zero terminated)
13318
* @lst: the return value for the set of parsed nodes
13320
* Parse a well-balanced chunk of an XML document
13321
* called by the parser
13322
* The allowed sequence for the Well Balanced Chunk is the one defined by
13323
* the content production in the XML grammar:
13325
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13327
* Returns 0 if the chunk is well balanced, -1 in case of args problem and
13328
* the parser error code otherwise
13332
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13333
void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
/*
 * Wrapper: calls xmlParseBalancedChunkMemoryRecover() with the same
 * arguments and 0 as the final (recover) argument.
 */
13334
return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13335
depth, string, lst, 0 );
13337
#endif /* LIBXML_SAX1_ENABLED */
13340
* xmlParseBalancedChunkMemoryInternal:
13341
* @oldctxt: the existing parsing context
13342
* @string: the input string in UTF8 or ISO-Latin (zero terminated)
13343
* @user_data: the user data field for the parser context
13344
* @lst: the return value for the set of parsed nodes
13347
* Parse a well-balanced chunk of an XML document
13348
* called by the parser
13349
* The allowed sequence for the Well Balanced Chunk is the one defined by
13350
* the content production in the XML grammar:
13352
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13354
* Returns XML_ERR_OK if the chunk is well balanced, and the parser
13355
* error code otherwise
13357
* In case recover is set to 1, the nodelist will not be empty even if
13358
* the parsed chunk is not well balanced.
13360
static xmlParserErrors
13361
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13362
const xmlChar *string, void *user_data, xmlNodePtr *lst) {
/*
 * Overview: parses string in a fresh memory parser context that reuses
 * the dictionary, SAX handler, namespace stack and default-attribute
 * tables of oldctxt. Content is built under a temporary "pseudoroot"
 * node; when oldctxt already has a document, that document's children
 * and last pointers are saved in content/last and restored afterwards.
 */
13363
xmlParserCtxtPtr ctxt;
13364
xmlDocPtr newDoc = NULL;
13365
xmlNodePtr newRoot;
13366
xmlSAXHandlerPtr oldsax = NULL;
13367
xmlNodePtr content = NULL;
13368
xmlNodePtr last = NULL;
13370
xmlParserErrors ret = XML_ERR_OK;
/*
 * Recursion guard: a depth above 40 (above 1024 when XML_PARSE_HUGE is
 * set in oldctxt->options) is reported as an entity loop.
 */
13375
if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13376
(oldctxt->depth > 1024)) {
13377
return(XML_ERR_ENTITY_LOOP);
13383
if (string == NULL)
13384
return(XML_ERR_INTERNAL_ERROR);
13386
size = xmlStrlen(string);
13388
ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13389
if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13390
if (user_data != NULL)
13391
ctxt->userData = user_data;
13393
ctxt->userData = ctxt;
/*
 * Drop the child's own dictionary and use the parent's, then refresh the
 * cached "xml" / "xmlns" / XML-namespace strings from it.
 */
13394
if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13395
ctxt->dict = oldctxt->dict;
13396
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13397
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13398
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13401
/* propagate namespaces down the entity */
13402
for (i = 0;i < oldctxt->nsNr;i += 2) {
13403
nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
/*
 * Run the child with the parent's SAX handler. The child's own handler
 * is remembered in oldsax and put back before the child is freed.
 */
13407
oldsax = ctxt->sax;
13408
ctxt->sax = oldctxt->sax;
13409
xmlDetectSAX2(ctxt);
13410
ctxt->replaceEntities = oldctxt->replaceEntities;
13411
ctxt->options = oldctxt->options;
13413
ctxt->_private = oldctxt->_private;
/*
 * Without a parent document, parse into a scratch document (newDoc);
 * newDoc is freed on the exit paths below whenever it is non-NULL.
 */
13414
if (oldctxt->myDoc == NULL) {
13415
newDoc = xmlNewDoc(BAD_CAST "1.0");
13416
if (newDoc == NULL) {
13417
ctxt->sax = oldsax;
13419
xmlFreeParserCtxt(ctxt);
13420
return(XML_ERR_INTERNAL_ERROR);
13422
newDoc->properties = XML_DOC_INTERNAL;
13423
newDoc->dict = ctxt->dict;
13424
xmlDictReference(newDoc->dict);
13425
ctxt->myDoc = newDoc;
13427
ctxt->myDoc = oldctxt->myDoc;
13428
content = ctxt->myDoc->children;
13429
last = ctxt->myDoc->last;
13431
newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13432
if (newRoot == NULL) {
13433
ctxt->sax = oldsax;
13435
xmlFreeParserCtxt(ctxt);
13436
if (newDoc != NULL) {
13437
xmlFreeDoc(newDoc);
13439
return(XML_ERR_INTERNAL_ERROR);
/*
 * Temporarily make the pseudoroot the document's only child and the
 * bottom entry of the child's node stack.
 */
13441
ctxt->myDoc->children = NULL;
13442
ctxt->myDoc->last = NULL;
13443
xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13444
nodePush(ctxt, ctxt->myDoc->children);
13445
ctxt->instate = XML_PARSER_CONTENT;
13446
ctxt->depth = oldctxt->depth + 1;
/*
 * The child itself never validates; new element nodes are validated
 * against oldctxt in the unlinking loop further down.
 */
13448
ctxt->validate = 0;
13449
ctxt->loadsubset = oldctxt->loadsubset;
13450
if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13452
* ID/IDREF registration will be done in xmlValidateElement below
13454
ctxt->loadsubset |= XML_SKIP_IDS;
/*
 * attsDefault/attsSpecial are borrowed from oldctxt; they are set back
 * to NULL on the child before xmlFreeParserCtxt(ctxt) at the end.
 */
13456
ctxt->dictNames = oldctxt->dictNames;
13457
ctxt->attsDefault = oldctxt->attsDefault;
13458
ctxt->attsSpecial = oldctxt->attsSpecial;
13460
xmlParseContent(ctxt);
/*
 * Leftover input after the content is an error: "</" is reported as not
 * well balanced, any other non-zero RAW value as extra content.
 */
13461
if ((RAW == '<') && (NXT(1) == '/')) {
13462
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13463
} else if (RAW != 0) {
13464
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
/* The node stack must have unwound back to the pseudoroot. */
13466
if (ctxt->node != ctxt->myDoc->children) {
13467
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
/* Map a failed parse to an error code; errNo == 0 falls back to a generic one. */
13470
if (!ctxt->wellFormed) {
13471
if (ctxt->errNo == 0)
13472
ret = XML_ERR_INTERNAL_ERROR;
13474
ret = (xmlParserErrors)ctxt->errNo;
/* Nodes are only handed back through lst when parsing succeeded. */
13479
if ((lst != NULL) && (ret == XML_ERR_OK)) {
13483
* Return the newly created nodeset after unlinking it from
13484
* they pseudo parent.
13486
cur = ctxt->myDoc->children->children;
13488
while (cur != NULL) {
13489
#ifdef LIBXML_VALID_ENABLED
13490
if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13491
(oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13492
(cur->type == XML_ELEMENT_NODE)) {
13493
oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13494
oldctxt->myDoc, cur);
13496
#endif /* LIBXML_VALID_ENABLED */
13497
cur->parent = NULL;
13500
ctxt->myDoc->children->children = NULL;
/* Free the pseudoroot and restore the document's saved children/last. */
13502
if (ctxt->myDoc != NULL) {
13503
xmlFreeNode(ctxt->myDoc->children);
13504
ctxt->myDoc->children = content;
13505
ctxt->myDoc->last = last;
13509
* Record in the parent context the number of entities replacement
13510
* done when parsing that reference.
/*
 * NOTE(review): oldctxt was already dereferenced at the top of this
 * function, so the NULL test below looks redundant -- confirm.
 */
13512
if (oldctxt != NULL)
13513
oldctxt->nbentities += ctxt->nbentities;
13516
* Also record the last error if any
13518
if (ctxt->lastError.code != XML_ERR_OK)
13519
xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
/* Detach everything borrowed from oldctxt before freeing the child. */
13521
ctxt->sax = oldsax;
13523
ctxt->attsDefault = NULL;
13524
ctxt->attsSpecial = NULL;
13525
xmlFreeParserCtxt(ctxt);
13526
if (newDoc != NULL) {
13527
xmlFreeDoc(newDoc);
13534
* xmlParseInNodeContext:
13535
* @node: the context node
13536
* @data: the input string
13537
* @datalen: the input string length in bytes
13538
* @options: a combination of xmlParserOption
13539
* @lst: the return value for the set of parsed nodes
13541
* Parse a well-balanced chunk of an XML document
13542
* within the context (DTD, namespaces, etc ...) of the given node.
13544
* The allowed sequence for the data is a Well Balanced Chunk defined by
13545
* the content production in the XML grammar:
13547
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13549
* Returns XML_ERR_OK if the chunk is well balanced, and the parser
13550
* error code otherwise
13553
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13554
int options, xmlNodePtr *lst) {
/*
 * Overview: checks the arguments, walks up from node to the nearest
 * element or document node, creates an XML or (when LIBXML_HTML_ENABLED)
 * HTML memory parser context matching the owning document's type, and
 * parses data as content. A placeholder comment node ("fake") is added
 * as a child of node before parsing and unlinked afterwards --
 * presumably to mark where the newly created siblings start.
 */
13556
xmlParserCtxtPtr ctxt;
13557
xmlDocPtr doc = NULL;
13558
xmlNodePtr fake, cur;
13561
xmlParserErrors ret = XML_ERR_OK;
13564
* check all input parameters, grab the document
13566
if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13567
return(XML_ERR_INTERNAL_ERROR);
13568
switch (node->type) {
13569
case XML_ELEMENT_NODE:
13570
case XML_ATTRIBUTE_NODE:
13571
case XML_TEXT_NODE:
13572
case XML_CDATA_SECTION_NODE:
13573
case XML_ENTITY_REF_NODE:
13575
case XML_COMMENT_NODE:
13576
case XML_DOCUMENT_NODE:
13577
case XML_HTML_DOCUMENT_NODE:
13580
return(XML_ERR_INTERNAL_ERROR);
/* Climb to the closest ancestor-or-self that is an element or a document. */
13583
while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13584
(node->type != XML_DOCUMENT_NODE) &&
13585
(node->type != XML_HTML_DOCUMENT_NODE))
13586
node = node->parent;
13588
return(XML_ERR_INTERNAL_ERROR);
13589
if (node->type == XML_ELEMENT_NODE)
13592
doc = (xmlDocPtr) node;
13594
return(XML_ERR_INTERNAL_ERROR);
13597
* allocate a context and set-up everything not related to the
13598
* node position in the tree
/* The parser flavour (XML vs. HTML) follows the document's node type. */
13600
if (doc->type == XML_DOCUMENT_NODE)
13601
ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13602
#ifdef LIBXML_HTML_ENABLED
13603
else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13604
ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13606
* When parsing in context, it makes no sense to add implied
13607
* elements like html/body/etc...
13609
options |= HTML_PARSE_NOIMPLIED;
13613
return(XML_ERR_INTERNAL_ERROR);
13616
return(XML_ERR_NO_MEMORY);
13619
* Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13620
* We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13621
* we must wait until the last moment to free the original one.
13623
if (doc->dict != NULL) {
13624
if (ctxt->dict != NULL)
13625
xmlDictFree(ctxt->dict);
13626
ctxt->dict = doc->dict;
13628
options |= XML_PARSE_NODICT;
/*
 * Copy the document's encoding string onto the context and switch the
 * input to the matching handler; no handler means unsupported encoding.
 */
13630
if (doc->encoding != NULL) {
13631
xmlCharEncodingHandlerPtr hdlr;
13633
if (ctxt->encoding != NULL)
13634
xmlFree((xmlChar *) ctxt->encoding);
13635
ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13637
hdlr = xmlFindCharEncodingHandler(doc->encoding);
13638
if (hdlr != NULL) {
13639
xmlSwitchToEncoding(ctxt, hdlr);
13641
return(XML_ERR_UNSUPPORTED_ENCODING);
13645
xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13646
xmlDetectSAX2(ctxt);
/* Placeholder comment node added under node; unlinked again after parsing. */
13649
fake = xmlNewComment(NULL);
13650
if (fake == NULL) {
13651
xmlFreeParserCtxt(ctxt);
13652
return(XML_ERR_NO_MEMORY);
13654
xmlAddChild(node, fake);
13656
if (node->type == XML_ELEMENT_NODE) {
13657
nodePush(ctxt, node);
13659
* initialize the SAX2 namespaces stack
13662
while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13663
xmlNsPtr ns = cur->nsDef;
13664
const xmlChar *iprefix, *ihref;
13666
while (ns != NULL) {
13668
iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13669
ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13671
iprefix = ns->prefix;
/* Only push a declaration whose prefix is not already on the stack. */
13675
if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13676
nsPush(ctxt, iprefix, ihref);
13683
ctxt->instate = XML_PARSER_CONTENT;
13686
if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13688
* ID/IDREF registration will be done in xmlValidateElement below
13690
ctxt->loadsubset |= XML_SKIP_IDS;
13693
#ifdef LIBXML_HTML_ENABLED
13694
if (doc->type == XML_HTML_DOCUMENT_NODE)
13695
__htmlParseContent(ctxt);
13698
xmlParseContent(ctxt);
/*
 * Leftover input after the content is an error: "</" is reported as not
 * well balanced, any other non-zero RAW value as extra content.
 */
13701
if ((RAW == '<') && (NXT(1) == '/')) {
13702
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13703
} else if (RAW != 0) {
13704
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
/* A non-NULL current node other than the context node means unbalanced input. */
13706
if ((ctxt->node != NULL) && (ctxt->node != node)) {
13707
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13708
ctxt->wellFormed = 0;
/* Map a failed parse to an error code; errNo == 0 falls back to a generic one. */
13711
if (!ctxt->wellFormed) {
13712
if (ctxt->errNo == 0)
13713
ret = XML_ERR_INTERNAL_ERROR;
13715
ret = (xmlParserErrors)ctxt->errNo;
13721
* Return the newly created nodeset after unlinking it from
13722
* the pseudo sibling.
13735
while (cur != NULL) {
13736
cur->parent = NULL;
13740
xmlUnlinkNode(fake);
/* On failure the collected node list is freed rather than returned. */
13744
if (ret != XML_ERR_OK) {
13745
xmlFreeNodeList(*lst);
13749
if (doc->dict != NULL)
13751
xmlFreeParserCtxt(ctxt);
13755
return(XML_ERR_INTERNAL_ERROR);
13759
#ifdef LIBXML_SAX1_ENABLED
13761
* xmlParseBalancedChunkMemoryRecover:
13762
* @doc: the document the chunk pertains to
13763
* @sax: the SAX handler block (possibly NULL)
13764
* @user_data: The user data returned on SAX callbacks (possibly NULL)
13765
* @depth: Used for loop detection, use 0
13766
* @string: the input string in UTF8 or ISO-Latin (zero terminated)
13767
* @lst: the return value for the set of parsed nodes
13768
* @recover: return nodes even if the data is broken (use 0)
13771
* Parse a well-balanced chunk of an XML document
13772
* called by the parser
13773
* The allowed sequence for the Well Balanced Chunk is the one defined by
13774
* the content production in the XML grammar:
13776
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13778
* Returns 0 if the chunk is well balanced, -1 in case of args problem and
13779
* the parser error code otherwise
13781
* In case recover is set to 1, the nodelist will not be empty even if
13782
* the parsed chunk is not well balanced, assuming the parsing succeeded to
13786
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13787
void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13789
xmlParserCtxtPtr ctxt;
/*
 * Overview: parses string in a fresh memory parser context, building the
 * content under a temporary "pseudoroot" node of a scratch document
 * (newDoc) that borrows doc's DTD subsets and oldNs. The resulting nodes
 * are handed back through lst on success, or regardless of errors when
 * recover is 1. The scratch document and the context are freed at the
 * end after the borrowed pointers have been reset to NULL.
 */
13791
xmlSAXHandlerPtr oldsax = NULL;
13792
xmlNodePtr content, newRoot;
13797
return(XML_ERR_ENTITY_LOOP);
13803
if (string == NULL)
13806
size = xmlStrlen(string);
13808
ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13809
if (ctxt == NULL) return(-1);
13810
ctxt->userData = ctxt;
13812
oldsax = ctxt->sax;
/* Caller-supplied user data replaces the default userData == ctxt. */
13814
if (user_data != NULL)
13815
ctxt->userData = user_data;
13817
newDoc = xmlNewDoc(BAD_CAST "1.0");
13818
if (newDoc == NULL) {
13819
xmlFreeParserCtxt(ctxt);
13822
newDoc->properties = XML_DOC_INTERNAL;
/*
 * When doc has a dictionary, swap it in for the context's own (calling
 * xmlDictReference() on it) and refresh the cached strings from it.
 */
13823
if ((doc != NULL) && (doc->dict != NULL)) {
13824
xmlDictFree(ctxt->dict);
13825
ctxt->dict = doc->dict;
13826
xmlDictReference(ctxt->dict);
13827
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13828
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13829
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13830
ctxt->dictNames = 1;
13832
xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
/*
 * The DTD subsets are shared with doc, not copied; they are reset to
 * NULL on newDoc before each xmlFreeDoc(newDoc) below.
 */
13835
newDoc->intSubset = doc->intSubset;
13836
newDoc->extSubset = doc->extSubset;
13838
newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13839
if (newRoot == NULL) {
13841
ctxt->sax = oldsax;
13842
xmlFreeParserCtxt(ctxt);
13843
newDoc->intSubset = NULL;
13844
newDoc->extSubset = NULL;
13845
xmlFreeDoc(newDoc);
/* The pseudoroot becomes the bottom entry of the context's node stack. */
13848
xmlAddChild((xmlNodePtr) newDoc, newRoot);
13849
nodePush(ctxt, newRoot);
13851
ctxt->myDoc = newDoc;
13853
ctxt->myDoc = newDoc;
13854
newDoc->children->doc = doc;
13855
/* Ensure that doc has XML spec namespace */
13856
xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13857
newDoc->oldNs = doc->oldNs;
13859
ctxt->instate = XML_PARSER_CONTENT;
13860
ctxt->depth = depth;
13863
* Doing validity checking on chunk doesn't make sense
13865
ctxt->validate = 0;
13866
ctxt->loadsubset = 0;
13867
xmlDetectSAX2(ctxt);
/*
 * When doc is given, its children pointer is set to NULL for the
 * duration of the parse and restored right after.
 */
13869
if ( doc != NULL ){
13870
content = doc->children;
13871
doc->children = NULL;
13872
xmlParseContent(ctxt);
13873
doc->children = content;
13876
xmlParseContent(ctxt);
/*
 * Leftover input after the content is an error: "</" is reported as not
 * well balanced, any other non-zero RAW value as extra content.
 */
13878
if ((RAW == '<') && (NXT(1) == '/')) {
13879
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13880
} else if (RAW != 0) {
13881
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
/* The node stack must have unwound back to the pseudoroot. */
13883
if (ctxt->node != newDoc->children) {
13884
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13887
if (!ctxt->wellFormed) {
13888
if (ctxt->errNo == 0)
/* Nodes are handed back on success, or in any case when recover is 1. */
13896
if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13900
* Return the newly created nodeset after unlinking it from
13901
* they pseudo parent.
13903
cur = newDoc->children->children;
/* Re-home each returned node into doc and detach it from the pseudoroot. */
13905
while (cur != NULL) {
13906
xmlSetTreeDoc(cur, doc);
13907
cur->parent = NULL;
13910
newDoc->children->children = NULL;
/* Reset the pointers borrowed from doc before freeing the scratch document. */
13914
ctxt->sax = oldsax;
13915
xmlFreeParserCtxt(ctxt);
13916
newDoc->intSubset = NULL;
13917
newDoc->extSubset = NULL;
13918
newDoc->oldNs = NULL;
13919
xmlFreeDoc(newDoc);
13925
* xmlSAXParseEntity:
13926
* @sax: the SAX handler block
13927
* @filename: the filename
13929
* parse an XML external entity out of context and build a tree.
13930
* It uses the given SAX function block to handle the parsing callbacks.
13931
* If sax is NULL, fallback to the default DOM tree building routines.
13933
* [78] extParsedEnt ::= TextDecl? content
13935
* This corresponds to a "Well Balanced" chunk
13937
* Returns the resulting document tree
13941
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
/*
 * Overview: creates a file parser context for filename, runs
 * xmlParseExtParsedEnt() on it and frees the context afterwards. The
 * context's myDoc is freed and reset to NULL on one of the result paths
 * (the branch structure around ctxt->wellFormed is only partly visible
 * here).
 */
13943
xmlParserCtxtPtr ctxt;
13945
ctxt = xmlCreateFileParserCtxt(filename);
13946
if (ctxt == NULL) {
/* Free the SAX handler block the context currently holds, if any. */
13950
if (ctxt->sax != NULL)
13951
xmlFree(ctxt->sax);
13953
ctxt->userData = NULL;
13956
xmlParseExtParsedEnt(ctxt);
13958
if (ctxt->wellFormed)
13962
xmlFreeDoc(ctxt->myDoc);
13963
ctxt->myDoc = NULL;
13967
xmlFreeParserCtxt(ctxt);
13974
* @filename: the filename
13976
* parse an XML external entity out of context and build a tree.
13978
* [78] extParsedEnt ::= TextDecl? content
13980
* This corresponds to a "Well Balanced" chunk
13982
* Returns the resulting document tree
13986
xmlParseEntity(const char *filename) {
/*
 * Thin front end: calls xmlSAXParseEntity() with a NULL SAX handler and
 * returns its result unchanged.
 */
13987
return(xmlSAXParseEntity(NULL, filename));
13989
#endif /* LIBXML_SAX1_ENABLED */
13992
* xmlCreateEntityParserCtxtInternal:
13993
* @URL: the entity URL
13994
* @ID: the entity PUBLIC ID
13995
* @base: a possible base for the target URI
13996
* @pctx: parser context used to set options on new context
13998
* Create a parser context for an external entity
13999
* Automatic support for ZLIB/Compress compressed document is provided
14000
* by default if found at compile-time.
14002
* Returns the new parser context or NULL
14004
static xmlParserCtxtPtr
14005
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
14006
const xmlChar *base, xmlParserCtxtPtr pctx) {
/*
 * Overview: allocates a parser context, copies options and _private from
 * pctx when one is given, loads the entity through
 * xmlLoadExternalEntity() and pushes the resulting input stream. Two
 * load paths are visible: one that loads URL directly and one that loads
 * the URI produced by xmlBuildURI(URL, base). When the load fails the
 * new context is freed.
 */
14007
xmlParserCtxtPtr ctxt;
14008
xmlParserInputPtr inputStream;
14009
char *directory = NULL;
14012
ctxt = xmlNewParserCtxt();
14013
if (ctxt == NULL) {
/* Inherit the options and the private pointer of the parent context. */
14017
if (pctx != NULL) {
14018
ctxt->options = pctx->options;
14019
ctxt->_private = pctx->_private;
14022
uri = xmlBuildURI(URL, base);
/* Load path using URL as given. */
14025
inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14026
if (inputStream == NULL) {
14027
xmlFreeParserCtxt(ctxt);
14031
inputPush(ctxt, inputStream);
/* Record the entity's directory on the context when none is set yet. */
14033
if ((ctxt->directory == NULL) && (directory == NULL))
14034
directory = xmlParserGetDirectory((char *)URL);
14035
if ((ctxt->directory == NULL) && (directory != NULL))
14036
ctxt->directory = directory;
/* Load path using the URI built from URL and base. */
14038
inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14039
if (inputStream == NULL) {
14041
xmlFreeParserCtxt(ctxt);
14045
inputPush(ctxt, inputStream);
/* Record the entity's directory on the context when none is set yet. */
14047
if ((ctxt->directory == NULL) && (directory == NULL))
14048
directory = xmlParserGetDirectory((char *)uri);
14049
if ((ctxt->directory == NULL) && (directory != NULL))
14050
ctxt->directory = directory;
14057
* xmlCreateEntityParserCtxt:
14058
* @URL: the entity URL
14059
* @ID: the entity PUBLIC ID
14060
* @base: a possible base for the target URI
14062
* Create a parser context for an external entity
14063
* Automatic support for ZLIB/Compress compressed document is provided
14064
* by default if found at compile-time.
14066
* Returns the new parser context or NULL
14069
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14070
const xmlChar *base) {
/*
 * Public wrapper: calls xmlCreateEntityParserCtxtInternal() with a NULL
 * parent context, so no options are inherited.
 */
14071
return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14075
/************************************************************************
14077
* Front ends when parsing from a file *
14079
************************************************************************/
14082
* xmlCreateURLParserCtxt:
14083
* @filename: the filename or URL
14084
* @options: a combination of xmlParserOption
14086
* Create a parser context for a file or URL content.
14087
* Automatic support for ZLIB/Compress compressed document is provided
14088
* by default if found at compile-time and for file accesses
14090
* Returns the new parser context or NULL
14093
xmlCreateURLParserCtxt(const char *filename, int options)
/*
 * Overview: allocates a parser context, passes options to
 * xmlCtxtUseOptionsInternal(), loads filename through
 * xmlLoadExternalEntity() and pushes the resulting input stream. When
 * the load fails the new context is freed.
 */
14095
xmlParserCtxtPtr ctxt;
14096
xmlParserInputPtr inputStream;
14097
char *directory = NULL;
14099
ctxt = xmlNewParserCtxt();
14100
if (ctxt == NULL) {
14101
xmlErrMemory(NULL, "cannot allocate parser context");
/* Line-number tracking is switched on for contexts created here. */
14106
xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14107
ctxt->linenumbers = 1;
14109
inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14110
if (inputStream == NULL) {
14111
xmlFreeParserCtxt(ctxt);
14115
inputPush(ctxt, inputStream);
/* Record the file's directory on the context when none is set yet. */
14116
if ((ctxt->directory == NULL) && (directory == NULL))
14117
directory = xmlParserGetDirectory(filename);
14118
if ((ctxt->directory == NULL) && (directory != NULL))
14119
ctxt->directory = directory;
14125
* xmlCreateFileParserCtxt:
14126
* @filename: the filename
14128
* Create a parser context for a file content.
14129
* Automatic support for ZLIB/Compress compressed document is provided
14130
* by default if found at compile-time.
14132
* Returns the new parser context or NULL
14135
xmlCreateFileParserCtxt(const char *filename)
/* Wrapper: same as xmlCreateURLParserCtxt() with options set to 0. */
14137
return(xmlCreateURLParserCtxt(filename, 0));
14140
#ifdef LIBXML_SAX1_ENABLED
14142
* xmlSAXParseFileWithData:
14143
* @sax: the SAX handler block
14144
* @filename: the filename
14145
* @recovery: work in recovery mode, i.e. tries to read no Well Formed
14147
* @data: the userdata
14149
* parse an XML file and build a tree. Automatic support for ZLIB/Compress
14150
* compressed document is provided by default if found at compile-time.
14151
* It use the given SAX function block to handle the parsing callback.
14152
* If sax is NULL, fallback to the default DOM tree building routines.
14154
* User data (void *) is stored within the parser context in the
14155
* context's _private member, so it is available nearly everywhere in libxml
14157
* Returns the resulting document tree
14161
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14162
int recovery, void *data) {
14164
xmlParserCtxtPtr ctxt;
14168
ctxt = xmlCreateFileParserCtxt(filename);
14169
if (ctxt == NULL) {
14173
if (ctxt->sax != NULL)
14174
xmlFree(ctxt->sax);
14177
xmlDetectSAX2(ctxt);
14179
ctxt->_private = data;
14182
if (ctxt->directory == NULL)
14183
ctxt->directory = xmlParserGetDirectory(filename);
14185
ctxt->recovery = recovery;
14187
xmlParseDocument(ctxt);
14189
if ((ctxt->wellFormed) || recovery) {
14192
if (ctxt->input->buf->compressed > 0)
14193
ret->compression = 9;
14195
ret->compression = ctxt->input->buf->compressed;
14200
xmlFreeDoc(ctxt->myDoc);
14201
ctxt->myDoc = NULL;
14205
xmlFreeParserCtxt(ctxt);
14212
* @sax: the SAX handler block
14213
* @filename: the filename
14214
* @recovery: work in recovery mode, i.e. tries to read no Well Formed
14217
* parse an XML file and build a tree. Automatic support for ZLIB/Compress
14218
* compressed document is provided by default if found at compile-time.
14219
* It use the given SAX function block to handle the parsing callback.
14220
* If sax is NULL, fallback to the default DOM tree building routines.
14222
* Returns the resulting document tree
14226
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14228
return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14233
* @cur: a pointer to an array of xmlChar
14235
* parse an XML in-memory document and build a tree.
14236
* In the case the document is not Well Formed, a attempt to build a
14237
* tree is tried anyway
14239
* Returns the resulting document tree or NULL in case of failure
14243
xmlRecoverDoc(const xmlChar *cur) {
14244
return(xmlSAXParseDoc(NULL, cur, 1));
14249
* @filename: the filename
14251
* parse an XML file and build a tree. Automatic support for ZLIB/Compress
14252
* compressed document is provided by default if found at compile-time.
14254
* Returns the resulting document tree if the file was wellformed,
14259
xmlParseFile(const char *filename) {
14260
return(xmlSAXParseFile(NULL, filename, 0));
14265
* @filename: the filename
14267
* parse an XML file and build a tree. Automatic support for ZLIB/Compress
14268
* compressed document is provided by default if found at compile-time.
14269
* In the case the document is not Well Formed, it attempts to build
14272
* Returns the resulting document tree or NULL in case of failure
14276
xmlRecoverFile(const char *filename) {
14277
return(xmlSAXParseFile(NULL, filename, 1));
14282
* xmlSetupParserForBuffer:
14283
* @ctxt: an XML parser context
14284
* @buffer: a xmlChar * buffer
14285
* @filename: a file name
14287
* Setup the parser context to parse a new buffer; Clears any prior
14288
* contents from the parser context. The buffer parameter must not be
14289
* NULL, but the filename parameter can be
14292
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14293
const char* filename)
14295
xmlParserInputPtr input;
14297
if ((ctxt == NULL) || (buffer == NULL))
14300
input = xmlNewInputStream(ctxt);
14301
if (input == NULL) {
14302
xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14303
xmlClearParserCtxt(ctxt);
14307
xmlClearParserCtxt(ctxt);
14308
if (filename != NULL)
14309
input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14310
input->base = buffer;
14311
input->cur = buffer;
14312
input->end = &buffer[xmlStrlen(buffer)];
14313
inputPush(ctxt, input);
14317
* xmlSAXUserParseFile:
14318
* @sax: a SAX handler
14319
* @user_data: The user data returned on SAX callbacks
14320
* @filename: a file name
14322
* parse an XML file and call the given SAX handler routines.
14323
* Automatic support for ZLIB/Compress compressed document is provided
14325
* Returns 0 in case of success or a error number otherwise
14328
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14329
const char *filename) {
14331
xmlParserCtxtPtr ctxt;
14333
ctxt = xmlCreateFileParserCtxt(filename);
14334
if (ctxt == NULL) return -1;
14335
if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14336
xmlFree(ctxt->sax);
14338
xmlDetectSAX2(ctxt);
14340
if (user_data != NULL)
14341
ctxt->userData = user_data;
14343
xmlParseDocument(ctxt);
14345
if (ctxt->wellFormed)
14348
if (ctxt->errNo != 0)
14355
if (ctxt->myDoc != NULL) {
14356
xmlFreeDoc(ctxt->myDoc);
14357
ctxt->myDoc = NULL;
14359
xmlFreeParserCtxt(ctxt);
14363
#endif /* LIBXML_SAX1_ENABLED */
14365
/************************************************************************
14367
* Front ends when parsing from memory *
14369
************************************************************************/
14372
* xmlCreateMemoryParserCtxt:
14373
* @buffer: a pointer to a char array
14374
* @size: the size of the array
14376
* Create a parser context for an XML in-memory document.
14378
* Returns the new parser context or NULL
14381
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14382
xmlParserCtxtPtr ctxt;
14383
xmlParserInputPtr input;
14384
xmlParserInputBufferPtr buf;
14386
if (buffer == NULL)
14391
ctxt = xmlNewParserCtxt();
14395
/* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14396
buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14398
xmlFreeParserCtxt(ctxt);
14402
input = xmlNewInputStream(ctxt);
14403
if (input == NULL) {
14404
xmlFreeParserInputBuffer(buf);
14405
xmlFreeParserCtxt(ctxt);
14409
input->filename = NULL;
14411
xmlBufResetInput(input->buf->buffer, input);
14413
inputPush(ctxt, input);
14417
#ifdef LIBXML_SAX1_ENABLED
14419
* xmlSAXParseMemoryWithData:
14420
* @sax: the SAX handler block
14421
* @buffer: an pointer to a char array
14422
* @size: the size of the array
14423
* @recovery: work in recovery mode, i.e. tries to read no Well Formed
14425
* @data: the userdata
14427
* parse an XML in-memory block and use the given SAX function block
14428
* to handle the parsing callback. If sax is NULL, fallback to the default
14429
* DOM tree building routines.
14431
* User data (void *) is stored within the parser context in the
14432
* context's _private member, so it is available nearly everywhere in libxml
14434
* Returns the resulting document tree
14438
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14439
int size, int recovery, void *data) {
14441
xmlParserCtxtPtr ctxt;
14445
ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14446
if (ctxt == NULL) return(NULL);
14448
if (ctxt->sax != NULL)
14449
xmlFree(ctxt->sax);
14452
xmlDetectSAX2(ctxt);
14454
ctxt->_private=data;
14457
ctxt->recovery = recovery;
14459
xmlParseDocument(ctxt);
14461
if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14464
xmlFreeDoc(ctxt->myDoc);
14465
ctxt->myDoc = NULL;
14469
xmlFreeParserCtxt(ctxt);
14475
* xmlSAXParseMemory:
14476
* @sax: the SAX handler block
14477
* @buffer: an pointer to a char array
14478
* @size: the size of the array
14479
* @recovery: work in recovery mode, i.e. tries to read not Well Formed
14482
* parse an XML in-memory block and use the given SAX function block
14483
* to handle the parsing callback. If sax is NULL, fallback to the default
14484
* DOM tree building routines.
14486
* Returns the resulting document tree
14489
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14490
int size, int recovery) {
14491
return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14496
* @buffer: an pointer to a char array
14497
* @size: the size of the array
14499
* parse an XML in-memory block and build a tree.
14501
* Returns the resulting document tree
14504
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14505
return(xmlSAXParseMemory(NULL, buffer, size, 0));
14509
* xmlRecoverMemory:
14510
* @buffer: an pointer to a char array
14511
* @size: the size of the array
14513
* parse an XML in-memory block and build a tree.
14514
* In the case the document is not Well Formed, an attempt to
14515
* build a tree is tried anyway
14517
* Returns the resulting document tree or NULL in case of error
14520
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14521
return(xmlSAXParseMemory(NULL, buffer, size, 1));
14525
* xmlSAXUserParseMemory:
14526
* @sax: a SAX handler
14527
* @user_data: The user data returned on SAX callbacks
14528
* @buffer: an in-memory XML document input
14529
* @size: the length of the XML document in bytes
14531
* A better SAX parsing routine.
14532
* parse an XML in-memory buffer and call the given SAX handler routines.
14534
* Returns 0 in case of success or a error number otherwise
14536
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14537
const char *buffer, int size) {
14539
xmlParserCtxtPtr ctxt;
14543
ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14544
if (ctxt == NULL) return -1;
14545
if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14546
xmlFree(ctxt->sax);
14548
xmlDetectSAX2(ctxt);
14550
if (user_data != NULL)
14551
ctxt->userData = user_data;
14553
xmlParseDocument(ctxt);
14555
if (ctxt->wellFormed)
14558
if (ctxt->errNo != 0)
14565
if (ctxt->myDoc != NULL) {
14566
xmlFreeDoc(ctxt->myDoc);
14567
ctxt->myDoc = NULL;
14569
xmlFreeParserCtxt(ctxt);
14573
#endif /* LIBXML_SAX1_ENABLED */
14576
* xmlCreateDocParserCtxt:
14577
* @cur: a pointer to an array of xmlChar
14579
* Creates a parser context for an XML in-memory document.
14581
* Returns the new parser context or NULL
14584
xmlCreateDocParserCtxt(const xmlChar *cur) {
14589
len = xmlStrlen(cur);
14590
return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14593
#ifdef LIBXML_SAX1_ENABLED
14596
* @sax: the SAX handler block
14597
* @cur: a pointer to an array of xmlChar
14598
* @recovery: work in recovery mode, i.e. tries to read no Well Formed
14601
* parse an XML in-memory document and build a tree.
14602
* It use the given SAX function block to handle the parsing callback.
14603
* If sax is NULL, fallback to the default DOM tree building routines.
14605
* Returns the resulting document tree
14609
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14611
xmlParserCtxtPtr ctxt;
14612
xmlSAXHandlerPtr oldsax = NULL;
14614
if (cur == NULL) return(NULL);
14617
ctxt = xmlCreateDocParserCtxt(cur);
14618
if (ctxt == NULL) return(NULL);
14620
oldsax = ctxt->sax;
14622
ctxt->userData = NULL;
14624
xmlDetectSAX2(ctxt);
14626
xmlParseDocument(ctxt);
14627
if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14630
xmlFreeDoc(ctxt->myDoc);
14631
ctxt->myDoc = NULL;
14634
ctxt->sax = oldsax;
14635
xmlFreeParserCtxt(ctxt);
14642
* @cur: a pointer to an array of xmlChar
14644
* parse an XML in-memory document and build a tree.
14646
* Returns the resulting document tree
14650
xmlParseDoc(const xmlChar *cur) {
14651
return(xmlSAXParseDoc(NULL, cur, 0));
14653
#endif /* LIBXML_SAX1_ENABLED */
14655
#ifdef LIBXML_LEGACY_ENABLED
14656
/************************************************************************
14658
* Specific function to keep track of entities references *
14659
* and used by the XSLT debugger *
14661
************************************************************************/
14663
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14666
* xmlAddEntityReference:
14667
* @ent : A valid entity
14668
* @firstNode : A valid first node for children of entity
14669
* @lastNode : A valid last node of children entity
14671
* Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14674
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14675
xmlNodePtr lastNode)
14677
if (xmlEntityRefFunc != NULL) {
14678
(*xmlEntityRefFunc) (ent, firstNode, lastNode);
14684
* xmlSetEntityReferenceFunc:
14685
* @func: A valid function
14687
* Set the function to call call back when a xml reference has been made
14690
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14692
xmlEntityRefFunc = func;
14694
#endif /* LIBXML_LEGACY_ENABLED */
14696
/************************************************************************
14700
************************************************************************/
14702
#ifdef LIBXML_XPATH_ENABLED
14703
#include <libxml/xpath.h>
14706
extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14707
static int xmlParserInitialized = 0;
14712
* Initialization function for the XML parser.
14713
* This is not reentrant. Call once before processing in case of
14714
* use in multithreaded programs.
14718
xmlInitParser(void) {
14719
if (xmlParserInitialized != 0)
14722
#ifdef LIBXML_THREAD_ENABLED
14723
__xmlGlobalInitMutexLock();
14724
if (xmlParserInitialized == 0) {
14728
if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14729
(xmlGenericError == NULL))
14730
initGenericErrorDefaultFunc(NULL);
14732
xmlInitializeDict();
14733
xmlInitCharEncodingHandlers();
14734
xmlDefaultSAXHandlerInit();
14735
xmlRegisterDefaultInputCallbacks();
14736
#ifdef LIBXML_OUTPUT_ENABLED
14737
xmlRegisterDefaultOutputCallbacks();
14738
#endif /* LIBXML_OUTPUT_ENABLED */
14739
#ifdef LIBXML_HTML_ENABLED
14740
htmlInitAutoClose();
14741
htmlDefaultSAXHandlerInit();
14743
#ifdef LIBXML_XPATH_ENABLED
14746
xmlParserInitialized = 1;
14747
#ifdef LIBXML_THREAD_ENABLED
14749
__xmlGlobalInitMutexUnlock();
14754
* xmlCleanupParser:
14756
* This function name is somewhat misleading. It does not clean up
14757
* parser state, it cleans up memory allocated by the library itself.
14758
* It is a cleanup function for the XML library. It tries to reclaim all
14759
* related global memory allocated for the library processing.
14760
* It doesn't deallocate any document related memory. One should
14761
* call xmlCleanupParser() only when the process has finished using
14762
* the library and all XML/HTML documents built with it.
14763
* See also xmlInitParser() which has the opposite function of preparing
14764
* the library for operations.
14766
* WARNING: if your application is multithreaded or has plugin support
14767
* calling this may crash the application if another thread or
14768
* a plugin is still using libxml2. It's sometimes very hard to
14769
* guess if libxml2 is in use in the application, some libraries
14770
* or plugins may use it without notice. In case of doubt abstain
14771
* from calling this function or do it just before calling exit()
14772
* to avoid leak reports from valgrind !
14776
xmlCleanupParser(void) {
14777
if (!xmlParserInitialized)
14780
xmlCleanupCharEncodingHandlers();
14781
#ifdef LIBXML_CATALOG_ENABLED
14782
xmlCatalogCleanup();
14785
xmlCleanupInputCallbacks();
14786
#ifdef LIBXML_OUTPUT_ENABLED
14787
xmlCleanupOutputCallbacks();
14789
#ifdef LIBXML_SCHEMAS_ENABLED
14790
xmlSchemaCleanupTypes();
14791
xmlRelaxNGCleanupTypes();
14793
xmlResetLastError();
14794
xmlCleanupGlobals();
14795
xmlCleanupThreads(); /* must be last if called not from the main thread */
14796
xmlCleanupMemory();
14797
xmlParserInitialized = 0;
14800
/************************************************************************
14802
* New set (2.6.0) of simpler and more flexible APIs *
14804
************************************************************************/
14810
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored, and when "dict" is NULL the
 * string is always freed.
 *
 * Wrapped in do { } while (0) so the macro expands to a single statement
 * and can be used safely in an unbraced if/else. Callers supply the
 * terminating semicolon.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
14820
* @ctxt: an XML parser context
14822
* Reset a parser context
14825
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14827
xmlParserInputPtr input;
14835
while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14836
xmlFreeInputStream(input);
14839
ctxt->input = NULL;
14842
if (ctxt->spaceTab != NULL) {
14843
ctxt->spaceTab[0] = -1;
14844
ctxt->space = &ctxt->spaceTab[0];
14846
ctxt->space = NULL;
14856
DICT_FREE(ctxt->version);
14857
ctxt->version = NULL;
14858
DICT_FREE(ctxt->encoding);
14859
ctxt->encoding = NULL;
14860
DICT_FREE(ctxt->directory);
14861
ctxt->directory = NULL;
14862
DICT_FREE(ctxt->extSubURI);
14863
ctxt->extSubURI = NULL;
14864
DICT_FREE(ctxt->extSubSystem);
14865
ctxt->extSubSystem = NULL;
14866
if (ctxt->myDoc != NULL)
14867
xmlFreeDoc(ctxt->myDoc);
14868
ctxt->myDoc = NULL;
14870
ctxt->standalone = -1;
14871
ctxt->hasExternalSubset = 0;
14872
ctxt->hasPErefs = 0;
14874
ctxt->external = 0;
14875
ctxt->instate = XML_PARSER_START;
14878
ctxt->wellFormed = 1;
14879
ctxt->nsWellFormed = 1;
14880
ctxt->disableSAX = 0;
14883
ctxt->vctxt.userData = ctxt;
14884
ctxt->vctxt.error = xmlParserValidityError;
14885
ctxt->vctxt.warning = xmlParserValidityWarning;
14887
ctxt->record_info = 0;
14889
ctxt->checkIndex = 0;
14890
ctxt->inSubset = 0;
14891
ctxt->errNo = XML_ERR_OK;
14893
ctxt->charset = XML_CHAR_ENCODING_UTF8;
14894
ctxt->catalogs = NULL;
14895
ctxt->nbentities = 0;
14896
ctxt->sizeentities = 0;
14897
ctxt->sizeentcopy = 0;
14898
xmlInitNodeInfoSeq(&ctxt->node_seq);
14900
if (ctxt->attsDefault != NULL) {
14901
xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14902
ctxt->attsDefault = NULL;
14904
if (ctxt->attsSpecial != NULL) {
14905
xmlHashFree(ctxt->attsSpecial, NULL);
14906
ctxt->attsSpecial = NULL;
14909
#ifdef LIBXML_CATALOG_ENABLED
14910
if (ctxt->catalogs != NULL)
14911
xmlCatalogFreeLocal(ctxt->catalogs);
14913
if (ctxt->lastError.code != XML_ERR_OK)
14914
xmlResetError(&ctxt->lastError);
14918
* xmlCtxtResetPush:
14919
* @ctxt: an XML parser context
14920
* @chunk: a pointer to an array of chars
14921
* @size: number of chars in the array
14922
* @filename: an optional file name or URI
14923
* @encoding: the document encoding, or NULL
14925
* Reset a push parser context
14927
* Returns 0 in case of success and 1 in case of error
14930
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14931
int size, const char *filename, const char *encoding)
14933
xmlParserInputPtr inputStream;
14934
xmlParserInputBufferPtr buf;
14935
xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14940
if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14941
enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14943
buf = xmlAllocParserInputBuffer(enc);
14947
if (ctxt == NULL) {
14948
xmlFreeParserInputBuffer(buf);
14952
xmlCtxtReset(ctxt);
14954
if (ctxt->pushTab == NULL) {
14955
ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14956
sizeof(xmlChar *));
14957
if (ctxt->pushTab == NULL) {
14958
xmlErrMemory(ctxt, NULL);
14959
xmlFreeParserInputBuffer(buf);
14964
if (filename == NULL) {
14965
ctxt->directory = NULL;
14967
ctxt->directory = xmlParserGetDirectory(filename);
14970
inputStream = xmlNewInputStream(ctxt);
14971
if (inputStream == NULL) {
14972
xmlFreeParserInputBuffer(buf);
14976
if (filename == NULL)
14977
inputStream->filename = NULL;
14979
inputStream->filename = (char *)
14980
xmlCanonicPath((const xmlChar *) filename);
14981
inputStream->buf = buf;
14982
xmlBufResetInput(buf->buffer, inputStream);
14984
inputPush(ctxt, inputStream);
14986
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14987
(ctxt->input->buf != NULL)) {
14988
size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14989
size_t cur = ctxt->input->cur - ctxt->input->base;
14991
xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14993
xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14995
xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14999
if (encoding != NULL) {
15000
xmlCharEncodingHandlerPtr hdlr;
15002
if (ctxt->encoding != NULL)
15003
xmlFree((xmlChar *) ctxt->encoding);
15004
ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15006
hdlr = xmlFindCharEncodingHandler(encoding);
15007
if (hdlr != NULL) {
15008
xmlSwitchToEncoding(ctxt, hdlr);
15010
xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
15011
"Unsupported encoding %s\n", BAD_CAST encoding);
15013
} else if (enc != XML_CHAR_ENCODING_NONE) {
15014
xmlSwitchEncoding(ctxt, enc);
15022
* xmlCtxtUseOptionsInternal:
15023
* @ctxt: an XML parser context
15024
* @options: a combination of xmlParserOption
15025
* @encoding: the user provided encoding to use
15027
* Applies the options to the parser context
15029
* Returns 0 in case of success, the set of unknown or unimplemented options
15030
* in case of error.
15033
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15037
if (encoding != NULL) {
15038
if (ctxt->encoding != NULL)
15039
xmlFree((xmlChar *) ctxt->encoding);
15040
ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15042
if (options & XML_PARSE_RECOVER) {
15043
ctxt->recovery = 1;
15044
options -= XML_PARSE_RECOVER;
15045
ctxt->options |= XML_PARSE_RECOVER;
15047
ctxt->recovery = 0;
15048
if (options & XML_PARSE_DTDLOAD) {
15049
ctxt->loadsubset = XML_DETECT_IDS;
15050
options -= XML_PARSE_DTDLOAD;
15051
ctxt->options |= XML_PARSE_DTDLOAD;
15053
ctxt->loadsubset = 0;
15054
if (options & XML_PARSE_DTDATTR) {
15055
ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15056
options -= XML_PARSE_DTDATTR;
15057
ctxt->options |= XML_PARSE_DTDATTR;
15059
if (options & XML_PARSE_NOENT) {
15060
ctxt->replaceEntities = 1;
15061
/* ctxt->loadsubset |= XML_DETECT_IDS; */
15062
options -= XML_PARSE_NOENT;
15063
ctxt->options |= XML_PARSE_NOENT;
15065
ctxt->replaceEntities = 0;
15066
if (options & XML_PARSE_PEDANTIC) {
15067
ctxt->pedantic = 1;
15068
options -= XML_PARSE_PEDANTIC;
15069
ctxt->options |= XML_PARSE_PEDANTIC;
15071
ctxt->pedantic = 0;
15072
if (options & XML_PARSE_NOBLANKS) {
15073
ctxt->keepBlanks = 0;
15074
ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15075
options -= XML_PARSE_NOBLANKS;
15076
ctxt->options |= XML_PARSE_NOBLANKS;
15078
ctxt->keepBlanks = 1;
15079
if (options & XML_PARSE_DTDVALID) {
15080
ctxt->validate = 1;
15081
if (options & XML_PARSE_NOWARNING)
15082
ctxt->vctxt.warning = NULL;
15083
if (options & XML_PARSE_NOERROR)
15084
ctxt->vctxt.error = NULL;
15085
options -= XML_PARSE_DTDVALID;
15086
ctxt->options |= XML_PARSE_DTDVALID;
15088
ctxt->validate = 0;
15089
if (options & XML_PARSE_NOWARNING) {
15090
ctxt->sax->warning = NULL;
15091
options -= XML_PARSE_NOWARNING;
15093
if (options & XML_PARSE_NOERROR) {
15094
ctxt->sax->error = NULL;
15095
ctxt->sax->fatalError = NULL;
15096
options -= XML_PARSE_NOERROR;
15098
#ifdef LIBXML_SAX1_ENABLED
15099
if (options & XML_PARSE_SAX1) {
15100
ctxt->sax->startElement = xmlSAX2StartElement;
15101
ctxt->sax->endElement = xmlSAX2EndElement;
15102
ctxt->sax->startElementNs = NULL;
15103
ctxt->sax->endElementNs = NULL;
15104
ctxt->sax->initialized = 1;
15105
options -= XML_PARSE_SAX1;
15106
ctxt->options |= XML_PARSE_SAX1;
15108
#endif /* LIBXML_SAX1_ENABLED */
15109
if (options & XML_PARSE_NODICT) {
15110
ctxt->dictNames = 0;
15111
options -= XML_PARSE_NODICT;
15112
ctxt->options |= XML_PARSE_NODICT;
15114
ctxt->dictNames = 1;
15116
if (options & XML_PARSE_NOCDATA) {
15117
ctxt->sax->cdataBlock = NULL;
15118
options -= XML_PARSE_NOCDATA;
15119
ctxt->options |= XML_PARSE_NOCDATA;
15121
if (options & XML_PARSE_NSCLEAN) {
15122
ctxt->options |= XML_PARSE_NSCLEAN;
15123
options -= XML_PARSE_NSCLEAN;
15125
if (options & XML_PARSE_NONET) {
15126
ctxt->options |= XML_PARSE_NONET;
15127
options -= XML_PARSE_NONET;
15129
if (options & XML_PARSE_COMPACT) {
15130
ctxt->options |= XML_PARSE_COMPACT;
15131
options -= XML_PARSE_COMPACT;
15133
if (options & XML_PARSE_OLD10) {
15134
ctxt->options |= XML_PARSE_OLD10;
15135
options -= XML_PARSE_OLD10;
15137
if (options & XML_PARSE_NOBASEFIX) {
15138
ctxt->options |= XML_PARSE_NOBASEFIX;
15139
options -= XML_PARSE_NOBASEFIX;
15141
if (options & XML_PARSE_HUGE) {
15142
ctxt->options |= XML_PARSE_HUGE;
15143
options -= XML_PARSE_HUGE;
15144
if (ctxt->dict != NULL)
15145
xmlDictSetLimit(ctxt->dict, 0);
15147
if (options & XML_PARSE_OLDSAX) {
15148
ctxt->options |= XML_PARSE_OLDSAX;
15149
options -= XML_PARSE_OLDSAX;
15151
if (options & XML_PARSE_IGNORE_ENC) {
15152
ctxt->options |= XML_PARSE_IGNORE_ENC;
15153
options -= XML_PARSE_IGNORE_ENC;
15155
if (options & XML_PARSE_BIG_LINES) {
15156
ctxt->options |= XML_PARSE_BIG_LINES;
15157
options -= XML_PARSE_BIG_LINES;
15159
ctxt->linenumbers = 1;
15164
* xmlCtxtUseOptions:
15165
* @ctxt: an XML parser context
15166
* @options: a combination of xmlParserOption
15168
* Applies the options to the parser context
15170
* Returns 0 in case of success, the set of unknown or unimplemented options
15171
* in case of error.
15174
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15176
return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15181
* @ctxt: an XML parser context
15182
* @URL: the base URL to use for the document
15183
* @encoding: the document encoding, or NULL
15184
* @options: a combination of xmlParserOption
15185
* @reuse: keep the context for reuse
15187
* Common front-end for the xmlRead functions
15189
* Returns the resulting document tree or NULL
15192
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15193
int options, int reuse)
15197
xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15198
if (encoding != NULL) {
15199
xmlCharEncodingHandlerPtr hdlr;
15201
hdlr = xmlFindCharEncodingHandler(encoding);
15203
xmlSwitchToEncoding(ctxt, hdlr);
15205
if ((URL != NULL) && (ctxt->input != NULL) &&
15206
(ctxt->input->filename == NULL))
15207
ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15208
xmlParseDocument(ctxt);
15209
if ((ctxt->wellFormed) || ctxt->recovery)
15213
if (ctxt->myDoc != NULL) {
15214
xmlFreeDoc(ctxt->myDoc);
15217
ctxt->myDoc = NULL;
15219
xmlFreeParserCtxt(ctxt);
15227
* @cur: a pointer to a zero terminated string
15228
* @URL: the base URL to use for the document
15229
* @encoding: the document encoding, or NULL
15230
* @options: a combination of xmlParserOption
15232
* parse an XML in-memory document and build a tree.
15234
* Returns the resulting document tree
15237
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15239
xmlParserCtxtPtr ctxt;
15244
ctxt = xmlCreateDocParserCtxt(cur);
15247
return (xmlDoRead(ctxt, URL, encoding, options, 0));
15252
* @filename: a file or URL
15253
* @encoding: the document encoding, or NULL
15254
* @options: a combination of xmlParserOption
15256
* parse an XML file from the filesystem or the network.
15258
* Returns the resulting document tree
15261
xmlReadFile(const char *filename, const char *encoding, int options)
15263
xmlParserCtxtPtr ctxt;
15265
ctxt = xmlCreateURLParserCtxt(filename, options);
15268
return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15273
* @buffer: a pointer to a char array
15274
* @size: the size of the array
15275
* @URL: the base URL to use for the document
15276
* @encoding: the document encoding, or NULL
15277
* @options: a combination of xmlParserOption
15279
* parse an XML in-memory document and build a tree.
15281
* Returns the resulting document tree
15284
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15286
xmlParserCtxtPtr ctxt;
15288
ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15291
return (xmlDoRead(ctxt, URL, encoding, options, 0));
15296
* @fd: an open file descriptor
15297
* @URL: the base URL to use for the document
15298
* @encoding: the document encoding, or NULL
15299
* @options: a combination of xmlParserOption
15301
* parse an XML from a file descriptor and build a tree.
15302
* NOTE that the file descriptor will not be closed when the
15303
* reader is closed or reset.
15305
* Returns the resulting document tree
15308
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15310
xmlParserCtxtPtr ctxt;
15311
xmlParserInputBufferPtr input;
15312
xmlParserInputPtr stream;
15317
input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15320
input->closecallback = NULL;
15321
ctxt = xmlNewParserCtxt();
15322
if (ctxt == NULL) {
15323
xmlFreeParserInputBuffer(input);
15326
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15327
if (stream == NULL) {
15328
xmlFreeParserInputBuffer(input);
15329
xmlFreeParserCtxt(ctxt);
15332
inputPush(ctxt, stream);
15333
return (xmlDoRead(ctxt, URL, encoding, options, 0));
15338
* @ioread: an I/O read function
15339
* @ioclose: an I/O close function
15340
* @ioctx: an I/O handler
15341
* @URL: the base URL to use for the document
15342
* @encoding: the document encoding, or NULL
15343
* @options: a combination of xmlParserOption
15345
* parse an XML document from I/O functions and source and build a tree.
15347
* Returns the resulting document tree
15350
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15351
void *ioctx, const char *URL, const char *encoding, int options)
15353
xmlParserCtxtPtr ctxt;
15354
xmlParserInputBufferPtr input;
15355
xmlParserInputPtr stream;
15357
if (ioread == NULL)
15360
input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15361
XML_CHAR_ENCODING_NONE);
15362
if (input == NULL) {
15363
if (ioclose != NULL)
15367
ctxt = xmlNewParserCtxt();
15368
if (ctxt == NULL) {
15369
xmlFreeParserInputBuffer(input);
15372
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15373
if (stream == NULL) {
15374
xmlFreeParserInputBuffer(input);
15375
xmlFreeParserCtxt(ctxt);
15378
inputPush(ctxt, stream);
15379
return (xmlDoRead(ctxt, URL, encoding, options, 0));
15384
* @ctxt: an XML parser context
15385
* @cur: a pointer to a zero terminated string
15386
* @URL: the base URL to use for the document
15387
* @encoding: the document encoding, or NULL
15388
* @options: a combination of xmlParserOption
15390
* parse an XML in-memory document and build a tree.
15391
* This reuses the existing @ctxt parser context
15393
* Returns the resulting document tree
15396
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15397
const char *URL, const char *encoding, int options)
15399
xmlParserInputPtr stream;
15406
xmlCtxtReset(ctxt);
15408
stream = xmlNewStringInputStream(ctxt, cur);
15409
if (stream == NULL) {
15412
inputPush(ctxt, stream);
15413
return (xmlDoRead(ctxt, URL, encoding, options, 1));
15418
* @ctxt: an XML parser context
15419
* @filename: a file or URL
15420
* @encoding: the document encoding, or NULL
15421
* @options: a combination of xmlParserOption
15423
* parse an XML file from the filesystem or the network.
15424
* This reuses the existing @ctxt parser context
15426
* Returns the resulting document tree
15429
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15430
const char *encoding, int options)
15432
xmlParserInputPtr stream;
15434
if (filename == NULL)
15439
xmlCtxtReset(ctxt);
15441
stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15442
if (stream == NULL) {
15445
inputPush(ctxt, stream);
15446
return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15450
* xmlCtxtReadMemory:
15451
* @ctxt: an XML parser context
15452
* @buffer: a pointer to a char array
15453
* @size: the size of the array
15454
* @URL: the base URL to use for the document
15455
* @encoding: the document encoding, or NULL
15456
* @options: a combination of xmlParserOption
15458
* parse an XML in-memory document and build a tree.
15459
* This reuses the existing @ctxt parser context
15461
* Returns the resulting document tree
15464
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15465
const char *URL, const char *encoding, int options)
15467
xmlParserInputBufferPtr input;
15468
xmlParserInputPtr stream;
15472
if (buffer == NULL)
15475
xmlCtxtReset(ctxt);
15477
input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15478
if (input == NULL) {
15482
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15483
if (stream == NULL) {
15484
xmlFreeParserInputBuffer(input);
15488
inputPush(ctxt, stream);
15489
return (xmlDoRead(ctxt, URL, encoding, options, 1));
15494
* @ctxt: an XML parser context
15495
* @fd: an open file descriptor
15496
* @URL: the base URL to use for the document
15497
* @encoding: the document encoding, or NULL
15498
* @options: a combination of xmlParserOption
15500
* parse an XML from a file descriptor and build a tree.
15501
* This reuses the existing @ctxt parser context
15502
* NOTE that the file descriptor will not be closed when the
15503
* reader is closed or reset.
15505
* Returns the resulting document tree
15508
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15509
const char *URL, const char *encoding, int options)
15511
xmlParserInputBufferPtr input;
15512
xmlParserInputPtr stream;
15519
xmlCtxtReset(ctxt);
15522
input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15525
input->closecallback = NULL;
15526
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15527
if (stream == NULL) {
15528
xmlFreeParserInputBuffer(input);
15531
inputPush(ctxt, stream);
15532
return (xmlDoRead(ctxt, URL, encoding, options, 1));
15537
* @ctxt: an XML parser context
15538
* @ioread: an I/O read function
15539
* @ioclose: an I/O close function
15540
* @ioctx: an I/O handler
15541
* @URL: the base URL to use for the document
15542
* @encoding: the document encoding, or NULL
15543
* @options: a combination of xmlParserOption
15545
* parse an XML document from I/O functions and source and build a tree.
15546
* This reuses the existing @ctxt parser context
15548
* Returns the resulting document tree
15551
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15552
xmlInputCloseCallback ioclose, void *ioctx,
15554
const char *encoding, int options)
15556
xmlParserInputBufferPtr input;
15557
xmlParserInputPtr stream;
15559
if (ioread == NULL)
15564
xmlCtxtReset(ctxt);
15566
input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15567
XML_CHAR_ENCODING_NONE);
15568
if (input == NULL) {
15569
if (ioclose != NULL)
15573
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15574
if (stream == NULL) {
15575
xmlFreeParserInputBuffer(input);
15578
inputPush(ctxt, stream);
15579
return (xmlDoRead(ctxt, URL, encoding, options, 1));
15582
#define bottom_parser
15583
#include "elfgcchack.h"