2
* HTMLtree.c : implementation of access functions for an HTML tree.
4
* See Copyright for the status of this software.
12
#ifdef LIBXML_HTML_ENABLED
14
#include <string.h> /* for memset() only ! */
23
#include <libxml/xmlmemory.h>
24
#include <libxml/HTMLparser.h>
25
#include <libxml/HTMLtree.h>
26
#include <libxml/entities.h>
27
#include <libxml/valid.h>
28
#include <libxml/xmlerror.h>
29
#include <libxml/parserInternals.h>
30
#include <libxml/globals.h>
31
#include <libxml/uri.h>
33
/************************************************************************
35
* Getting/Setting encoding meta tags *
37
************************************************************************/
40
* htmlGetMetaEncoding:
43
* Encoding definition lookup in the Meta tags
45
* Returns the current encoding as flagged in the HTML source
48
htmlGetMetaEncoding(htmlDocPtr doc) {
50
const xmlChar *content;
51
const xmlChar *encoding;
61
if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
62
if (xmlStrEqual(cur->name, BAD_CAST"html"))
64
if (xmlStrEqual(cur->name, BAD_CAST"head"))
66
if (xmlStrEqual(cur->name, BAD_CAST"meta"))
79
if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
80
if (xmlStrEqual(cur->name, BAD_CAST"head"))
82
if (xmlStrEqual(cur->name, BAD_CAST"meta"))
93
* Search the meta elements
97
if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
98
if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
99
xmlAttrPtr attr = cur->properties;
101
const xmlChar *value;
105
while (attr != NULL) {
106
if ((attr->children != NULL) &&
107
(attr->children->type == XML_TEXT_NODE) &&
108
(attr->children->next == NULL)) {
109
value = attr->children->content;
110
if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
111
&& (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
113
else if ((value != NULL)
114
&& (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
116
if ((http != 0) && (content != NULL))
128
encoding = xmlStrstr(content, BAD_CAST"charset=");
129
if (encoding == NULL)
130
encoding = xmlStrstr(content, BAD_CAST"Charset=");
131
if (encoding == NULL)
132
encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
133
if (encoding != NULL) {
136
encoding = xmlStrstr(content, BAD_CAST"charset =");
137
if (encoding == NULL)
138
encoding = xmlStrstr(content, BAD_CAST"Charset =");
139
if (encoding == NULL)
140
encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
141
if (encoding != NULL)
144
if (encoding != NULL) {
145
while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
151
* htmlSetMetaEncoding:
153
* @encoding: the encoding string
155
* Sets the current encoding in the Meta tags
156
* NOTE: this will not change the document content encoding, just
157
* the META flag associated.
159
* Returns 0 in case of success and -1 in case of error
162
htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
163
htmlNodePtr cur, meta;
164
const xmlChar *content;
165
char newcontent[100];
171
if (encoding != NULL) {
172
snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
174
newcontent[sizeof(newcontent) - 1] = 0;
182
while (cur != NULL) {
183
if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
184
if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
186
if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
188
if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
200
while (cur != NULL) {
201
if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
202
if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
204
if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
212
if (cur->children == NULL) {
213
if (encoding == NULL)
215
meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
216
xmlAddChild(cur, meta);
217
xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
218
xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
224
if (encoding != NULL) {
226
* Create a new Meta element with the right attributes
229
meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
230
xmlAddPrevSibling(cur, meta);
231
xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
232
xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
236
* Search and destroy all the remaining meta elements carrying
237
* encoding information
239
while (cur != NULL) {
240
if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
241
if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
242
xmlAttrPtr attr = cur->properties;
244
const xmlChar *value;
248
while (attr != NULL) {
249
if ((attr->children != NULL) &&
250
(attr->children->type == XML_TEXT_NODE) &&
251
(attr->children->next == NULL)) {
252
value = attr->children->content;
253
if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
254
&& (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
258
if ((value != NULL) &&
259
(!xmlStrcasecmp(attr->name, BAD_CAST"content")))
262
if ((http != 0) && (content != NULL))
267
if ((http != 0) && (content != NULL)) {
285
* These are the HTML attributes which will be output
286
* in minimized form, i.e. <option selected="selected"> will be
287
* output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
290
static const char* htmlBooleanAttrs[] = {
291
"checked", "compact", "declare", "defer", "disabled", "ismap",
292
"multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
299
* @name: the name of the attribute to check
301
* Determine if a given attribute is a boolean attribute.
303
* returns: false if the attribute is not boolean, true otherwise.
306
htmlIsBooleanAttr(const xmlChar *name)
310
while (htmlBooleanAttrs[i] != NULL) {
311
if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
318
#ifdef LIBXML_OUTPUT_ENABLED
319
/************************************************************************
321
* Output error handlers *
323
************************************************************************/
326
* @extra: extra information
328
* Handle an out of memory condition
331
htmlSaveErrMemory(const char *extra)
333
__xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
338
* @code: the error number
339
* @node: the location of the error.
340
* @extra: extra information
342
* Handle an output (save) error identified by @code
345
htmlSaveErr(int code, xmlNodePtr node, const char *extra)
347
const char *msg = NULL;
350
case XML_SAVE_NOT_UTF8:
351
msg = "string is not in UTF-8\n";
353
case XML_SAVE_CHAR_INVALID:
354
msg = "invalid character value\n";
356
case XML_SAVE_UNKNOWN_ENCODING:
357
msg = "unknown encoding %s\n";
359
case XML_SAVE_NO_DOCTYPE:
360
msg = "HTML has no DOCTYPE\n";
363
msg = "unexpected error number\n";
365
__xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
368
/************************************************************************
370
* Dumping HTML tree content to a simple buffer *
372
************************************************************************/
375
htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
379
* htmlNodeDumpFormat:
380
* @buf: the HTML buffer output
382
* @cur: the current node
383
* @format: should formatting spaces be added
385
* Dump an HTML node, recursive behaviour, children are printed too.
387
* Returns the number of bytes written or -1 in case of error
390
htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
394
xmlOutputBufferPtr outbuf;
402
outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
403
if (outbuf == NULL) {
404
htmlSaveErrMemory("allocating HTML output buffer");
407
memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
408
outbuf->buffer = buf;
409
outbuf->encoder = NULL;
410
outbuf->writecallback = NULL;
411
outbuf->closecallback = NULL;
412
outbuf->context = NULL;
416
htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
418
ret = buf->use - use;
424
* @buf: the HTML buffer output
426
* @cur: the current node
428
* Dump an HTML node, recursive behaviour, children are printed too,
429
* and formatting returns are added.
431
* Returns the number of bytes written or -1 in case of error
434
htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
437
return(htmlNodeDumpFormat(buf, doc, cur, 1));
441
* htmlNodeDumpFileFormat:
442
* @out: the FILE pointer
444
* @cur: the current node
445
* @encoding: the document encoding
446
* @format: should formatting spaces be added
448
* Dump an HTML node, recursive behaviour, children are printed too.
450
* TODO: if encoding == NULL try to save in the doc encoding
452
* returns: the number of bytes written or -1 in case of failure.
455
htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
456
xmlNodePtr cur, const char *encoding, int format) {
457
xmlOutputBufferPtr buf;
458
xmlCharEncodingHandlerPtr handler = NULL;
463
if (encoding != NULL) {
466
enc = xmlParseCharEncoding(encoding);
467
if (enc != XML_CHAR_ENCODING_UTF8) {
468
handler = xmlFindCharEncodingHandler(encoding);
475
* Fallback to HTML or ASCII when the encoding is unspecified
478
handler = xmlFindCharEncodingHandler("HTML");
480
handler = xmlFindCharEncodingHandler("ascii");
483
* save the content to a temp buffer.
485
buf = xmlOutputBufferCreateFile(out, handler);
486
if (buf == NULL) return(0);
488
htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
490
ret = xmlOutputBufferClose(buf);
496
* @out: the FILE pointer
498
* @cur: the current node
500
* Dump an HTML node, recursive behaviour, children are printed too,
501
* and formatting returns are added.
504
htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
505
htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
509
* htmlDocDumpMemoryFormat:
511
* @mem: OUT: the memory pointer
512
* @size: OUT: the memory length
513
* @format: should formatting spaces be added
515
* Dump an HTML document in memory and return the xmlChar * and its size.
516
* It's up to the caller to free the memory.
519
htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
520
xmlOutputBufferPtr buf;
521
xmlCharEncodingHandlerPtr handler = NULL;
522
const char *encoding;
526
if ((mem == NULL) || (size == NULL))
534
encoding = (const char *) htmlGetMetaEncoding(cur);
536
if (encoding != NULL) {
539
enc = xmlParseCharEncoding(encoding);
540
if (enc != cur->charset) {
541
if (cur->charset != XML_CHAR_ENCODING_UTF8) {
550
handler = xmlFindCharEncodingHandler(encoding);
551
if (handler == NULL) {
557
handler = xmlFindCharEncodingHandler(encoding);
562
* Fallback to HTML or ASCII when the encoding is unspecified
565
handler = xmlFindCharEncodingHandler("HTML");
567
handler = xmlFindCharEncodingHandler("ascii");
569
buf = xmlAllocOutputBuffer(handler);
576
htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
578
xmlOutputBufferFlush(buf);
579
if (buf->conv != NULL) {
580
*size = buf->conv->use;
581
*mem = xmlStrndup(buf->conv->content, *size);
583
*size = buf->buffer->use;
584
*mem = xmlStrndup(buf->buffer->content, *size);
586
(void)xmlOutputBufferClose(buf);
592
* @mem: OUT: the memory pointer
593
* @size: OUT: the memory length
595
* Dump an HTML document in memory and return the xmlChar * and its size.
596
* It's up to the caller to free the memory.
599
htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
600
htmlDocDumpMemoryFormat(cur, mem, size, 1);
604
/************************************************************************
606
* Dumping HTML tree content to an I/O output buffer *
608
************************************************************************/
610
void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
614
* @buf: the HTML buffer output
616
* @encoding: the encoding string
618
* TODO: check whether encoding is needed
620
* Dump the HTML document DTD, if any.
623
htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
624
const char *encoding ATTRIBUTE_UNUSED) {
625
xmlDtdPtr cur = doc->intSubset;
628
htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
631
xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
632
xmlOutputBufferWriteString(buf, (const char *)cur->name);
633
if (cur->ExternalID != NULL) {
634
xmlOutputBufferWriteString(buf, " PUBLIC ");
635
xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
636
if (cur->SystemID != NULL) {
637
xmlOutputBufferWriteString(buf, " ");
638
xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
640
} else if (cur->SystemID != NULL) {
641
xmlOutputBufferWriteString(buf, " SYSTEM ");
642
xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
644
xmlOutputBufferWriteString(buf, ">\n");
648
* htmlAttrDumpOutput:
649
* @buf: the HTML buffer output
651
* @cur: the attribute pointer
652
* @encoding: the encoding string
654
* Dump an HTML attribute
657
htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
658
const char *encoding ATTRIBUTE_UNUSED) {
662
* TODO: The html output method should not escape a & character
663
* occurring in an attribute value immediately followed by
664
* a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
670
xmlOutputBufferWriteString(buf, " ");
671
if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
672
xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
673
xmlOutputBufferWriteString(buf, ":");
675
xmlOutputBufferWriteString(buf, (const char *)cur->name);
676
if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
677
value = xmlNodeListGetString(doc, cur->children, 0);
679
xmlOutputBufferWriteString(buf, "=");
680
if ((cur->ns == NULL) && (cur->parent != NULL) &&
681
(cur->parent->ns == NULL) &&
682
((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
683
(!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
684
(!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
685
((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
686
(!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
688
xmlChar *tmp = value;
690
while (IS_BLANK_CH(*tmp)) tmp++;
692
escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
693
if (escaped != NULL) {
694
xmlBufferWriteQuotedString(buf->buffer, escaped);
697
xmlBufferWriteQuotedString(buf->buffer, value);
700
xmlBufferWriteQuotedString(buf->buffer, value);
704
xmlOutputBufferWriteString(buf, "=\"\"");
710
* htmlAttrListDumpOutput:
711
* @buf: the HTML buffer output
713
* @cur: the first attribute pointer
714
* @encoding: the encoding string
716
* Dump a list of HTML attributes
719
htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
723
while (cur != NULL) {
724
htmlAttrDumpOutput(buf, doc, cur, encoding);
732
* htmlNodeListDumpOutput:
733
* @buf: the HTML buffer output
735
* @cur: the first node
736
* @encoding: the encoding string
737
* @format: should formatting spaces be added
739
* Dump an HTML node list, recursive behaviour, children are printed too.
742
htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
743
xmlNodePtr cur, const char *encoding, int format) {
747
while (cur != NULL) {
748
htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
754
* htmlNodeDumpFormatOutput:
755
* @buf: the HTML buffer output
757
* @cur: the current node
758
* @encoding: the encoding string
759
* @format: should formatting spaces be added
761
* Dump an HTML node, recursive behaviour, children are printed too.
764
htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
765
xmlNodePtr cur, const char *encoding, int format) {
766
const htmlElemDesc * info;
770
if ((cur == NULL) || (buf == NULL)) {
776
if (cur->type == XML_DTD_NODE)
778
if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
779
(cur->type == XML_DOCUMENT_NODE)){
780
htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
783
if (cur->type == XML_ATTRIBUTE_NODE) {
784
htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur, encoding);
787
if (cur->type == HTML_TEXT_NODE) {
788
if (cur->content != NULL) {
789
if (((cur->name == (const xmlChar *)xmlStringText) ||
790
(cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
791
((cur->parent == NULL) ||
792
((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
793
(xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
796
buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
797
if (buffer != NULL) {
798
xmlOutputBufferWriteString(buf, (const char *)buffer);
802
xmlOutputBufferWriteString(buf, (const char *)cur->content);
807
if (cur->type == HTML_COMMENT_NODE) {
808
if (cur->content != NULL) {
809
xmlOutputBufferWriteString(buf, "<!--");
810
xmlOutputBufferWriteString(buf, (const char *)cur->content);
811
xmlOutputBufferWriteString(buf, "-->");
815
if (cur->type == HTML_PI_NODE) {
816
if (cur->name == NULL)
818
xmlOutputBufferWriteString(buf, "<?");
819
xmlOutputBufferWriteString(buf, (const char *)cur->name);
820
if (cur->content != NULL) {
821
xmlOutputBufferWriteString(buf, " ");
822
xmlOutputBufferWriteString(buf, (const char *)cur->content);
824
xmlOutputBufferWriteString(buf, ">");
827
if (cur->type == HTML_ENTITY_REF_NODE) {
828
xmlOutputBufferWriteString(buf, "&");
829
xmlOutputBufferWriteString(buf, (const char *)cur->name);
830
xmlOutputBufferWriteString(buf, ";");
833
if (cur->type == HTML_PRESERVE_NODE) {
834
if (cur->content != NULL) {
835
xmlOutputBufferWriteString(buf, (const char *)cur->content);
841
* Get specific HTML info for that node.
844
info = htmlTagLookup(cur->name);
848
xmlOutputBufferWriteString(buf, "<");
849
if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
850
xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
851
xmlOutputBufferWriteString(buf, ":");
853
xmlOutputBufferWriteString(buf, (const char *)cur->name);
855
xmlNsListDumpOutput(buf, cur->nsDef);
856
if (cur->properties != NULL)
857
htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
859
if ((info != NULL) && (info->empty)) {
860
xmlOutputBufferWriteString(buf, ">");
861
if ((format) && (!info->isinline) && (cur->next != NULL)) {
862
if ((cur->next->type != HTML_TEXT_NODE) &&
863
(cur->next->type != HTML_ENTITY_REF_NODE) &&
864
(cur->parent != NULL) &&
865
(cur->parent->name != NULL) &&
866
(cur->parent->name[0] != 'p')) /* p, pre, param */
867
xmlOutputBufferWriteString(buf, "\n");
871
if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
872
(cur->children == NULL)) {
873
if ((info != NULL) && (info->saveEndTag != 0) &&
874
(xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
875
(xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
876
xmlOutputBufferWriteString(buf, ">");
878
xmlOutputBufferWriteString(buf, "></");
879
if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
880
xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
881
xmlOutputBufferWriteString(buf, ":");
883
xmlOutputBufferWriteString(buf, (const char *)cur->name);
884
xmlOutputBufferWriteString(buf, ">");
886
if ((format) && (cur->next != NULL) &&
887
(info != NULL) && (!info->isinline)) {
888
if ((cur->next->type != HTML_TEXT_NODE) &&
889
(cur->next->type != HTML_ENTITY_REF_NODE) &&
890
(cur->parent != NULL) &&
891
(cur->parent->name != NULL) &&
892
(cur->parent->name[0] != 'p')) /* p, pre, param */
893
xmlOutputBufferWriteString(buf, "\n");
897
xmlOutputBufferWriteString(buf, ">");
898
if ((cur->type != XML_ELEMENT_NODE) &&
899
(cur->content != NULL)) {
901
* Uses the OutputBuffer property to automatically convert
902
* invalids to charrefs
905
xmlOutputBufferWriteString(buf, (const char *) cur->content);
907
if (cur->children != NULL) {
908
if ((format) && (info != NULL) && (!info->isinline) &&
909
(cur->children->type != HTML_TEXT_NODE) &&
910
(cur->children->type != HTML_ENTITY_REF_NODE) &&
911
(cur->children != cur->last) &&
912
(cur->name != NULL) &&
913
(cur->name[0] != 'p')) /* p, pre, param */
914
xmlOutputBufferWriteString(buf, "\n");
915
htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
916
if ((format) && (info != NULL) && (!info->isinline) &&
917
(cur->last->type != HTML_TEXT_NODE) &&
918
(cur->last->type != HTML_ENTITY_REF_NODE) &&
919
(cur->children != cur->last) &&
920
(cur->name != NULL) &&
921
(cur->name[0] != 'p')) /* p, pre, param */
922
xmlOutputBufferWriteString(buf, "\n");
924
xmlOutputBufferWriteString(buf, "</");
925
if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
926
xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
927
xmlOutputBufferWriteString(buf, ":");
929
xmlOutputBufferWriteString(buf, (const char *)cur->name);
930
xmlOutputBufferWriteString(buf, ">");
931
if ((format) && (info != NULL) && (!info->isinline) &&
932
(cur->next != NULL)) {
933
if ((cur->next->type != HTML_TEXT_NODE) &&
934
(cur->next->type != HTML_ENTITY_REF_NODE) &&
935
(cur->parent != NULL) &&
936
(cur->parent->name != NULL) &&
937
(cur->parent->name[0] != 'p')) /* p, pre, param */
938
xmlOutputBufferWriteString(buf, "\n");
943
* htmlNodeDumpOutput:
944
* @buf: the HTML buffer output
946
* @cur: the current node
947
* @encoding: the encoding string
949
* Dump an HTML node, recursive behaviour, children are printed too,
950
* and formatting returns/spaces are added.
953
htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
954
xmlNodePtr cur, const char *encoding) {
955
htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
959
* htmlDocContentDumpFormatOutput:
960
* @buf: the HTML buffer output
962
* @encoding: the encoding string
963
* @format: should formatting spaces be added
965
* Dump an HTML document.
968
htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
969
const char *encoding, int format) {
974
if ((buf == NULL) || (cur == NULL))
978
* force to output the stuff as HTML, especially for entities
981
cur->type = XML_HTML_DOCUMENT_NODE;
982
if (cur->intSubset != NULL) {
983
htmlDtdDumpOutput(buf, cur, NULL);
985
if (cur->children != NULL) {
986
htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
988
xmlOutputBufferWriteString(buf, "\n");
989
cur->type = (xmlElementType) type;
993
* htmlDocContentDumpOutput:
994
* @buf: the HTML buffer output
996
* @encoding: the encoding string
998
* Dump an HTML document. Formatting returns/spaces are added.
1001
htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1002
const char *encoding) {
1003
htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
1006
/************************************************************************
1008
* Saving functions front-ends *
1010
************************************************************************/
1015
* @cur: the document
1017
* Dump an HTML document to an open FILE.
1019
* returns: the number of bytes written or -1 in case of failure.
1022
htmlDocDump(FILE *f, xmlDocPtr cur) {
1023
xmlOutputBufferPtr buf;
1024
xmlCharEncodingHandlerPtr handler = NULL;
1025
const char *encoding;
1030
if ((cur == NULL) || (f == NULL)) {
1034
encoding = (const char *) htmlGetMetaEncoding(cur);
1036
if (encoding != NULL) {
1037
xmlCharEncoding enc;
1039
enc = xmlParseCharEncoding(encoding);
1040
if (enc != cur->charset) {
1041
if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1048
handler = xmlFindCharEncodingHandler(encoding);
1049
if (handler == NULL)
1052
handler = xmlFindCharEncodingHandler(encoding);
1057
* Fallback to HTML or ASCII when the encoding is unspecified
1059
if (handler == NULL)
1060
handler = xmlFindCharEncodingHandler("HTML");
1061
if (handler == NULL)
1062
handler = xmlFindCharEncodingHandler("ascii");
1064
buf = xmlOutputBufferCreateFile(f, handler);
1065
if (buf == NULL) return(-1);
1066
htmlDocContentDumpOutput(buf, cur, NULL);
1068
ret = xmlOutputBufferClose(buf);
1074
* @filename: the filename (or URL)
1075
* @cur: the document
1077
* Dump an HTML document to a file. If @filename is "-" the stdout file is
1079
* returns: the number of bytes written or -1 in case of failure.
1082
htmlSaveFile(const char *filename, xmlDocPtr cur) {
1083
xmlOutputBufferPtr buf;
1084
xmlCharEncodingHandlerPtr handler = NULL;
1085
const char *encoding;
1088
if ((cur == NULL) || (filename == NULL))
1093
encoding = (const char *) htmlGetMetaEncoding(cur);
1095
if (encoding != NULL) {
1096
xmlCharEncoding enc;
1098
enc = xmlParseCharEncoding(encoding);
1099
if (enc != cur->charset) {
1100
if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1107
handler = xmlFindCharEncodingHandler(encoding);
1108
if (handler == NULL)
1114
* Fallback to HTML or ASCII when the encoding is unspecified
1116
if (handler == NULL)
1117
handler = xmlFindCharEncodingHandler("HTML");
1118
if (handler == NULL)
1119
handler = xmlFindCharEncodingHandler("ascii");
1122
* save the content to a temp buffer.
1124
buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1125
if (buf == NULL) return(0);
1127
htmlDocContentDumpOutput(buf, cur, NULL);
1129
ret = xmlOutputBufferClose(buf);
1134
* htmlSaveFileFormat:
1135
* @filename: the filename
1136
* @cur: the document
1137
* @format: should formatting spaces be added
1138
* @encoding: the document encoding
1140
* Dump an HTML document to a file using a given encoding.
1142
* returns: the number of bytes written or -1 in case of failure.
1145
htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1146
const char *encoding, int format) {
1147
xmlOutputBufferPtr buf;
1148
xmlCharEncodingHandlerPtr handler = NULL;
1151
if ((cur == NULL) || (filename == NULL))
1156
if (encoding != NULL) {
1157
xmlCharEncoding enc;
1159
enc = xmlParseCharEncoding(encoding);
1160
if (enc != cur->charset) {
1161
if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1168
handler = xmlFindCharEncodingHandler(encoding);
1169
if (handler == NULL)
1171
htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1174
htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
1178
* Fallback to HTML or ASCII when the encoding is unspecified
1180
if (handler == NULL)
1181
handler = xmlFindCharEncodingHandler("HTML");
1182
if (handler == NULL)
1183
handler = xmlFindCharEncodingHandler("ascii");
1186
* save the content to a temp buffer.
1188
buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1189
if (buf == NULL) return(0);
1191
htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
1193
ret = xmlOutputBufferClose(buf);
1199
* @filename: the filename
1200
* @cur: the document
1201
* @encoding: the document encoding
1203
* Dump an HTML document to a file using a given encoding
1204
* and formatting returns/spaces are added.
1206
* returns: the number of bytes written or -1 in case of failure.
1209
htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1210
return(htmlSaveFileFormat(filename, cur, encoding, 1));
1213
#endif /* LIBXML_OUTPUT_ENABLED */
1215
#define bottom_HTMLtree
1216
#include "elfgcchack.h"
1217
#endif /* LIBXML_HTML_ENABLED */