236
256
create string with merged properties
238
static char *AddProperty(char *style, char *property)
257
static tmbstr AddProperty( ctmbstr style, ctmbstr property )
242
prop = CreateProps(null, style);
262
prop = CreateProps(NULL, style);
243
263
prop = CreateProps(prop, property);
244
style = CreatePropString(prop);
264
line = CreatePropString(prop);
245
265
FreeStyleProps(prop);
249
void FreeStyles(Lexer *lexer)
270
void FreeStyles( TidyDocImpl* doc )
253
for (style = lexer->styles; style; style = next)
272
Lexer* lexer = doc->lexer;
258
MemFree(style->tag_class);
259
MemFree(style->properties);
275
TagStyle *style, *next;
276
for ( style = lexer->styles; style; style = next )
279
MemFree( style->tag );
280
MemFree( style->tag_class );
281
MemFree( style->properties );
264
static char *GensymClass(char *tag)
287
static tmbstr GensymClass( TidyDocImpl* doc )
267
char buf[512]; /* CSSPrefix is limited to 256 characters */
289
tmbchar buf[512]; /* CSSPrefix is limited to 256 characters */
290
ctmbstr pfx = cfgStr(doc, TidyCSSPrefix);
291
if ( pfx == NULL || *pfx == 0 )
269
sprintf(buf, "%s%d", (CSSPrefix ? CSSPrefix : "c"), n++); /* #508936 - CSS class naming for -clean option */
294
tmbsnprintf(buf, sizeof(buf), "%s%u", pfx, ++doc->nClassId );
295
return tmbstrdup(buf);
273
static char *FindStyle(Lexer *lexer, char *tag, char *properties)
298
static ctmbstr FindStyle( TidyDocImpl* doc, ctmbstr tag, ctmbstr properties )
300
Lexer* lexer = doc->lexer;
277
303
for (style = lexer->styles; style; style=style->next)
279
if (wstrcmp(style->tag, tag) == 0 &&
280
wstrcmp(style->properties, properties) == 0)
305
if (tmbstrcmp(style->tag, tag) == 0 &&
306
tmbstrcmp(style->properties, properties) == 0)
281
307
return style->tag_class;
284
style = (Style *)MemAlloc(sizeof(Style));
285
style->tag = wstrdup(tag);
286
style->tag_class = GensymClass(tag);
287
style->properties = wstrdup(properties);
310
style = (TagStyle *)MemAlloc( sizeof(TagStyle) );
311
style->tag = tmbstrdup(tag);
312
style->tag_class = GensymClass( doc );
313
style->properties = tmbstrdup( properties );
288
314
style->next = lexer->styles;
289
315
lexer->styles = style;
290
316
return style->tag_class;
326
352
Assumes that node doesn't have a class attribute
328
static void Style2Rule(Lexer *lexer, Node *node)
354
static void Style2Rule( TidyDocImpl* doc, Node *node)
330
356
AttVal *styleattr, *classattr;
333
styleattr = GetAttrByName(node, "style");
359
styleattr = AttrGetById(node, TidyAttr_STYLE);
337
classname = FindStyle(lexer, node->element, styleattr->value);
338
classattr = GetAttrByName(node, "class");
363
/* fix for http://tidy.sf.net/bug/850215 */
364
if (!styleattr->value)
366
RemoveAttribute(doc, node, styleattr);
370
classname = FindStyle( doc, node->element, styleattr->value );
371
classattr = AttrGetById(node, TidyAttr_CLASS);
341
374
if there already is a class attribute
342
then append class name after a space
375
then append class name after an underscore
346
int len = wstrlen(classattr->value) +
347
wstrlen(classname) + 2;
348
char *s = (char *)MemAlloc(len *sizeof(char)); /* #427668 - was malloc() - fix by Arnaud BERCEGEAY 05 Aug 00 */
349
wstrcpy(s, classattr->value);
351
wstrcat(s, classname);
352
MemFree(classattr->value);
379
uint len = tmbstrlen(classattr->value) +
380
tmbstrlen(classname) + 2;
381
tmbstr s = (tmbstr) MemAlloc( len );
383
if (classattr->value)
385
tmbstrcpy(s, classattr->value);
388
tmbstrcat(s, classname);
389
if (classattr->value)
390
MemFree(classattr->value);
353
391
classattr->value = s;
354
RemoveAttribute(node, styleattr);
392
RemoveAttribute( doc, node, styleattr );
356
394
else /* reuse style attribute for class attribute */
358
396
MemFree(styleattr->attribute);
359
397
MemFree(styleattr->value);
360
styleattr->attribute = wstrdup("class");
361
styleattr->value = wstrdup(classname);
398
styleattr->attribute = tmbstrdup("class");
399
styleattr->value = tmbstrdup(classname);
366
static void AddColorRule(Lexer *lexer, char *selector, char *color)
404
static void AddColorRule( Lexer* lexer, ctmbstr selector, ctmbstr color )
406
if ( selector && color )
370
408
AddStringLiteral(lexer, selector);
371
409
AddStringLiteral(lexer, " { color: ");
449
479
AddStringLiteral(lexer, " }\n");
452
attr = GetAttrByName(body, "link");
482
if (NULL != (attr = AttrGetById(body, TidyAttr_LINK)))
456
484
AddColorRule(lexer, " :link", attr->value);
457
RemoveAttribute(body, attr);
485
RemoveAttribute( doc, body, attr );
460
attr = GetAttrByName(body, "vlink");
488
if (NULL != (attr = AttrGetById(body, TidyAttr_VLINK)))
464
490
AddColorRule(lexer, " :visited", attr->value);
465
RemoveAttribute(body, attr);
491
RemoveAttribute( doc, body, attr );
468
attr = GetAttrByName(body, "alink");
494
if (NULL != (attr = AttrGetById(body, TidyAttr_ALINK)))
472
496
AddColorRule(lexer, " :active", attr->value);
473
RemoveAttribute(body, attr);
497
RemoveAttribute( doc, body, attr );
477
static Bool NiceBody(Lexer *lexer, Node *doc)
501
static Bool NiceBody( TidyDocImpl* doc )
479
Node *body = FindBody(doc);
503
Node* node = FindBody(doc);
484
GetAttrByName(body, "background") ||
485
GetAttrByName(body, "bgcolor") ||
486
GetAttrByName(body, "text") ||
487
GetAttrByName(body, "link") ||
488
GetAttrByName(body, "vlink") ||
489
GetAttrByName(body, "alink")
506
if (AttrGetById(node, TidyAttr_BACKGROUND) ||
507
AttrGetById(node, TidyAttr_BGCOLOR) ||
508
AttrGetById(node, TidyAttr_TEXT) ||
509
AttrGetById(node, TidyAttr_LINK) ||
510
AttrGetById(node, TidyAttr_VLINK) ||
511
AttrGetById(node, TidyAttr_ALINK))
492
lexer->badLayout |= USING_BODY;
513
doc->badLayout |= USING_BODY;
857
sprintf(buf, "%d%%", (int)(x));
838
/* Add 0.001 to avoid roundoff error - see #1004512 */
839
tmbsnprintf(buf, count, "%d%%", (int)(x+0.001));
862
843
return "larger"; /* "140%" */
865
static void AddFontFace(Node *node, char *face)
846
static void AddFontFace( TidyDocImpl* doc, Node *node, ctmbstr face )
869
sprintf(buf, "font-family: %s", face);
870
AddStyleProperty(node, buf);
849
tmbsnprintf(buf, sizeof(buf), "font-family: %s", face );
850
AddStyleProperty( doc, node, buf );
873
static void AddFontSize(Node *node, char *size)
853
static void AddFontSize( TidyDocImpl* doc, Node* node, ctmbstr size )
875
char *value, buf[1024];
877
if (wstrcmp(size, "6") == 0 && node->tag == tag_p)
879
MemFree(node->element);
880
node->element = wstrdup("h1");
885
if (wstrcmp(size, "5") == 0 && node->tag == tag_p)
887
MemFree(node->element);
888
node->element = wstrdup("h2");
893
if (wstrcmp(size, "4") == 0 && node->tag == tag_p)
895
MemFree(node->element);
896
node->element = wstrdup("h3");
901
value = FontSize2Name(size);
855
tmbchar work[ 32 ] = {0};
856
ctmbstr value = NULL;
860
if (tmbstrcmp(size, "6") == 0)
862
else if (tmbstrcmp(size, "5") == 0)
864
else if (tmbstrcmp(size, "4") == 0)
869
MemFree(node->element);
870
node->element = tmbstrdup(value);
876
value = FontSize2Name(size, work, sizeof(work) - 1);
905
sprintf(buf, "font-size: %s", value);
906
AddStyleProperty(node, buf);
910
static void AddFontColor(Node *node, char *color)
914
sprintf(buf, "color: %s", color);
915
AddStyleProperty(node, buf);
918
static void AddAlign(Node *node, char *align)
920
char buf[1024], *p, *q;
922
/* force alignment value to lower case */
923
for (p = buf, q = "text-align: "; (*p++ = *q++););
924
for (p = p-1; (*p++ = ToLower(*align++)););
925
AddStyleProperty(node, buf);
881
tmbsnprintf(buf, sizeof(buf), "font-size: %s", value);
882
AddStyleProperty( doc, node, buf );
886
static void AddFontColor( TidyDocImpl* doc, Node *node, ctmbstr color)
889
tmbsnprintf(buf, sizeof(buf), "color: %s", color);
890
AddStyleProperty( doc, node, buf );
893
/* force alignment value to lower case */
894
static void AddAlign( TidyDocImpl* doc, Node *node, ctmbstr align )
899
tmbstrcpy( buf, "text-align: " );
900
for ( i = 12; i < sizeof(buf)/sizeof(buf[0])-1; ++i )
902
if ( (buf[i] = (tmbchar)ToLower(*align++)) == '\0' )
906
AddStyleProperty( doc, node, buf );
929
910
add style properties to node corresponding to
930
911
the font face, size and color attributes
932
static void AddFontStyles(Node *node, AttVal *av)
913
static void AddFontStyles( TidyDocImpl* doc, Node *node, AttVal *av)
936
if (wstrcmp(av->attribute, "face") == 0)
937
AddFontFace(node, av->value);
938
else if (wstrcmp(av->attribute, "size") == 0)
939
AddFontSize(node, av->value);
940
else if (wstrcmp(av->attribute, "color") == 0)
941
AddFontColor(node, av->value);
917
if (AttrHasValue(av))
920
AddFontFace( doc, node, av->value );
921
else if (attrIsSIZE(av))
922
AddFontSize( doc, node, av->value );
923
else if (attrIsCOLOR(av))
924
AddFontColor( doc, node, av->value );
1060
1006
Action: replace <center> by <div style="text-align: center">
1063
static Bool Center2Div(Lexer *lexer, Node *node, Node **pnode)
1009
static Bool Center2Div( TidyDocImpl* doc, Node *node, Node **pnode)
1065
if (node->tag == tag_center)
1011
if ( nodeIsCENTER(node) )
1013
if ( cfgBool(doc, TidyDropFontTags) )
1069
1015
if (node->content)
1071
Node *last = node->last, *parent = node->parent;
1073
DiscardContainer(node, pnode);
1075
node = InferredTag(lexer, "br");
1078
last->next->prev = node;
1080
node->next = last->next;
1084
if (parent->last == last)
1085
parent->last = node;
1087
node->parent = parent;
1017
Node *last = node->last;
1018
DiscardContainer( doc, node, pnode );
1020
node = InferredTag(doc, TidyTag_BR);
1021
InsertNodeAfterElement(last, node);
1091
Node *prev = node->prev, *next = node->next, *parent = node->parent;
1092
DiscardContainer(node, pnode);
1094
node = InferredTag(lexer, "br");
1097
node->parent = parent;
1025
Node *prev = node->prev, *next = node->next,
1026
*parent = node->parent;
1027
DiscardContainer( doc, node, pnode );
1029
node = InferredTag(doc, TidyTag_BR);
1102
parent->last = node;
1107
parent->content = node;
1031
InsertNodeBeforeElement(next, node);
1033
InsertNodeAfterElement(prev, node);
1035
InsertNodeAtStart(parent, node);
1113
node->tag = tag_div;
1114
MemFree(node->element);
1115
node->element = wstrdup("div");
1116
AddStyleProperty(node, "text-align: center");
1041
RenameElem( node, TidyTag_DIV );
1042
AddStyleProperty( doc, node, "text-align: center" );
1049
/* Copy child attributes to node. Duplicate attributes are overwritten.
1050
Unique attributes (such as ID) disable the action.
1051
Attributes style and class are not dealt with. A call to MergeStyles
1054
static Bool CopyAttrs( TidyDocImpl* doc, Node *node, Node *child)
1059
/* Detect attributes that cannot be merged or overwritten. */
1060
if (AttrGetById(child, TidyAttr_ID) != NULL
1061
&& AttrGetById(node, TidyAttr_ID) != NULL)
1064
/* Move child attributes to node. Attributes in node
1065
can be overwritten or merged. */
1066
for (av2 = child->attributes; av2; )
1068
/* Dealt by MergeStyles. */
1069
if (attrIsSTYLE(av2) || attrIsCLASS(av2))
1074
/* Avoid duplicates in node */
1075
if ((id=AttrId(av2)) != TidyAttr_UNKNOWN
1076
&& (av1=AttrGetById(node, id))!= NULL)
1077
RemoveAttribute( doc, node, av1 );
1079
/* Move attribute from child to node */
1080
DetachAttribute( child, av2 );
1084
InsertAttributeAtEnd( node, av1 );
1124
Symptom <div><div>...</div></div>
1125
Action: merge the two divs
1091
Symptom <XX><XX>...</XX></XX>
1092
Action: merge the two XXs
1127
This is useful after nested <dir>s used by Word
1094
For instance, this is useful after nested <dir>s used by Word
1128
1095
for indenting have been converted to <div>s
1097
If state is "no", no merging.
1098
If state is "yes", inner element is discarded. Only Style and Class
1099
attributes are merged using MergeStyles().
1100
If state is "auto", attributes are merged as described in CopyAttrs().
1101
Style and Class attributes are merged using MergeStyles().
1130
static Bool MergeDivs(Lexer *lexer, Node *node, Node **pnode)
1103
static Bool MergeNestedElements( TidyDocImpl* doc,
1104
TidyTagId Id, TidyTriState state, Node *node,
1105
Node **ARG_UNUSED(pnode))
1134
if (node->tag != tag_div)
1109
if ( state == TidyNoState
1110
|| !TagIsId(node, Id) )
1137
1113
child = node->content;
1142
if (child->tag != tag_div)
1145
if (child->next != null)
1148
MergeStyles(node, child);
1149
StripOnlyChild(node);
1116
|| child->next != NULL
1117
|| !TagIsId(child, Id) )
1120
if ( state == TidyAutoState
1121
&& CopyAttrs(doc, node, child) == no )
1124
MergeStyles( doc, node, child );
1125
StripOnlyChild( doc, node );
1354
1342
the font element's attributes and replacing them
1355
1343
by a single style attribute.
1357
static Bool Font2Span(Lexer *lexer, Node *node, Node **pnode)
1345
static Bool Font2Span( TidyDocImpl* doc, Node *node, Node **pnode )
1359
1347
AttVal *av, *style, *next;
1361
if (node->tag == tag_font)
1349
if ( nodeIsFONT(node) )
1351
if ( cfgBool(doc, TidyDropFontTags) )
1365
DiscardContainer(node, pnode);
1353
DiscardContainer( doc, node, pnode );
1369
/* if FONT is only child of parent element then leave alone */
1370
if (node->parent->content == node
1371
&& node->next == null)
1357
/* if FONT is only child of parent element then leave alone
1358
Do so only if BlockStyle may be successful. */
1359
if ( node->parent->content == node && node->next == NULL &&
1360
CanApplyBlockStyle(node->parent) )
1374
AddFontStyles(node, node->attributes);
1363
AddFontStyles( doc, node, node->attributes );
1376
1365
/* extract style attribute and free the rest */
1377
1366
av = node->attributes;
1382
1371
next = av->next;
1384
if (wstrcmp(av->attribute, "style") == 0)
1373
if (attrIsSTYLE(av))
1392
MemFree(av->attribute);
1380
FreeAttribute( doc, av );
1402
1385
node->attributes = style;
1404
node->tag = tag_span;
1405
MemFree(node->element);
1406
node->element = wstrdup("span");
1386
RenameElem( node, TidyTag_SPAN );
1414
static Bool IsElement(Node *node)
1416
return (node->type == StartTag || node->type == StartEndTag ? yes : no);
1420
1394
Applies all matching rules to a node.
1422
Node *CleanNode(Lexer *lexer, Node *node)
1396
Node* CleanNode( TidyDocImpl* doc, Node *node )
1399
TidyTriState mergeDivs = cfgAutoBool(doc, TidyMergeDivs);
1426
for (next = node; node && IsElement(node); node = next)
1401
for (next = node; nodeIsElement(node); node = next)
1428
if (Dir2Div(lexer, node, &next))
1403
if ( Dir2Div(doc, node, &next) )
1431
1406
/* Special case: true result means
1460
1435
/* Special case: if the current node is destroyed by
1461
** CleanNode() lower in the tree, this node and its
1462
** parent no longer exist. So we must jump back up
1463
** the CreateStyleProperties() call stack until we
1464
** have a valid node reference.
1436
** CleanNode() lower in the tree, this node and its parent
1437
** no longer exist. So we must jump back up the CleanTree()
1438
** call stack until we have a valid node reference.
1467
static Node *CreateStyleProperties(Lexer *lexer, Node *node, Node** prepl)
1441
static Node* CleanTree( TidyDocImpl* doc, Node *node )
1471
1443
if (node->content)
1474
for (child = node->content; child != null; child = child->next)
1446
for (child = node->content; child != NULL; child = child->next)
1476
child = CreateStyleProperties(lexer, child, &repl);
1448
child = CleanTree( doc, child );
1482
return CleanNode(lexer, node);
1454
return CleanNode( doc, node );
1485
static void DefineStyleRules(Lexer *lexer, Node *node)
1457
static void DefineStyleRules( TidyDocImpl* doc, Node *node )
1489
1461
if (node->content)
1491
1463
for (child = node->content;
1492
child != null; child = child->next)
1464
child != NULL; child = child->next)
1494
DefineStyleRules(lexer, child);
1466
DefineStyleRules( doc, child );
1498
Style2Rule(lexer, node);
1470
Style2Rule( doc, node );
1501
void CleanTree(Lexer *lexer, Node *doc)
1473
void CleanDocument( TidyDocImpl* doc )
1504
doc = CreateStyleProperties(lexer, doc, &repl);
1475
/* placeholder. CleanTree()/CleanNode() will not
1478
CleanTree( doc, &doc->root );
1480
if ( cfgBool(doc, TidyMakeClean) )
1508
DefineStyleRules(lexer, doc);
1509
CreateStyleElement(lexer, doc);
1482
DefineStyleRules( doc, &doc->root );
1483
CreateStyleElement( doc );
1513
1487
/* simplifies <b><b> ... </b> ...</b> etc. */
1514
void NestedEmphasis(Node *node)
1488
void NestedEmphasis( TidyDocImpl* doc, Node* node )
1596
static char indent_buf[32];
1599
1564
Replace implicit blockquote by div with an indent
1600
1565
taking care to reduce nested blockquotes to a single
1601
1566
div with the indent set to match the nesting depth
1603
void BQ2Div(Node *node)
1568
void BQ2Div( TidyDocImpl* doc, Node *node )
1570
tmbchar indent_buf[ 32 ];
1611
if (node->tag == tag_blockquote && node->implicit)
1575
if ( nodeIsBLOCKQUOTE(node) && node->implicit )
1615
while(HasOneChild(node) &&
1616
node->content->tag == tag_blockquote &&
1579
while( HasOneChild(node) &&
1580
nodeIsBLOCKQUOTE(node->content) &&
1620
StripOnlyChild(node);
1584
StripOnlyChild( doc, node );
1623
1587
if (node->content)
1624
BQ2Div(node->content);
1626
len = sprintf(indent_buf, "margin-left: %dem", 2*indent);
1628
MemFree(node->element);
1629
node->element = wstrdup(tag_div->name);
1630
node->tag = tag_div;
1632
attval = GetAttrByName(node, "style");
1638
s = (char *)MemAlloc(len + 3 + wstrlen(attval->value));
1640
wstrcpy(s, indent_buf);
1642
wstrcat(s, attval->value);
1644
MemFree(attval->value);
1650
AddAttribute(node, "style", indent_buf);
1588
BQ2Div( doc, node->content );
1590
tmbsnprintf(indent_buf, sizeof(indent_buf), "margin-left: %dem",
1593
RenameElem( node, TidyTag_DIV );
1594
AddStyleProperty(doc, node, indent_buf );
1653
1596
else if (node->content)
1654
BQ2Div(node->content);
1597
BQ2Div( doc, node->content );
1657
1599
node = node->next;
1662
Node *FindEnclosingCell(Node *node)
1604
Node* FindEnclosingCell( TidyDocImpl* ARG_UNUSED(doc), Node *node)
1666
1608
for ( check=node; check; check = check->parent )
1668
if ( check->tag == tag_td )
1610
if ( nodeIsTD(check) )
1674
1616
/* node is <![if ...]> prune up to <![endif]> */
1675
static Node *PruneSection(Lexer *lexer, Node *node)
1617
static Node* PruneSection( TidyDocImpl* doc, Node *node )
1619
Lexer* lexer = doc->lexer;
1679
if (wstrncmp(lexer->lexbuf + node->start, "if !supportEmptyParas", 21) == 0)
1623
ctmbstr lexbuf = lexer->lexbuf + node->start;
1624
if ( tmbstrncmp(lexbuf, "if !supportEmptyParas", 21) == 0 )
1681
Node* cell = FindEnclosingCell( node );
1626
Node* cell = FindEnclosingCell( doc, node );
1684
/* Need to put into cell so it doesn't look weird */
1685
char onesixty[2] = { (char) 160, (char)0 };
1686
Node* nbsp = NewLiteralTextNode( lexer, onesixty );
1629
/* Need to put into cell so it doesn't look weird
1631
Node* nbsp = NewLiteralTextNode( lexer, "\240" );
1632
assert( (byte)'\240' == (byte)160 );
1687
1633
InsertNodeBeforeElement( node, nbsp );
1691
1637
/* discard node and return next */
1692
node = DiscardElement(node);
1638
node = DiscardElement( doc, node );
1697
1643
if (node->type == SectionTag)
1699
if (wstrncmp(lexer->lexbuf + node->start, "if", 2) == 0)
1645
if (tmbstrncmp(lexer->lexbuf + node->start, "if", 2) == 0)
1701
node = PruneSection(lexer, node);
1647
node = PruneSection( doc, node );
1705
if (wstrncmp(lexer->lexbuf + node->start, "endif", 5) == 0)
1651
if (tmbstrncmp(lexer->lexbuf + node->start, "endif", 5) == 0)
1707
node = DiscardElement(node);
1653
node = DiscardElement( doc, node );
2048
1994
/* map sequence of <p class="Code"> to <pre>...</pre> */
2049
else if (attr && wstrcmp(attr->value, "Code") == 0)
1995
else if (AttrValueIs(attr, "Code"))
2051
1997
Node *br = NewLineNode(lexer);
2052
NormalizeSpaces(lexer, node);
1998
NormalizeSpaces(lexer, node->content);
2054
if (!list || list->tag != tag_pre)
2000
if ( !list || TagId(list) != TidyTag_PRE )
2056
list = InferredTag(lexer, "pre");
2002
list = InferredTag(doc, TidyTag_PRE);
2057
2003
InsertNodeBeforeElement(node, list);
2060
2006
/* remove node and append to contents of list */
2061
2007
RemoveNode(node);
2062
2008
InsertNodeAtEnd(list, node);
2063
StripSpan(lexer, node);
2009
StripSpan( doc, node );
2064
2010
InsertNodeAtEnd(list, br);
2065
2011
node = list->next;
2073
2022
/* strip out style and class attributes */
2074
if (node->type == StartTag || node->type == StartEndTag)
2075
PurgeWord2000Attributes(node);
2023
if (nodeIsElement(node))
2024
PurgeWord2000Attributes( doc, node );
2077
2026
if (node->content)
2078
CleanWord2000(lexer, node->content);
2027
CleanWord2000( doc, node->content );
2080
2029
node = node->next;
2084
Bool IsWord2000(Node *root)
2033
Bool IsWord2000( TidyDocImpl* doc )
2086
2035
AttVal *attval;
2087
2036
Node *node, *head;
2088
Node *html = FindHTML(root);
2037
Node *html = FindHTML( doc );
2090
2039
if (html && GetAttrByName(html, "xmlns:o"))
2093
2042
/* search for <meta name="GENERATOR" content="Microsoft ..."> */
2094
head = FindHEAD(root);
2043
head = FindHEAD( doc );
2098
2047
for (node = head->content; node; node = node->next)
2100
if (node->tag != tag_meta)
2103
attval = GetAttrByName(node, "name");
2105
if (attval == null || attval->value == null)
2108
if (wstrcasecmp(attval->value, "generator") != 0)
2111
attval = GetAttrByName(node, "content");
2113
if (attval == null || attval->value == null)
2116
if (wsubstr(attval->value, "Microsoft"))
2049
if ( !nodeIsMETA(node) )
2052
attval = AttrGetById( node, TidyAttr_NAME );
2054
if ( !AttrValueIs(attval, "generator") )
2057
attval = AttrGetById( node, TidyAttr_CONTENT );
2059
if ( AttrContains(attval, "Microsoft") )
2124
2067
/* where appropriate move object elements from head to body */
2125
void BumpObject(Lexer *lexer, Node *html)
2068
void BumpObject( TidyDocImpl* doc, Node *html )
2127
Node *node, *next, *head = null, *body = null;
2129
for (node = html->content; node != null; node = node->next)
2070
Node *node, *next, *head = NULL, *body = NULL;
2075
for ( node = html->content; node != NULL; node = node->next )
2131
if (node->tag == tag_head)
2077
if ( nodeIsHEAD(node) )
2134
if (node->tag == tag_body)
2080
if ( nodeIsBODY(node) )
2138
if (head != null && body != null)
2084
if ( head != NULL && body != NULL )
2140
for (node = head->content; node != null; node = next)
2086
for (node = head->content; node != NULL; node = next)
2142
2088
next = node->next;
2144
if (node->tag == tag_object)
2090
if ( nodeIsOBJECT(node) )
2147
2093
Bool bump = no;
2149
for (child = node->content; child != null; child = child->next)
2095
for (child = node->content; child != NULL; child = child->next)
2151
2097
/* bump to body unless content is param */
2152
if ((child->type == TextNode && !IsBlank(lexer, node))
2153
|| child->tag != tag_param)
2098
if ( (nodeIsText(child) && !IsBlank(doc->lexer, node))
2099
|| !nodeIsPARAM(child) )
2163
InsertNodeAtStart(body, node);
2109
InsertNodeAtStart( body, node );
2116
/* This is disabled due to http://tidy.sf.net/bug/681116 */
2118
void FixBrakes( TidyDocImpl* pDoc, Node *pParent )
2121
Bool bBRDeleted = no;
2123
if (NULL == pParent)
2126
/* First, check the status of All My Children */
2127
pNode = pParent->content;
2128
while (NULL != pNode )
2130
/* The node may get trimmed, so save the next pointer, if any */
2131
Node *pNext = pNode->next;
2132
FixBrakes( pDoc, pNode );
2137
/* As long as my last child is a <br />, move it to my last peer */
2138
if ( nodeCMIsBlock( pParent ))
2140
for ( pNode = pParent->last;
2141
NULL != pNode && nodeIsBR( pNode );
2142
pNode = pParent->last )
2144
if ( NULL == pNode->attributes && no == bBRDeleted )
2146
DiscardElement( pDoc, pNode );
2151
RemoveNode( pNode );
2152
InsertNodeAfterElement( pParent, pNode );
2155
TrimEmptyElement( pDoc, pParent );
2160
void VerifyHTTPEquiv(TidyDocImpl* pDoc, Node *head)
2163
StyleProp *pFirstProp = NULL, *pLastProp = NULL, *prop = NULL;
2164
tmbstr s, pszBegin, pszEnd;
2165
ctmbstr enc = GetEncodingNameFromTidyId(cfg(pDoc, TidyOutCharEncoding));
2170
if (!nodeIsHEAD(head))
2171
head = FindHEAD(pDoc);
2176
/* Find any <meta http-equiv='Content-Type' content='...' /> */
2177
for (pNode = head->content; NULL != pNode; pNode = pNode->next)
2179
AttVal* httpEquiv = AttrGetById(pNode, TidyAttr_HTTP_EQUIV);
2180
AttVal* metaContent = AttrGetById(pNode, TidyAttr_CONTENT);
2182
if ( !nodeIsMETA(pNode) || !metaContent ||
2183
!AttrValueIs(httpEquiv, "Content-Type") )
2186
pszBegin = s = tmbstrdup( metaContent->value );
2187
while (pszBegin && *pszBegin)
2189
while (isspace( *pszBegin ))
2192
while ('\0' != *pszEnd && ';' != *pszEnd)
2194
if (';' == *pszEnd )
2196
if (pszEnd > pszBegin)
2198
prop = (StyleProp *)MemAlloc(sizeof(StyleProp));
2199
prop->name = tmbstrdup( pszBegin );
2203
if (NULL != pLastProp)
2204
pLastProp->next = prop;
2214
/* find the charset property */
2215
for (prop = pFirstProp; NULL != prop; prop = prop->next)
2217
if (0 != tmbstrncasecmp( prop->name, "charset", 7 ))
2220
MemFree( prop->name );
2221
prop->name = MemAlloc( 8 + tmbstrlen(enc) + 1 );
2222
tmbstrcpy(prop->name, "charset=");
2223
tmbstrcpy(prop->name+8, enc);
2224
s = CreatePropString( pFirstProp );
2225
MemFree( metaContent->value );
2226
metaContent->value = s;
2229
/* #718127, prevent memory leakage */
2230
FreeStyleProps(pFirstProp);
2236
void DropComments(TidyDocImpl* doc, Node* node)
2244
if (node->type == CommentTag)
2247
FreeNode(doc, node);
2253
DropComments(doc, node->content);
2259
void DropFontElements(TidyDocImpl* doc, Node* node, Node **ARG_UNUSED(pnode))
2267
if (nodeIsFONT(node))
2269
DiscardContainer(doc, node, &next);
2275
DropFontElements(doc, node->content, &next);
2281
void WbrToSpace(TidyDocImpl* doc, Node* node)
2289
if (nodeIsWBR(node))
2292
text = NewLiteralTextNode(doc->lexer, " ");
2293
InsertNodeAfterElement(node, text);
2295
FreeNode(doc, node);
2301
WbrToSpace(doc, node->content);
2308
Filters from Word and PowerPoint often use smart
2309
quotes resulting in character codes between 128
2310
and 159. Unfortunately, the corresponding HTML 4.0
2311
entities for these are not widely supported. The
2312
following converts dashes and quotation marks to
2313
the nearest ASCII equivalent. My thanks to
2314
Andrzej Novosiolov for his help with this code.
2316
Note: The old code in the pretty printer applied
2317
this to all node types and attribute values while
2318
this routine applies it only to text nodes. First,
2319
Microsoft Office products rarely put the relevant
2320
characters into these tokens, second support for
2321
them is much better now and last but not least, it
2322
can be harmful to replace these characters since
2323
US-ASCII quote marks are often used as syntax
2324
characters, a simple
2326
<a onmouseover="alert('‘')">...</a>
2328
would be broken if the U+2018 is replaced by "'".
2329
The old code would also not check whether the
2330
quote mark is already used as delimiter,
2332
<p title='‘'>...</p>
2336
<p title='''>...</p>
2338
Since browser support is much better nowadays and
2339
high-quality typography is better than ASCII it'd
2340
probably be a good idea to drop the feature...
2342
void DowngradeTypography(TidyDocImpl* doc, Node* node)
2345
Lexer* lexer = doc->lexer;
2351
if (nodeIsText(node))
2354
tmbstr p = lexer->lexbuf + node->start;
2356
for (i = node->start; i < node->end; ++i)
2358
c = (unsigned char) lexer->lexbuf[i];
2361
i += GetUTF8(lexer->lexbuf + i, &c);
2363
if (c >= 0x2013 && c <= 0x201E)
2367
case 0x2013: /* en dash */
2368
case 0x2014: /* em dash */
2371
case 0x2018: /* left single quotation mark */
2372
case 0x2019: /* right single quotation mark */
2373
case 0x201A: /* single low-9 quotation mark */
2376
case 0x201C: /* left double quotation mark */
2377
case 0x201D: /* right double quotation mark */
2378
case 0x201E: /* double low-9 quotation mark */
2387
node->end = p - lexer->lexbuf;
2391
DowngradeTypography(doc, node->content);
2397
void ReplacePreformattedSpaces(TidyDocImpl* doc, Node* node)
2405
if (node->tag && node->tag->parser == ParsePre)
2407
NormalizeSpaces(doc->lexer, node->content);
2413
ReplacePreformattedSpaces(doc, node->content);
2419
void ConvertCDATANodes(TidyDocImpl* doc, Node* node)
2427
if (node->type == CDATATag)
2428
node->type = TextNode;
2431
ConvertCDATANodes(doc, node->content);
2438
FixLanguageInformation ensures that the document contains (only)
2439
the attributes for language information desired by the output
2440
document type. For example, for XHTML 1.0 documents both
2441
'xml:lang' and 'lang' are desired, for XHTML 1.1 only 'xml:lang'
2442
is desired and for HTML 4.01 only 'lang' is desired.
2444
void FixLanguageInformation(TidyDocImpl* doc, Node* node, Bool wantXmlLang, Bool wantLang)
2452
/* todo: report modifications made here to the report system */
2454
if (nodeIsElement(node))
2456
AttVal* lang = AttrGetById(node, TidyAttr_LANG);
2457
AttVal* xmlLang = AttrGetById(node, TidyAttr_XML_LANG);
2459
if (lang && xmlLang)
2462
todo: check whether both attributes are in sync,
2463
here or elsewhere, where elsewhere is probably
2465
AD - March 2005: not mandatory according to the standards.
2468
else if (lang && wantXmlLang)
2470
if (NodeAttributeVersions( node, TidyAttr_XML_LANG )
2471
& doc->lexer->versionEmitted)
2472
RepairAttrValue(doc, node, "xml:lang", lang->value);
2474
else if (xmlLang && wantLang)
2476
if (NodeAttributeVersions( node, TidyAttr_LANG )
2477
& doc->lexer->versionEmitted)
2478
RepairAttrValue(doc, node, "lang", xmlLang->value);
2481
if (lang && !wantLang)
2482
RemoveAttribute(doc, node, lang);
2484
if (xmlLang && !wantXmlLang)
2485
RemoveAttribute(doc, node, xmlLang);
2489
FixLanguageInformation(doc, node->content, wantXmlLang, wantLang);
2496
Set/fix/remove <html xmlns='...'>
2498
void FixXhtmlNamespace(TidyDocImpl* doc, Bool wantXmlns)
2500
Node* html = FindHTML(doc);
2506
xmlns = AttrGetById(html, TidyAttr_XMLNS);
2510
if (!AttrValueIs(xmlns, XHTML_NAMESPACE))
2511
RepairAttrValue(doc, html, "xmlns", XHTML_NAMESPACE);
2515
RemoveAttribute(doc, html, xmlns);
2522
void FixAnchors(TidyDocImpl* doc, Node *node, Bool wantName, Bool wantId)
2530
if (IsAnchorElement(doc, node))
2532
AttVal *name = AttrGetById(node, TidyAttr_NAME);
2533
AttVal *id = AttrGetById(node, TidyAttr_ID);
2535
/* todo: how are empty name/id attributes handled? */
2539
Bool NameHasValue = AttrHasValue(name);
2540
Bool IdHasValue = AttrHasValue(id);
2541
if ( (NameHasValue != IdHasValue) ||
2542
(NameHasValue && IdHasValue &&
2543
tmbstrcmp(name->value, id->value) != 0 ) )
2544
ReportAttrError( doc, node, name, ID_NAME_MISMATCH);
2546
else if (name && wantId)
2548
if (NodeAttributeVersions( node, TidyAttr_ID )
2549
& doc->lexer->versionEmitted)
2551
if (IsValidHTMLID(name->value))
2553
RepairAttrValue(doc, node, "id", name->value);
2557
ReportAttrError(doc, node, name, INVALID_XML_ID);
2561
else if (id && wantName)
2563
if (NodeAttributeVersions( node, TidyAttr_NAME )
2564
& doc->lexer->versionEmitted)
2565
/* todo: do not assume id is valid */
2566
RepairAttrValue(doc, node, "name", id->value);
2570
RemoveAttribute(doc, node, id);
2572
if (name && !wantName)
2573
RemoveAttribute(doc, node, name);
2575
if (AttrGetById(node, TidyAttr_NAME) == NULL &&
2576
AttrGetById(node, TidyAttr_ID) == NULL)
2577
RemoveAnchorByNode(doc, node);
2581
FixAnchors(doc, node->content, wantName, wantId);