1
/* tidylib.c -- internal library definitions
3
(c) 1998-2005 (W3C) MIT, ERCIM, Keio University
4
See tidy.h for the copyright notice.
9
$Date: 2005/04/08 09:11:13 $
12
Defines HTML Tidy API implemented by tidy library.
14
Very rough initial cut for discussion purposes.
16
Public interface is const-correct and doesn't explicitly depend
17
on any globals. Thus, thread-safety may be introduced w/out
18
changing the interface.
20
Looking ahead to a C++ wrapper, C functions always pass
21
this-equivalent as 1st arg.
23
Created 2001-05-20 by Charles Reitzel
39
#ifdef TIDY_WIN32_MLANG_SUPPORT
44
/* Reinterpret the opaque TidyDoc handle as the internal TidyDocImpl pointer (plain cast). */
TidyDocImpl* tidyDocToImpl( TidyDoc tdoc )
46
return (TidyDocImpl*) tdoc;
48
/* Reinterpret an internal TidyDocImpl pointer as the opaque TidyDoc handle (plain cast). */
TidyDoc tidyImplToDoc( TidyDocImpl* impl )
50
return (TidyDoc) impl;
53
/* Map an opaque TidyNode handle to the internal Node pointer.
** NOTE(review): the body is not visible here; presumably a plain cast like
** the sibling converters -- confirm.
*/
Node* tidyNodeToImpl( TidyNode tnod )
57
/* Reinterpret an internal Node pointer as the opaque TidyNode handle (plain cast). */
TidyNode tidyImplToNode( Node* node )
59
return (TidyNode) node;
62
/* Reinterpret the opaque TidyAttr handle as the internal AttVal pointer (plain cast). */
AttVal* tidyAttrToImpl( TidyAttr tattr )
64
return (AttVal*) tattr;
66
/* Reinterpret an internal AttVal pointer as the opaque TidyAttr handle (plain cast). */
TidyAttr tidyImplToAttr( AttVal* attval )
68
return (TidyAttr) attval;
71
/* Reinterpret the opaque TidyOption handle as a const TidyOptionImpl pointer (plain cast). */
const TidyOptionImpl* tidyOptionToImpl( TidyOption topt )
73
return (const TidyOptionImpl*) topt;
75
/* Reinterpret a const TidyOptionImpl pointer as the opaque TidyOption handle (plain cast). */
TidyOption tidyImplToOption( const TidyOptionImpl* option )
77
return (TidyOption) option;
81
/* Tidy public interface
83
** Most functions return an integer:
91
/* Public constructor: builds a document with tidyDocCreate() and returns it
** as an opaque TidyDoc handle.
*/
TidyDoc TIDY_CALL tidyCreate(void)
93
TidyDocImpl* impl = tidyDocCreate();
94
return tidyImplToDoc( impl );
97
/* Public destructor: converts the handle to its implementation pointer and
** hands it to tidyDocRelease().
*/
void TIDY_CALL tidyRelease( TidyDoc tdoc )
99
TidyDocImpl* impl = tidyDocToImpl( tdoc );
100
tidyDocRelease( impl );
103
/* Allocate a TidyDocImpl with MemAlloc, zero it with ClearMemory, and point
** doc->errout at StdErrOutput() so errors can be reported from the start.
** NOTE(review): no NULL check on the MemAlloc result and no return statement
** are visible here -- confirm against the full definition.
*/
TidyDocImpl* tidyDocCreate(void)
105
TidyDocImpl* doc = (TidyDocImpl*)MemAlloc( sizeof(TidyDocImpl) );
106
ClearMemory( doc, sizeof(*doc) );
114
/* By default, wire tidy messages to standard error.
115
** Document input will be set by parsing routines.
116
** Document output will be set by pretty print routines.
117
** Config input will be set by config parsing routines.
118
** But we need to start off with a way to report errors.
120
doc->errout = StdErrOutput();
124
/* Tear down a document: asserts that docIn and docOut are already NULL,
** releases the error stream, frees and clears the root node, frees
** givenDoctype when set, then calls FreeAttrTable().
** NOTE(review): any NULL guard on doc and the freeing of doc itself are not
** visible here.
*/
void tidyDocRelease( TidyDocImpl* doc )
126
/* doc in/out opened and closed by parse/print routines */
129
assert( doc->docIn == NULL );
130
assert( doc->docOut == NULL );
132
ReleaseStreamOut( doc->errout );
137
FreeNode(doc, &doc->root);
138
ClearMemory(&doc->root, sizeof(Node));
140
if (doc->givenDoctype)
141
MemFree(doc->givenDoctype);
144
FreeAttrTable( doc );
150
/* Let application store a chunk of data w/ each Tidy instance.
151
** Useful for callbacks.
153
/* Store the caller-supplied appData value in the document (impl->appData). */
void TIDY_CALL tidySetAppData( TidyDoc tdoc, ulong appData )
155
TidyDocImpl* impl = tidyDocToImpl( tdoc );
157
impl->appData = appData;
159
/* Return the application data value previously stored in impl->appData. */
ulong TIDY_CALL tidyGetAppData( TidyDoc tdoc )
161
TidyDocImpl* impl = tidyDocToImpl( tdoc );
163
return impl->appData;
167
/* Return the release date string supplied by ReleaseDate(). */
ctmbstr TIDY_CALL tidyReleaseDate(void)
169
return ReleaseDate();
173
/* Get/set configuration options
175
/* Register pOptCallback on the document by storing it in impl->pOptCallback.
** NOTE(review): the Bool result is not visible here.
*/
Bool TIDY_CALL tidySetOptionCallback( TidyDoc tdoc, TidyOptCallback pOptCallback )
177
TidyDocImpl* impl = tidyDocToImpl( tdoc );
180
impl->pOptCallback = pOptCallback;
187
/* Load configuration from file cfgfil; returns the result of ParseConfigFile(). */
int TIDY_CALL tidyLoadConfig( TidyDoc tdoc, ctmbstr cfgfil )
189
TidyDocImpl* impl = tidyDocToImpl( tdoc );
191
return ParseConfigFile( impl, cfgfil );
195
/* Load configuration from file cfgfil using the named character encoding;
** returns the result of ParseConfigFileEnc().
*/
int TIDY_CALL tidyLoadConfigEnc( TidyDoc tdoc, ctmbstr cfgfil, ctmbstr charenc )
197
TidyDocImpl* impl = tidyDocToImpl( tdoc );
199
return ParseConfigFileEnc( impl, cfgfil, charenc );
203
/* Set the character encoding by name. The name is resolved with
** CharEncodingId(); the visible test requires enc >= 0 and a true result from
** AdjustCharEncoding(). ReportBadArgument() is called with "char-encoding".
** NOTE(review): the return statements and the branch that owns the report
** are not visible here.
*/
int TIDY_CALL tidySetCharEncoding( TidyDoc tdoc, ctmbstr encnam )
205
TidyDocImpl* impl = tidyDocToImpl( tdoc );
208
int enc = CharEncodingId( encnam );
209
if ( enc >= 0 && AdjustCharEncoding(impl, enc) )
212
ReportBadArgument( impl, "char-encoding" );
217
/* Set the input character encoding by name. The name is resolved with
** CharEncodingId(); the visible test requires enc >= 0 and a true result from
** SetOptionInt() on TidyInCharEncoding. ReportBadArgument() is called with
** "in-char-encoding".
** NOTE(review): the return statements are not visible here.
*/
int TIDY_CALL tidySetInCharEncoding( TidyDoc tdoc, ctmbstr encnam )
219
TidyDocImpl* impl = tidyDocToImpl( tdoc );
222
int enc = CharEncodingId( encnam );
223
if ( enc >= 0 && SetOptionInt( impl, TidyInCharEncoding, enc ) )
226
ReportBadArgument( impl, "in-char-encoding" );
231
/* Set the output character encoding by name. The name is resolved with
** CharEncodingId(); the visible test requires enc >= 0 and a true result from
** SetOptionInt() on TidyOutCharEncoding. ReportBadArgument() is called with
** "out-char-encoding".
** NOTE(review): the return statements are not visible here.
*/
int TIDY_CALL tidySetOutCharEncoding( TidyDoc tdoc, ctmbstr encnam )
233
TidyDocImpl* impl = tidyDocToImpl( tdoc );
236
int enc = CharEncodingId( encnam );
237
if ( enc >= 0 && SetOptionInt( impl, TidyOutCharEncoding, enc ) )
240
ReportBadArgument( impl, "out-char-encoding" );
245
/* Look up an option by name with lookupOption(); N_TIDY_OPTIONS is the
** error value.
** NOTE(review): the success-path return is not visible here.
*/
TidyOptionId TIDY_CALL tidyOptGetIdForName( ctmbstr optnam )
247
const TidyOptionImpl* option = lookupOption( optnam );
250
return N_TIDY_OPTIONS; /* Error */
253
/* Start iterating over the options: returns getOptionList(impl), with
** (TidyIterator) -1 as the fallback value.
** NOTE(review): the condition selecting between the two returns is not
** visible here.
*/
TidyIterator TIDY_CALL tidyGetOptionList( TidyDoc tdoc )
255
TidyDocImpl* impl = tidyDocToImpl( tdoc );
257
return getOptionList( impl );
258
return (TidyIterator) -1;
261
/* Fetch the next option for iterator pos via getNextOption() and return it
** as a TidyOption handle; option starts out NULL.
*/
TidyOption TIDY_CALL tidyGetNextOption( TidyDoc tdoc, TidyIterator* pos )
263
TidyDocImpl* impl = tidyDocToImpl( tdoc );
264
const TidyOptionImpl* option = NULL;
266
option = getNextOption( impl, pos );
269
return tidyImplToOption( option );
273
/* Return the option with id optId (via getOption) as a TidyOption handle.
** The tdoc argument is unused.
*/
TidyOption TIDY_CALL tidyGetOption( TidyDoc ARG_UNUSED(tdoc), TidyOptionId optId )
275
const TidyOptionImpl* option = getOption( optId );
276
return tidyImplToOption( option );
278
/* Return the option named optnam (via lookupOption) as a TidyOption handle.
** The doc argument is unused.
*/
TidyOption TIDY_CALL tidyGetOptionByName( TidyDoc ARG_UNUSED(doc), ctmbstr optnam )
280
const TidyOptionImpl* option = lookupOption( optnam );
281
return tidyImplToOption( option );
284
/* Return the id of an option; N_TIDY_OPTIONS is the fallback value.
** NOTE(review): the return that yields the option's own id is not visible here.
*/
TidyOptionId TIDY_CALL tidyOptGetId( TidyOption topt )
286
const TidyOptionImpl* option = tidyOptionToImpl( topt );
289
return N_TIDY_OPTIONS;
291
/* Return the name of an option.
** NOTE(review): only the handle conversion is visible here; the return
** statements are missing.
*/
ctmbstr TIDY_CALL tidyOptGetName( TidyOption topt )
293
const TidyOptionImpl* option = tidyOptionToImpl( topt );
298
/* Return the type of an option; (TidyOptionType) -1 is the fallback value.
** NOTE(review): the return that yields the option's own type is not visible here.
*/
TidyOptionType TIDY_CALL tidyOptGetType( TidyOption topt )
300
const TidyOptionImpl* option = tidyOptionToImpl( topt );
303
return (TidyOptionType) -1;
305
/* Return option->category, with (TidyConfigCategory) -1 as the fallback value.
** NOTE(review): the condition selecting between the two returns is not
** visible here.
*/
TidyConfigCategory TIDY_CALL tidyOptGetCategory( TidyOption topt )
307
const TidyOptionImpl* option = tidyOptionToImpl( topt );
309
return option->category;
310
return (TidyConfigCategory) -1;
312
/* For a non-NULL option of type TidyString, return its default (option->dflt)
** as a string.
** NOTE(review): the return for other cases is not visible here.
*/
ctmbstr TIDY_CALL tidyOptGetDefault( TidyOption topt )
314
const TidyOptionImpl* option = tidyOptionToImpl( topt );
315
if ( option && option->type == TidyString )
316
return (ctmbstr) option->dflt;
319
/* Return the integer default of an option; the visible test selects non-NULL
** options whose type is not TidyString.
** NOTE(review): the return statements are not visible here.
*/
ulong TIDY_CALL tidyOptGetDefaultInt( TidyOption topt )
321
const TidyOptionImpl* option = tidyOptionToImpl( topt );
322
if ( option && option->type != TidyString )
326
/* For a non-NULL option whose type is not TidyString, return yes when
** option->dflt is non-zero and no otherwise.
** NOTE(review): the return for other cases is not visible here.
*/
Bool TIDY_CALL tidyOptGetDefaultBool( TidyOption topt )
328
const TidyOptionImpl* option = tidyOptionToImpl( topt );
329
if ( option && option->type != TidyString )
330
return ( option->dflt ? yes : no );
333
/* Report whether an option is read-only: true when it has no parser
** (option->parser == NULL).
** NOTE(review): any NULL guard on option is not visible here.
*/
Bool TIDY_CALL tidyOptIsReadOnly( TidyOption topt )
335
const TidyOptionImpl* option = tidyOptionToImpl( topt );
337
return ( option->parser == NULL );
342
/* Start iterating over an option's pick list: returns getOptionPickList(option),
** with (TidyIterator) -1 as the fallback value.
** NOTE(review): the condition selecting between the two returns is not
** visible here.
*/
TidyIterator TIDY_CALL tidyOptGetPickList( TidyOption topt )
344
const TidyOptionImpl* option = tidyOptionToImpl( topt );
346
return getOptionPickList( option );
347
return (TidyIterator) -1;
349
/* Return the next pick-list entry for iterator pos via getNextOptionPick(). */
ctmbstr TIDY_CALL tidyOptGetNextPick( TidyOption topt, TidyIterator* pos )
351
const TidyOptionImpl* option = tidyOptionToImpl( topt );
353
return getNextOptionPick( option, pos );
358
/* Read the current string value of option optId with cfgStr(); optval starts
** out NULL.
** NOTE(review): the return statement is not visible here.
*/
ctmbstr TIDY_CALL tidyOptGetValue( TidyDoc tdoc, TidyOptionId optId )
360
TidyDocImpl* impl = tidyDocToImpl( tdoc );
361
ctmbstr optval = NULL;
363
optval = cfgStr( impl, optId );
366
/* Set option optId from the string val; returns the result of ParseConfigValue(). */
Bool TIDY_CALL tidyOptSetValue( TidyDoc tdoc, TidyOptionId optId, ctmbstr val )
368
TidyDocImpl* impl = tidyDocToImpl( tdoc );
370
return ParseConfigValue( impl, optId, val );
373
/* Set the option named optnam from the string val; returns the result of
** ParseConfigOption().
*/
Bool TIDY_CALL tidyOptParseValue( TidyDoc tdoc, ctmbstr optnam, ctmbstr val )
375
TidyDocImpl* impl = tidyDocToImpl( tdoc );
377
return ParseConfigOption( impl, optnam, val );
381
/* Read the current integer value of option optId with cfg().
** NOTE(review): the declaration of opti and the return statement are not
** visible here.
*/
ulong TIDY_CALL tidyOptGetInt( TidyDoc tdoc, TidyOptionId optId )
383
TidyDocImpl* impl = tidyDocToImpl( tdoc );
386
opti = cfg( impl, optId );
390
/* Set option optId to the integer val; returns the result of SetOptionInt(). */
Bool TIDY_CALL tidyOptSetInt( TidyDoc tdoc, TidyOptionId optId, ulong val )
392
TidyDocImpl* impl = tidyDocToImpl( tdoc );
394
return SetOptionInt( impl, optId, val );
398
/* Read the current boolean value of option optId with cfgBool(); the option
** descriptor is also fetched with getOption().
** NOTE(review): the declaration of optb, the use of option, and the return
** statement are not visible here.
*/
Bool TIDY_CALL tidyOptGetBool( TidyDoc tdoc, TidyOptionId optId )
400
TidyDocImpl* impl = tidyDocToImpl( tdoc );
404
const TidyOptionImpl* option = getOption( optId );
407
optb = cfgBool( impl, optId );
413
/* Set option optId to the boolean val; returns the result of SetOptionBool(). */
Bool TIDY_CALL tidyOptSetBool( TidyDoc tdoc, TidyOptionId optId, Bool val )
415
TidyDocImpl* impl = tidyDocToImpl( tdoc );
417
return SetOptionBool( impl, optId, val );
421
/* Read option optId as an integer encoding id and return the name that
** CharEncodingOptName() gives for it.
*/
ctmbstr TIDY_CALL tidyOptGetEncName( TidyDoc tdoc, TidyOptionId optId )
423
uint enc = tidyOptGetInt( tdoc, optId );
424
return CharEncodingOptName( enc );
427
/* Return the pick-list string for the current value of option optId. When the
** option has a pickList, the current integer value (pick) is read and the
** list is walked from index 0 while entries remain and ix < pick.
** NOTE(review): the loop body and the return statements are not visible here.
*/
ctmbstr TIDY_CALL tidyOptGetCurrPick( TidyDoc tdoc, TidyOptionId optId )
429
const TidyOptionImpl* option = getOption( optId );
430
if ( option && option->pickList )
432
uint ix, pick = tidyOptGetInt( tdoc, optId );
433
const ctmbstr* pL = option->pickList;
434
for ( ix=0; *pL && ix < pick; ++ix )
443
/* Start iterating over declared tags: declIter starts at 0 and is set from
** GetDeclaredTagList(impl).
** NOTE(review): the return statement is not visible here.
*/
TidyIterator TIDY_CALL tidyOptGetDeclTagList( TidyDoc tdoc )
445
TidyDocImpl* impl = tidyDocToImpl( tdoc );
446
TidyIterator declIter = 0;
448
declIter = GetDeclaredTagList( impl );
452
/* Return the next declared tag name for one tag category. optId is mapped to
** a UserTagType (TidyInlineTags, TidyBlockTags, TidyEmptyTags, TidyPreTags);
** when a mapping exists the name comes from GetNextDeclaredTag(), otherwise
** tagnam keeps its initial NULL.
** NOTE(review): the rest of the parameter list (iter) and the return
** statement are not visible here.
*/
ctmbstr TIDY_CALL tidyOptGetNextDeclTag( TidyDoc tdoc, TidyOptionId optId,
455
TidyDocImpl* impl = tidyDocToImpl( tdoc );
456
ctmbstr tagnam = NULL;
459
UserTagType tagtyp = tagtype_null;
460
if ( optId == TidyInlineTags )
461
tagtyp = tagtype_inline;
462
else if ( optId == TidyBlockTags )
463
tagtyp = tagtype_block;
464
else if ( optId == TidyEmptyTags )
465
tagtyp = tagtype_empty;
466
else if ( optId == TidyPreTags )
467
tagtyp = tagtype_pre;
468
if ( tagtyp != tagtype_null )
469
tagnam = GetNextDeclaredTag( impl, tagtyp, iter );
474
/* Return the documentation text of an option: looks up its TidyOptionDoc via
** tidyOptGetDocDesc() and returns docDesc->doc, or NULL when there is no
** description. The tdoc argument is unused.
*/
ctmbstr TIDY_CALL tidyOptGetDoc( TidyDoc ARG_UNUSED(tdoc), TidyOption opt )
476
const TidyOptionId optId = tidyOptGetId( opt );
477
const TidyOptionDoc* docDesc = tidyOptGetDocDesc( optId );
478
return docDesc ? docDesc->doc : NULL;
481
/* Start iterating over the options cross-referenced by an option's
** documentation: returns docDesc->links as the iterator when both the
** description and its links exist, otherwise a NULL iterator. The tdoc
** argument is unused.
*/
TidyIterator TIDY_CALL tidyOptGetDocLinksList( TidyDoc ARG_UNUSED(tdoc), TidyOption opt )
483
const TidyOptionId optId = tidyOptGetId( opt );
484
const TidyOptionDoc* docDesc = tidyOptGetDocDesc( optId );
485
if (docDesc && docDesc->links)
486
return (TidyIterator)docDesc->links;
487
return (TidyIterator)NULL;
490
/* Return the next linked option for iterator pos. *pos is read as a pointer
** to a TidyOptionId; if it points at TidyUnknownOption, *pos is set to NULL
** and 0 is returned. Otherwise the option is fetched with tidyGetOption() and
** *pos becomes NULL if *curr is TidyUnknownOption, else curr.
** NOTE(review): the declaration of opt, any advance of curr between the
** lookup and the final test, and the return statement are not visible here.
*/
TidyOption TIDY_CALL tidyOptGetNextDocLinks( TidyDoc tdoc, TidyIterator* pos )
492
const TidyOptionId* curr = (TidyOptionId *)*pos;
495
if (*curr == TidyUnknownOption)
497
*pos = (TidyIterator)NULL;
498
return (TidyOption)0;
500
opt = tidyGetOption(tdoc, *curr);
502
*pos = (*curr == TidyUnknownOption ) ?
503
(TidyIterator)NULL:(TidyIterator)curr;
507
/* Save the configuration to file cfgfil; returns the result of SaveConfigFile(). */
int TIDY_CALL tidyOptSaveFile( TidyDoc tdoc, ctmbstr cfgfil )
509
TidyDocImpl* impl = tidyDocToImpl( tdoc );
511
return SaveConfigFile( impl, cfgfil );
515
/* Save the configuration to a user output sink; returns the result of
** SaveConfigSink().
*/
int TIDY_CALL tidyOptSaveSink( TidyDoc tdoc, TidyOutputSink* sink )
517
TidyDocImpl* impl = tidyDocToImpl( tdoc );
519
return SaveConfigSink( impl, sink );
523
/* Take a snapshot of the current configuration via TakeConfigSnapshot().
** NOTE(review): the Bool result is not visible here.
*/
Bool TIDY_CALL tidyOptSnapshot( TidyDoc tdoc )
525
TidyDocImpl* impl = tidyDocToImpl( tdoc );
528
TakeConfigSnapshot( impl );
533
/* Restore the configuration from the snapshot via ResetConfigToSnapshot().
** NOTE(review): the Bool result is not visible here.
*/
Bool TIDY_CALL tidyOptResetToSnapshot( TidyDoc tdoc )
535
TidyDocImpl* impl = tidyDocToImpl( tdoc );
538
ResetConfigToSnapshot( impl );
543
/* Reset the whole configuration via ResetConfigToDefault().
** NOTE(review): the Bool result is not visible here.
*/
Bool TIDY_CALL tidyOptResetAllToDefault( TidyDoc tdoc )
545
TidyDocImpl* impl = tidyDocToImpl( tdoc );
548
ResetConfigToDefault( impl );
554
/* Reset the single option optId; returns the result of ResetOptionToDefault(). */
Bool TIDY_CALL tidyOptResetToDefault( TidyDoc tdoc, TidyOptionId optId )
556
TidyDocImpl* impl = tidyDocToImpl( tdoc );
558
return ResetOptionToDefault( impl, optId );
562
/* Return the result of ConfigDiffThanDefault() for this document. */
Bool TIDY_CALL tidyOptDiffThanDefault( TidyDoc tdoc )
564
TidyDocImpl* impl = tidyDocToImpl( tdoc );
566
return ConfigDiffThanDefault( impl );
569
/* Return the result of ConfigDiffThanSnapshot() for this document. */
Bool TIDY_CALL tidyOptDiffThanSnapshot( TidyDoc tdoc )
571
TidyDocImpl* impl = tidyDocToImpl( tdoc );
573
return ConfigDiffThanSnapshot( impl );
577
/* Copy the configuration of document "from" into document "to" with
** CopyConfig(); the copy is only attempted when both handles are non-NULL.
** NOTE(review): the Bool results are not visible here.
*/
Bool TIDY_CALL tidyOptCopyConfig( TidyDoc to, TidyDoc from )
579
TidyDocImpl* docTo = tidyDocToImpl( to );
580
TidyDocImpl* docFrom = tidyDocToImpl( from );
581
if ( docTo && docFrom )
583
CopyConfig( docTo, docFrom );
590
/* I/O and Message handling interface
592
** By default, Tidy will define, create and use
593
** instances of input and output handlers for
594
** standard C buffered I/O (i.e. FILE* stdin,
595
** FILE* stdout and FILE* stderr for content
596
** input, content output and diagnostic output,
597
** respectively). A FILE* cfgFile input handler
598
** will be used for config files. Command line
599
** options will just be set directly.
602
/* Use TidyReportFilter to filter messages by diagnostic level:
603
** info, warning, etc. Just set diagnostic output
604
** handler to redirect all diagnostics output. Return true
605
** to proceed with output, false to cancel.
607
/* Install a report filter callback by storing filt in impl->mssgFilt.
** NOTE(review): the Bool result is not visible here.
*/
Bool TIDY_CALL tidySetReportFilter( TidyDoc tdoc, TidyReportFilter filt )
609
TidyDocImpl* impl = tidyDocToImpl( tdoc );
612
impl->mssgFilt = filt;
619
/* Set the content output sink for a document.
** NOTE(review): only the handle conversion is visible here; what is done
** with outp and what is returned cannot be determined.
*/
int tidySetContentOutputSink( TidyDoc tdoc, TidyOutputSink* outp )
621
TidyDocImpl* impl = tidyDocToImpl( tdoc );
629
/* Set the diagnostic output sink for a document.
** NOTE(review): only the handle conversion is visible here; what is done
** with outp and what is returned cannot be determined.
*/
int tidySetDiagnosticOutputSink( TidyDoc tdoc, TidyOutputSink* outp )
631
TidyDocImpl* impl = tidyDocToImpl( tdoc );
643
/* Look up the message for errorNo in impl->messages via tidyMessage_Lookup().
** NOTE(review): the declaration of mssg and the return statement are not
** visible here.
*/
cmbstr tidyLookupMessage( TidyDoc tdoc, int errorNo )
645
TidyDocImpl* impl = tidyDocToImpl( tdoc );
648
mssg = tidyMessage_Lookup( impl->messages, errorNo );
654
/* Redirect diagnostics to the file errfilnam, opened with mode "wb". The
** current error stream is released and replaced by a FileOutput() stream
** built with the TidyOutCharEncoding and TidyNewline settings. On the else
** path FileError() reports the problem to the current error sink.
** NOTE(review): the condition guarding the two paths and the return
** statements are not visible here.
*/
FILE* TIDY_CALL tidySetErrorFile( TidyDoc tdoc, ctmbstr errfilnam )
656
TidyDocImpl* impl = tidyDocToImpl( tdoc );
659
FILE* errout = fopen( errfilnam, "wb" );
662
uint outenc = cfg( impl, TidyOutCharEncoding );
663
uint nl = cfg( impl, TidyNewline );
664
ReleaseStreamOut( impl->errout );
665
impl->errout = FileOutput( errout, outenc, nl );
668
else /* Emit message to current error sink */
669
FileError( impl, errfilnam, TidyError );
674
/* Redirect diagnostics into errbuf. The current error stream is released and
** replaced by a BufferOutput() stream built with the TidyOutCharEncoding and
** TidyNewline settings. Returns 0 when the new stream is non-NULL, -ENOMEM
** otherwise.
** NOTE(review): any guard around these statements is not visible here.
*/
int TIDY_CALL tidySetErrorBuffer( TidyDoc tdoc, TidyBuffer* errbuf )
676
TidyDocImpl* impl = tidyDocToImpl( tdoc );
679
uint outenc = cfg( impl, TidyOutCharEncoding );
680
uint nl = cfg( impl, TidyNewline );
681
ReleaseStreamOut( impl->errout );
682
impl->errout = BufferOutput( errbuf, outenc, nl );
683
return ( impl->errout ? 0 : -ENOMEM );
688
/* Redirect diagnostics to a user sink. The current error stream is released
** and replaced by a UserOutput() stream built with the TidyOutCharEncoding
** and TidyNewline settings. Returns 0 when the new stream is non-NULL,
** -ENOMEM otherwise.
** NOTE(review): any guard around these statements is not visible here.
*/
int TIDY_CALL tidySetErrorSink( TidyDoc tdoc, TidyOutputSink* sink )
690
TidyDocImpl* impl = tidyDocToImpl( tdoc );
693
uint outenc = cfg( impl, TidyOutCharEncoding );
694
uint nl = cfg( impl, TidyNewline );
695
ReleaseStreamOut( impl->errout );
696
impl->errout = UserOutput( sink, outenc, nl );
697
return ( impl->errout ? 0 : -ENOMEM );
704
/* Return the document status: tidyStat starts as -EINVAL and is replaced by
** tidyDocStatus(impl).
** NOTE(review): the guard around the assignment and the return statement
** are not visible here.
*/
int TIDY_CALL tidyStatus( TidyDoc tdoc )
706
TidyDocImpl* impl = tidyDocToImpl( tdoc );
707
int tidyStat = -EINVAL;
709
tidyStat = tidyDocStatus( impl );
712
/* Report the detected HTML version. The tdoc argument is unused and the
** handle conversion is commented out.
** NOTE(review): the return statement is not visible here.
*/
int TIDY_CALL tidyDetectedHtmlVersion( TidyDoc ARG_UNUSED(tdoc) )
714
/* TidyDocImpl* impl = tidyDocToImpl( tdoc ); */
717
/* Report whether XHTML was detected. The tdoc argument is unused and the
** handle conversion is commented out.
** NOTE(review): the return statement is not visible here.
*/
Bool TIDY_CALL tidyDetectedXhtml( TidyDoc ARG_UNUSED(tdoc) )
719
/* TidyDocImpl* impl = tidyDocToImpl( tdoc ); */
722
/* Report whether generic XML was detected. The tdoc argument is unused and
** the handle conversion is commented out.
** NOTE(review): the return statement is not visible here.
*/
Bool TIDY_CALL tidyDetectedGenericXml( TidyDoc ARG_UNUSED(tdoc) )
724
/* TidyDocImpl* impl = tidyDocToImpl( tdoc ); */
728
/* Return the document's error count (impl->errors); count starts as 0xFFFFFFFF.
** NOTE(review): the guard around the assignment and the return statement
** are not visible here.
*/
uint TIDY_CALL tidyErrorCount( TidyDoc tdoc )
730
TidyDocImpl* impl = tidyDocToImpl( tdoc );
731
uint count = 0xFFFFFFFF;
733
count = impl->errors;
736
/* Return the document's warning count (impl->warnings); count starts as
** 0xFFFFFFFF.
** NOTE(review): the guard around the assignment and the return statement
** are not visible here.
*/
uint TIDY_CALL tidyWarningCount( TidyDoc tdoc )
738
TidyDocImpl* impl = tidyDocToImpl( tdoc );
739
uint count = 0xFFFFFFFF;
741
count = impl->warnings;
744
/* Return the document's accessibility count (impl->accessErrors); count
** starts as 0xFFFFFFFF.
** NOTE(review): the guard around the assignment and the return statement
** are not visible here.
*/
uint TIDY_CALL tidyAccessWarningCount( TidyDoc tdoc )
746
TidyDocImpl* impl = tidyDocToImpl( tdoc );
747
uint count = 0xFFFFFFFF;
749
count = impl->accessErrors;
752
/* Return the document's configuration error count (impl->optionErrors);
** count starts as 0xFFFFFFFF.
** NOTE(review): the guard around the assignment and the return statement
** are not visible here.
*/
uint TIDY_CALL tidyConfigErrorCount( TidyDoc tdoc )
754
TidyDocImpl* impl = tidyDocToImpl( tdoc );
755
uint count = 0xFFFFFFFF;
757
count = impl->optionErrors;
762
/* Error reporting functions
764
/* Emit the error summary for a document by calling ErrorSummary(). */
void TIDY_CALL tidyErrorSummary( TidyDoc tdoc )
766
TidyDocImpl* impl = tidyDocToImpl( tdoc );
768
ErrorSummary( impl );
770
/* Emit general information for a document.
** NOTE(review): only the handle conversion is visible here; the reporting
** call is missing.
*/
void TIDY_CALL tidyGeneralInfo( TidyDoc tdoc )
772
TidyDocImpl* impl = tidyDocToImpl( tdoc );
780
** Initial version supports only whole-file operations.
781
** Do not expose Tidy StreamIn or Out data structures - yet.
784
/* Parse/load Functions
786
** HTML/XHTML version determined from input.
788
/* Public wrapper: parse the file filnam; returns the result of tidyDocParseFile(). */
int TIDY_CALL tidyParseFile( TidyDoc tdoc, ctmbstr filnam )
790
TidyDocImpl* doc = tidyDocToImpl( tdoc );
791
return tidyDocParseFile( doc, filnam );
793
/* Public wrapper: parse standard input; returns the result of tidyDocParseStdin(). */
int TIDY_CALL tidyParseStdin( TidyDoc tdoc )
795
TidyDocImpl* doc = tidyDocToImpl( tdoc );
796
return tidyDocParseStdin( doc );
798
/* Public wrapper: parse the string content; returns the result of
** tidyDocParseString().
*/
int TIDY_CALL tidyParseString( TidyDoc tdoc, ctmbstr content )
800
TidyDocImpl* doc = tidyDocToImpl( tdoc );
801
return tidyDocParseString( doc, content );
803
/* Public wrapper: parse the contents of inbuf; returns the result of
** tidyDocParseBuffer().
*/
int TIDY_CALL tidyParseBuffer( TidyDoc tdoc, TidyBuffer* inbuf )
805
TidyDocImpl* doc = tidyDocToImpl( tdoc );
806
return tidyDocParseBuffer( doc, inbuf );
808
/* Public wrapper: parse from a user input source; returns the result of
** tidyDocParseSource().
*/
int TIDY_CALL tidyParseSource( TidyDoc tdoc, TidyInputSource* source )
810
TidyDocImpl* doc = tidyDocToImpl( tdoc );
811
return tidyDocParseSource( doc, source );
815
/* Parse the file filnam, opened with mode "rb"; status starts as -ENOENT.
** With PRESERVE_FILE_TIMES, doc->filetimes is cleared and, when the file is
** open, TidyKeepFileTimes is set and fstat() succeeds, the access and
** modification times are recorded. The stream is built with FileInput()
** using TidyInCharEncoding, parsed with tidyDocParseStream(), and then
** released via freeFileSource(). The else path reports through FileError().
** NOTE(review): the condition guarding the parse, the #endif, and the
** return statement are not visible here.
*/
int tidyDocParseFile( TidyDocImpl* doc, ctmbstr filnam )
817
int status = -ENOENT;
818
FILE* fin = fopen( filnam, "rb" );
820
#if PRESERVE_FILE_TIMES
821
struct stat sbuf = {0};
822
/* get last modified time */
823
ClearMemory( &doc->filetimes, sizeof(doc->filetimes) );
824
if ( fin && cfgBool(doc,TidyKeepFileTimes) &&
825
fstat(fileno(fin), &sbuf) != -1 )
827
doc->filetimes.actime = sbuf.st_atime;
828
doc->filetimes.modtime = sbuf.st_mtime;
834
StreamIn* in = FileInput( doc, fin, cfg( doc, TidyInCharEncoding ));
835
status = tidyDocParseStream( doc, in );
836
freeFileSource(&in->source, yes);
839
else /* Error message! */
840
FileError( doc, filnam, TidyError );
844
/* Parse standard input: wraps stdin with FileInput() using TidyInCharEncoding
** and runs tidyDocParseStream().
** NOTE(review): stream cleanup and the return statement are not visible here.
*/
int tidyDocParseStdin( TidyDocImpl* doc )
846
StreamIn* in = FileInput( doc, stdin, cfg( doc, TidyInCharEncoding ));
847
int status = tidyDocParseStream( doc, in );
852
/* Parse the contents of inbuf: status starts as -EINVAL; the buffer is
** wrapped with BufferInput() using TidyInCharEncoding and parsed with
** tidyDocParseStream().
** NOTE(review): the guard on inbuf, stream cleanup, and the return statement
** are not visible here.
*/
int tidyDocParseBuffer( TidyDocImpl* doc, TidyBuffer* inbuf )
854
int status = -EINVAL;
857
StreamIn* in = BufferInput( doc, inbuf, cfg( doc, TidyInCharEncoding ));
858
status = tidyDocParseStream( doc, in );
864
/* Parse a NUL-terminated string without copying it: content (length plus one
** for the terminator) is attached to a local TidyBuffer, wrapped with
** BufferInput(), parsed with tidyDocParseStream(), and detached again.
** status starts as -EINVAL.
** NOTE(review): the declaration of in, the guard on content, and the return
** statement are not visible here.
*/
int tidyDocParseString( TidyDocImpl* doc, ctmbstr content )
866
int status = -EINVAL;
867
TidyBuffer inbuf = {0};
872
tidyBufAttach( &inbuf, (void*)content, tmbstrlen(content)+1 );
873
in = BufferInput( doc, &inbuf, cfg( doc, TidyInCharEncoding ));
874
status = tidyDocParseStream( doc, in );
875
tidyBufDetach( &inbuf );
881
/* Parse from a user-supplied input source: wraps it with UserInput() using
** TidyInCharEncoding and runs tidyDocParseStream().
** NOTE(review): stream cleanup and the return statement are not visible here.
*/
int tidyDocParseSource( TidyDocImpl* doc, TidyInputSource* source )
883
StreamIn* in = UserInput( doc, source, cfg( doc, TidyInCharEncoding ));
884
int status = tidyDocParseStream( doc, in );
890
/* Print/save Functions
893
/* Public wrapper: save to the file filnam; returns the result of tidyDocSaveFile(). */
int TIDY_CALL tidySaveFile( TidyDoc tdoc, ctmbstr filnam )
895
TidyDocImpl* doc = tidyDocToImpl( tdoc );
896
return tidyDocSaveFile( doc, filnam );
898
/* Public wrapper: save to standard output; returns the result of
** tidyDocSaveStdout().
*/
int TIDY_CALL tidySaveStdout( TidyDoc tdoc )
900
TidyDocImpl* doc = tidyDocToImpl( tdoc );
901
return tidyDocSaveStdout( doc );
903
/* Public wrapper: save into the caller's buffer/buflen pair; returns the
** result of tidyDocSaveString().
*/
int TIDY_CALL tidySaveString( TidyDoc tdoc, tmbstr buffer, uint* buflen )
905
TidyDocImpl* doc = tidyDocToImpl( tdoc );
906
return tidyDocSaveString( doc, buffer, buflen );
908
/* Public wrapper: save into outbuf; returns the result of tidyDocSaveBuffer(). */
int TIDY_CALL tidySaveBuffer( TidyDoc tdoc, TidyBuffer* outbuf )
910
TidyDocImpl* doc = tidyDocToImpl( tdoc );
911
return tidyDocSaveBuffer( doc, outbuf );
913
/* Public wrapper: save to a user output sink; returns the result of
** tidyDocSaveSink().
*/
int TIDY_CALL tidySaveSink( TidyDoc tdoc, TidyOutputSink* sink )
915
TidyDocImpl* doc = tidyDocToImpl( tdoc );
916
return tidyDocSaveSink( doc, sink );
919
/* Save the document to the file filnam; status starts as -ENOENT. When there
** are errors and TidyWriteBack is set without TidyForceOutput, status is
** taken from tidyDocStatus(). The file is opened with mode "wb", wrapped with
** FileOutput() using TidyOutCharEncoding and TidyNewline, and written with
** tidyDocSaveStream(). With PRESERVE_FILE_TIMES, recorded file times are
** re-applied with utime() and then cleared. A negative status is reported
** through FileError().
** NOTE(review): the declaration of fout, the branch structure linking these
** steps, closing of the file, and the return statement are not visible here.
*/
int tidyDocSaveFile( TidyDocImpl* doc, ctmbstr filnam )
921
int status = -ENOENT;
924
/* Don't zap input file if no output */
925
if ( doc->errors > 0 &&
926
cfgBool(doc, TidyWriteBack) && !cfgBool(doc, TidyForceOutput) )
927
status = tidyDocStatus( doc );
929
fout = fopen( filnam, "wb" );
933
uint outenc = cfg( doc, TidyOutCharEncoding );
934
uint nl = cfg( doc, TidyNewline );
935
StreamOut* out = FileOutput( fout, outenc, nl );
937
status = tidyDocSaveStream( doc, out );
942
#if PRESERVE_FILE_TIMES
943
if ( doc->filetimes.actime )
945
/* set file last accessed/modified times to original values */
946
utime( filnam, &doc->filetimes );
947
ClearMemory( &doc->filetimes, sizeof(doc->filetimes) );
949
#endif /* PRESERVE_FILE_TIMES */
951
if ( status < 0 ) /* Error message! */
952
FileError( doc, filnam, TidyError );
958
/* Note, _setmode() does NOT work on Win2K Pro w/ VC++ 6.0 SP3.
959
** The code has been left in in case it works w/ other compilers
960
** or operating systems. If stdout is in Text mode, be aware that
961
** it will garble UTF16 documents. In text mode, when it encounters
962
** a single byte of value 10 (0xA), it will insert a single byte
963
** value 13 (0xD) just before it. This has the effect of garbling
964
** the entire document.
967
#if !defined(NO_SETMODE_SUPPORT)
969
#if defined(_WIN32) || defined(OS2_OS)
976
/* Save the document to stdout through a FileOutput() stream built with
** TidyOutCharEncoding and TidyNewline. On _WIN32 or OS2_OS builds without
** NO_SETMODE_SUPPORT, stdout and stderr are switched to _O_BINARY with
** setmode() before tidyDocSaveStream() runs, and each previous mode is
** restored afterwards when it is not -1.
** NOTE(review): the declaration of status, the matching #endif lines, stream
** cleanup, and the return statement are not visible here.
*/
int tidyDocSaveStdout( TidyDocImpl* doc )
978
#if !defined(NO_SETMODE_SUPPORT)
980
#if defined(_WIN32) || defined(OS2_OS)
981
int oldstdoutmode = -1, oldstderrmode = -1;
986
uint outenc = cfg( doc, TidyOutCharEncoding );
987
uint nl = cfg( doc, TidyNewline );
988
StreamOut* out = FileOutput( stdout, outenc, nl );
990
#if !defined(NO_SETMODE_SUPPORT)
992
#if defined(_WIN32) || defined(OS2_OS)
993
oldstdoutmode = setmode( fileno(stdout), _O_BINARY );
994
oldstderrmode = setmode( fileno(stderr), _O_BINARY );
1000
status = tidyDocSaveStream( doc, out );
1005
#if !defined(NO_SETMODE_SUPPORT)
1007
#if defined(_WIN32) || defined(OS2_OS)
1008
if ( oldstdoutmode != -1 )
1009
oldstdoutmode = setmode( fileno(stdout), oldstdoutmode );
1010
if ( oldstderrmode != -1 )
1011
oldstderrmode = setmode( fileno(stderr), oldstderrmode );
1020
/* Save the document into a caller-supplied buffer. Output is first rendered
** into a temporary TidyBuffer; its size is compared against *buflen, the
** bytes are copied into buffer with memcpy, *buflen is set to the rendered
** size, and the temporary buffer is freed.
** NOTE(review): what happens when outbuf.size exceeds *buflen is not visible
** here -- confirm the memcpy is skipped in that case. The return statement
** is also missing.
*/
int tidyDocSaveString( TidyDocImpl* doc, tmbstr buffer, uint* buflen )
1022
uint outenc = cfg( doc, TidyOutCharEncoding );
1023
uint nl = cfg( doc, TidyNewline );
1024
TidyBuffer outbuf = {0};
1026
StreamOut* out = BufferOutput( &outbuf, outenc, nl );
1027
int status = tidyDocSaveStream( doc, out );
1029
if ( outbuf.size > *buflen )
1032
memcpy( buffer, outbuf.bp, outbuf.size );
1034
*buflen = outbuf.size;
1035
tidyBufFree( &outbuf );
1040
/* Save the document into outbuf: status starts as -EINVAL; a BufferOutput()
** stream is built with TidyOutCharEncoding and TidyNewline and written with
** tidyDocSaveStream().
** NOTE(review): the guard on outbuf, stream cleanup, and the return
** statement are not visible here.
*/
int tidyDocSaveBuffer( TidyDocImpl* doc, TidyBuffer* outbuf )
1042
int status = -EINVAL;
1045
uint outenc = cfg( doc, TidyOutCharEncoding );
1046
uint nl = cfg( doc, TidyNewline );
1047
StreamOut* out = BufferOutput( outbuf, outenc, nl );
1049
status = tidyDocSaveStream( doc, out );
1055
/* Save the document to a user output sink: a UserOutput() stream is built
** with TidyOutCharEncoding and TidyNewline and written with
** tidyDocSaveStream().
** NOTE(review): stream cleanup and the return statement are not visible here.
*/
int tidyDocSaveSink( TidyDocImpl* doc, TidyOutputSink* sink )
1057
uint outenc = cfg( doc, TidyOutCharEncoding );
1058
uint nl = cfg( doc, TidyNewline );
1059
StreamOut* out = UserOutput( sink, outenc, nl );
1060
int status = tidyDocSaveStream( doc, out );
1065
/* Derive the document status code: doc->errors is tested first, then
** doc->warnings together with doc->accessErrors.
** NOTE(review): the values returned for each case are not visible here.
*/
int tidyDocStatus( TidyDocImpl* doc )
1067
if ( doc->errors > 0 )
1069
if ( doc->warnings > 0 || doc->accessErrors > 0 )
1076
/* Public wrapper: returns the result of tidyDocCleanAndRepair(). */
int TIDY_CALL tidyCleanAndRepair( TidyDoc tdoc )
1078
TidyDocImpl* impl = tidyDocToImpl( tdoc );
1080
return tidyDocCleanAndRepair( impl );
1084
/* Public wrapper: returns the result of tidyDocRunDiagnostics(). */
int TIDY_CALL tidyRunDiagnostics( TidyDoc tdoc )
1086
TidyDocImpl* impl = tidyDocToImpl( tdoc );
1088
return tidyDocRunDiagnostics( impl );
1093
/* Workhorse functions.
1095
** Parse requires input source, all input config items
1096
** and diagnostic sink to have all been set before calling.
1098
** Emit likewise requires that document sink and all
1099
** pretty printing options have been set.
1101
/* Message passed to FatalError() when CheckNodeIntegrity() fails on the document tree. */
static ctmbstr integrity = "\nPanic - tree has lost its integrity\n";
1103
/* Parse a document from an input stream. Visible steps, in order:
**  - assert that doc and in are non-NULL and that no input is attached;
**  - snapshot the configuration;
**  - free and clear the previous root node and givenDoctype;
**  - create a new lexer and copy its line/column into the root node;
**  - reset inputHadBOM, read the BOM encoding, and apply it to the stream
**    and to TidyInCharEncoding;
**  - with TIDY_WIN32_MLANG_SUPPORT, initialise the input transcoder for
**    encodings above WIN32MLANG and uninitialise it after parsing;
**  - run ParseXMLDocument() or ParseDocument(), each followed by a
**    CheckNodeIntegrity() check that calls FatalError(integrity) on failure.
** Returns tidyDocStatus(doc).
** NOTE(review): the declaration of bomEnc, the test on the BOM result, the
** condition choosing the XML or HTML parser (xmlIn is read at the top), and
** the attach/detach of doc->docIn are not visible here.
*/
int tidyDocParseStream( TidyDocImpl* doc, StreamIn* in )
1105
Bool xmlIn = cfgBool( doc, TidyXmlTags );
1108
assert( doc != NULL && in != NULL );
1109
assert( doc->docIn == NULL );
1112
TakeConfigSnapshot( doc ); /* Save config state */
1116
FreeNode(doc, &doc->root);
1117
ClearMemory(&doc->root, sizeof(Node));
1119
if (doc->givenDoctype)
1120
MemFree(doc->givenDoctype);
1122
doc->givenDoctype = NULL;
1124
doc->lexer = NewLexer( doc );
1125
/* doc->lexer->root = &doc->root; */
1126
doc->root.line = doc->lexer->lines;
1127
doc->root.column = doc->lexer->columns;
1128
doc->inputHadBOM = no;
1130
bomEnc = ReadBOMEncoding(in);
1134
in->encoding = bomEnc;
1135
SetOptionInt(doc, TidyInCharEncoding, bomEnc);
1138
#ifdef TIDY_WIN32_MLANG_SUPPORT
1139
if (in->encoding > WIN32MLANG)
1140
Win32MLangInitInputTranscoder(in, in->encoding);
1141
#endif /* TIDY_WIN32_MLANG_SUPPORT */
1143
/* Tidy doesn't alter the doctype for generic XML docs */
1146
ParseXMLDocument( doc );
1147
if ( !CheckNodeIntegrity( &doc->root ) )
1148
FatalError( integrity );
1153
ParseDocument( doc );
1154
if ( !CheckNodeIntegrity( &doc->root ) )
1155
FatalError( integrity );
1158
#ifdef TIDY_WIN32_MLANG_SUPPORT
1159
Win32MLangUninitInputTranscoder(in);
1160
#endif /* TIDY_WIN32_MLANG_SUPPORT */
1163
return tidyDocStatus( doc );
1166
/* Run the post-parse diagnostics. Reads TidyAccessibilityCheckLevel,
** TidyQuiet and TidyForceOutput, reports the markup version and warning
** counts, calls NeedsAuthorIntervention() when there are errors and output is
** not forced, and runs AccessibilityChecks() when
** SUPPORT_ACCESSIBILITY_CHECKS is enabled. Returns tidyDocStatus(doc).
** NOTE(review): the conditions using quiet and acclvl, and the matching
** #endif, are not visible here.
*/
int tidyDocRunDiagnostics( TidyDocImpl* doc )
1168
uint acclvl = cfg( doc, TidyAccessibilityCheckLevel );
1169
Bool quiet = cfgBool( doc, TidyQuiet );
1170
Bool force = cfgBool( doc, TidyForceOutput );
1175
ReportMarkupVersion( doc );
1176
ReportNumWarnings( doc );
1179
if ( doc->errors > 0 && !force )
1180
NeedsAuthorIntervention( doc );
1182
#if SUPPORT_ACCESSIBILITY_CHECKS
1184
AccessibilityChecks( doc );
1187
return tidyDocStatus( doc );
1190
/* Apply the clean-up and repair passes to a parsed document. The relevant
** options are read up front. Visible passes, in order: nested-emphasis
** simplification, list/blockquote conversion, EmFromI, Word 2000 clean-up
** (when word2K is set and IsWord2000() is true), CleanDocument (when clean or
** dropFont is set), FixBrakes, http-equiv reconciliation (unless the output
** encoding is RAW or, where supported, ISO2022), a tree integrity check,
** recording the given doctype's PUBLIC identifier, and the doctype, anchor,
** namespace and language fix-ups for XHTML versus other output.
** Returns tidyDocStatus(doc).
** NOTE(review): many guards are missing from this view, including the one
** on the early return (presumably tied to tidyXmlTags) and those using
** logical, tidyMark and the xmlOut/xmlDecl body; the declaration of node is
** also not visible. The comment above FixBrakes says it is disabled, yet the
** call is visible -- confirm whether it is compiled out.
*/
int tidyDocCleanAndRepair( TidyDocImpl* doc )
1192
Bool word2K = cfgBool( doc, TidyWord2000 );
1193
Bool logical = cfgBool( doc, TidyLogicalEmphasis );
1194
Bool clean = cfgBool( doc, TidyMakeClean );
1195
Bool dropFont = cfgBool( doc, TidyDropFontTags );
1196
Bool htmlOut = cfgBool( doc, TidyHtmlOut );
1197
Bool xmlOut = cfgBool( doc, TidyXmlOut );
1198
Bool xhtmlOut = cfgBool( doc, TidyXhtmlOut );
1199
Bool xmlDecl = cfgBool( doc, TidyXmlDecl );
1200
Bool tidyMark = cfgBool( doc, TidyMark );
1201
Bool tidyXmlTags = cfgBool( doc, TidyXmlTags );
1205
return tidyDocStatus( doc );
1207
/* simplifies <b><b> ... </b> ...</b> etc. */
1208
NestedEmphasis( doc, &doc->root );
1210
/* cleans up <dir>indented text</dir> etc. */
1211
List2BQ( doc, &doc->root );
1212
BQ2Div( doc, &doc->root );
1214
/* replaces i by em and b by strong */
1216
EmFromI( doc, &doc->root );
1218
if ( word2K && IsWord2000(doc) )
1220
/* prune Word2000's <![if ...]> ... <![endif]> */
1221
DropSections( doc, &doc->root );
1223
/* drop style & class attributes and empty p, span elements */
1224
CleanWord2000( doc, &doc->root );
1227
/* replaces presentational markup by style rules */
1228
if ( clean || dropFont )
1229
CleanDocument( doc );
1231
/* Move terminating <br /> tags from out of paragraphs */
1232
/*! Do we want to do this for all block-level elements? */
1234
/* This is disabled due to http://tidy.sf.net/bug/681116 */
1236
FixBrakes( doc, FindBody( doc ));
1239
/* Reconcile http-equiv meta element with output encoding */
1240
if (cfg( doc, TidyOutCharEncoding) != RAW
1241
#ifndef NO_NATIVE_ISO2022_SUPPORT
1242
&& cfg( doc, TidyOutCharEncoding) != ISO2022
1245
VerifyHTTPEquiv( doc, FindHEAD( doc ));
1247
if ( !CheckNodeIntegrity( &doc->root ) )
1248
FatalError( integrity );
1250
/* remember given doctype for reporting */
1251
node = FindDocType(doc);
1254
AttVal* fpi = GetAttrByName(node, "PUBLIC");
1255
if (AttrHasValue(fpi))
1256
doc->givenDoctype = tmbstrdup(fpi->value);
1259
if ( doc->root.content )
1261
/* If we had XHTML input but want HTML output */
1262
if ( htmlOut && doc->lexer->isvoyager )
1264
Node* node = FindDocType(doc);
1265
/* Remove reference, but do not free */
1270
if (xhtmlOut && !htmlOut)
1272
SetXHTMLDocType(doc);
1273
FixAnchors(doc, &doc->root, yes, yes);
1274
FixXhtmlNamespace(doc, yes);
1275
FixLanguageInformation(doc, &doc->root, yes, yes);
1280
FixAnchors(doc, &doc->root, yes, yes);
1281
FixXhtmlNamespace(doc, no);
1282
FixLanguageInformation(doc, &doc->root, no, yes);
1289
/* ensure presence of initial <?xml version="1.0"?> */
1290
if ( xmlOut && xmlDecl )
1293
return tidyDocStatus( doc );
1296
/* Pretty-print the document to an output stream. The output options are read
** up front. Visible pre-print passes: ConvertCDATANodes, DropComments,
** DropFontElements, WbrToSpace, DowngradeTypography (when makeClean and
** asciiChars are both set, or makeBare is set), NormalizeSpaces and
** ReplacePreformattedSpaces. Markup is emitted only when showMarkup is set
** and there are no errors or output is forced: PPrintXMLTree for XML output
** that is not XHTML, otherwise PPrintTree, followed by PFlushLine. Finally
** the configuration is restored from its snapshot and tidyDocStatus(doc) is
** returned.
** NOTE(review): the guards on several passes (escapeCDATA, dropComments,
** makeClean), the BOM write, the bodyOnly branch body, the matching #endif
** lines, and the attach/detach of doc->docOut are not visible here.
*/
int tidyDocSaveStream( TidyDocImpl* doc, StreamOut* out )
1298
Bool showMarkup = cfgBool( doc, TidyShowMarkup );
1299
Bool forceOutput = cfgBool( doc, TidyForceOutput );
1300
#if SUPPORT_UTF16_ENCODINGS
1301
Bool outputBOM = ( cfgAutoBool(doc, TidyOutputBOM) == TidyYesState );
1302
Bool smartBOM = ( cfgAutoBool(doc, TidyOutputBOM) == TidyAutoState );
1304
Bool xmlOut = cfgBool( doc, TidyXmlOut );
1305
Bool xhtmlOut = cfgBool( doc, TidyXhtmlOut );
1306
Bool bodyOnly = cfgBool( doc, TidyBodyOnly );
1308
Bool dropComments = cfgBool(doc, TidyHideComments);
1309
Bool makeClean = cfgBool(doc, TidyMakeClean);
1310
Bool asciiChars = cfgBool(doc, TidyAsciiChars);
1311
Bool makeBare = cfgBool(doc, TidyMakeBare);
1312
Bool escapeCDATA = cfgBool(doc, TidyEscapeCdata);
1315
ConvertCDATANodes(doc, &doc->root);
1318
DropComments(doc, &doc->root);
1323
DropFontElements(doc, &doc->root, NULL);
1324
WbrToSpace(doc, &doc->root);
1327
if ((makeClean && asciiChars) || makeBare)
1328
DowngradeTypography(doc, &doc->root);
1331
/* Note: no longer replaces in */
1332
/* attribute values / non-text tokens */
1333
NormalizeSpaces(doc->lexer, &doc->root);
1335
ReplacePreformattedSpaces(doc, &doc->root);
1337
if ( showMarkup && (doc->errors == 0 || forceOutput) )
1339
#if SUPPORT_UTF16_ENCODINGS
1340
/* Output a Byte Order Mark if required */
1341
if ( outputBOM || (doc->inputHadBOM && smartBOM) )
1345
/* No longer necessary. No DOCTYPE == HTML 3.2,
1346
** which gives you only the basic character entities,
1347
** which are safe in any browser.
1348
** if ( !FindDocType(doc) )
1349
** SetOptionBool( doc, TidyNumEntities, yes );
1353
if ( xmlOut && !xhtmlOut )
1354
PPrintXMLTree( doc, NORMAL, 0, &doc->root );
1355
else if ( bodyOnly )
1358
PPrintTree( doc, NORMAL, 0, &doc->root );
1360
PFlushLine( doc, 0 );
1364
ResetConfigToSnapshot( doc );
1365
return tidyDocStatus( doc );
1368
/* Tree traversal functions
1370
** The big issue here is the degree to which we should mimic
1371
** a DOM and/or SAX nodes.
1373
** Is it 100% possible (and, if so, how difficult is it) to
1374
** emit SAX events from this API? If SAX events are possible,
1375
** is that 100% of data needed to build a DOM?
1378
/* Return the document's root node (&impl->root) as a TidyNode handle. */
TidyNode TIDY_CALL tidyGetRoot( TidyDoc tdoc )
1380
TidyDocImpl* impl = tidyDocToImpl( tdoc );
1381
return tidyImplToNode( &impl->root );
1384
/* Return the node found by FindHTML() as a TidyNode handle.
** NOTE(review): the declaration of node and any guard are not visible here.
*/
TidyNode TIDY_CALL tidyGetHtml( TidyDoc tdoc )
1386
TidyDocImpl* impl = tidyDocToImpl( tdoc );
1389
node = FindHTML( impl );
1390
return tidyImplToNode( node );
1393
/* Return the node found by FindHEAD() as a TidyNode handle.
** NOTE(review): the declaration of node and any guard are not visible here.
*/
TidyNode TIDY_CALL tidyGetHead( TidyDoc tdoc )
1395
TidyDocImpl* impl = tidyDocToImpl( tdoc );
1398
node = FindHEAD( impl );
1399
return tidyImplToNode( node );
1402
/* Return the node found by FindBody() as a TidyNode handle.
** NOTE(review): the declaration of node and any guard are not visible here.
*/
TidyNode TIDY_CALL tidyGetBody( TidyDoc tdoc )
1404
TidyDocImpl* impl = tidyDocToImpl( tdoc );
1407
node = FindBody( impl );
1408
return tidyImplToNode( node );
1411
/* parent / child */
1412
/* Return the parent of a node (nimp->parent) as a TidyNode handle. */
TidyNode TIDY_CALL tidyGetParent( TidyNode tnod )
1414
Node* nimp = tidyNodeToImpl( tnod );
1415
return tidyImplToNode( nimp->parent );
1417
/* Return the node stored in nimp->content as a TidyNode handle. */
TidyNode TIDY_CALL tidyGetChild( TidyNode tnod )
1419
Node* nimp = tidyNodeToImpl( tnod );
1420
return tidyImplToNode( nimp->content );
1424
/* Return the next sibling of a node (nimp->next) as a TidyNode handle. */
TidyNode TIDY_CALL tidyGetNext( TidyNode tnod )
1426
Node* nimp = tidyNodeToImpl( tnod );
1427
return tidyImplToNode( nimp->next );
1429
/* Return the previous sibling of a node (nimp->prev) as a TidyNode handle. */
TidyNode TIDY_CALL tidyGetPrev( TidyNode tnod )
1431
Node* nimp = tidyNodeToImpl( tnod );
1432
return tidyImplToNode( nimp->prev );
1436
/* Return the type of a node: ntyp starts as TidyNode_Root and is replaced by
** nimp->type cast to TidyNodeType.
** NOTE(review): the guard around the assignment and the return statement
** are not visible here.
*/
TidyNodeType TIDY_CALL tidyNodeGetType( TidyNode tnod )
1438
Node* nimp = tidyNodeToImpl( tnod );
1439
TidyNodeType ntyp = TidyNode_Root;
1441
ntyp = (TidyNodeType) nimp->type;
1445
/* Return the source line of a node.
** NOTE(review): only the handle conversion is visible here; the value read
** and the return statement are missing.
*/
uint TIDY_CALL tidyNodeLine( TidyNode tnod )
1447
Node* nimp = tidyNodeToImpl( tnod );
1453
/* Return the source column of a node.
** NOTE(review): only the handle conversion is visible here; the value read
** and the return statement are missing.
*/
uint TIDY_CALL tidyNodeColumn( TidyNode tnod )
1455
Node* nimp = tidyNodeToImpl( tnod );
1462
/* Return the element name of a node: nnam starts as NULL and is set from
** nimp->element.
** NOTE(review): the guard around the assignment and the return statement
** are not visible here.
*/
ctmbstr TIDY_CALL tidyNodeGetName( TidyNode tnod )
1464
Node* nimp = tidyNodeToImpl( tnod );
1465
ctmbstr nnam = NULL;
1467
nnam = nimp->element;
1472
/* Return the result of nodeHasText() for the given node.
** NOTE(review): any guard or fallback return is not visible here.
*/
Bool TIDY_CALL tidyNodeHasText( TidyDoc tdoc, TidyNode tnod )
1474
TidyDocImpl* doc = tidyDocToImpl( tdoc );
1476
return nodeHasText( doc, tidyNodeToImpl(tnod) );
1481
/* Pretty-print a single node into outbuf. When doc, node and outbuf are all
** non-NULL, a BufferOutput() stream is built with TidyOutCharEncoding and
** TidyNewline; the node is printed with PPrintXMLTree() for XML output that
** is not XHTML, otherwise with PPrintTree(), and the line is flushed.
** NOTE(review): the attach/detach of the output stream, stream cleanup, and
** the Bool results are not visible here.
*/
Bool TIDY_CALL tidyNodeGetText( TidyDoc tdoc, TidyNode tnod, TidyBuffer* outbuf )
1483
TidyDocImpl* doc = tidyDocToImpl( tdoc );
1484
Node* nimp = tidyNodeToImpl( tnod );
1485
if ( doc && nimp && outbuf )
1487
uint outenc = cfg( doc, TidyOutCharEncoding );
1488
uint nl = cfg( doc, TidyNewline );
1489
StreamOut* out = BufferOutput( outbuf, outenc, nl );
1490
Bool xmlOut = cfgBool( doc, TidyXmlOut );
1491
Bool xhtmlOut = cfgBool( doc, TidyXhtmlOut );
1494
if ( xmlOut && !xhtmlOut )
1495
PPrintXMLTree( doc, NORMAL, 0, nimp );
1497
PPrintTree( doc, NORMAL, 0, nimp );
1499
PFlushLine( doc, 0 );
1509
/* Report whether a node is proprietary. isProprietary starts as yes; the
** switch on nimp->type either keeps yes or, when the node has a tag, tests
** VERS_PROPRIETARY in nimp->tag->versions. The tdoc argument is unused.
** NOTE(review): the case labels, the value used when the node has no tag,
** and any NULL guard are not visible here.
*/
Bool TIDY_CALL tidyNodeIsProp( TidyDoc ARG_UNUSED(tdoc), TidyNode tnod )
1511
Node* nimp = tidyNodeToImpl( tnod );
1512
Bool isProprietary = yes;
1515
switch ( nimp->type )
1531
isProprietary = yes;
1537
isProprietary = ( nimp->tag
1538
? (nimp->tag->versions&VERS_PROPRIETARY)!=0
1543
return isProprietary;
1546
/* Return the tag id of a node: tagId starts as TidyTag_UNKNOWN and is taken
** from nimp->tag->id when both the node and its tag are non-NULL.
** NOTE(review): the return statement is not visible here.
*/
TidyTagId TIDY_CALL tidyNodeGetId(TidyNode tnod)
1548
Node* nimp = tidyNodeToImpl(tnod);
1550
TidyTagId tagId = TidyTag_UNKNOWN;
1551
if (nimp && nimp->tag)
1552
tagId = nimp->tag->id;
1558
/* Null for non-element nodes and all pure HTML
1559
/* Namespace accessors for a node (local name, prefix, URI).
** NOTE(review): only the signatures are visible here; the bodies are
** missing, and these may belong to a disabled section -- confirm.
*/
cmbstr tidyNodeNsLocal( TidyNode tnod )
1562
cmbstr tidyNodeNsPrefix( TidyNode tnod )
1565
cmbstr tidyNodeNsUri( TidyNode tnod )
1570
/* Iterate over attribute values */
1571
/* Return the first attribute of a node (nimp->attributes) as a TidyAttr
** handle; attval starts as NULL.
** NOTE(review): the guard around the assignment is not visible here.
*/
TidyAttr TIDY_CALL tidyAttrFirst( TidyNode tnod )
1573
Node* nimp = tidyNodeToImpl( tnod );
1574
AttVal* attval = NULL;
1576
attval = nimp->attributes;
1577
return tidyImplToAttr( attval );
1579
/* Return the attribute following tattr (attval->next) as a TidyAttr handle;
** nxtval starts as NULL.
** NOTE(review): the guard around the assignment is not visible here.
*/
TidyAttr TIDY_CALL tidyAttrNext( TidyAttr tattr )
1581
AttVal* attval = tidyAttrToImpl( tattr );
1582
AttVal* nxtval = NULL;
1584
nxtval = attval->next;
1585
return tidyImplToAttr( nxtval );
1588
/* Return the name of an attribute: anam starts as NULL and is set from
** attval->attribute.
** NOTE(review): the guard around the assignment and the return statement
** are not visible here.
*/
ctmbstr TIDY_CALL tidyAttrName( TidyAttr tattr )
1590
AttVal* attval = tidyAttrToImpl( tattr );
1591
ctmbstr anam = NULL;
1593
anam = attval->attribute;
1596
/* Return the value of an attribute: aval starts as NULL and is set from
** attval->value.
** NOTE(review): the guard around the assignment and the return statement
** are not visible here.
*/
ctmbstr TIDY_CALL tidyAttrValue( TidyAttr tattr )
1598
AttVal* attval = tidyAttrToImpl( tattr );
1599
ctmbstr aval = NULL;
1601
aval = attval->value;
1605
/* Null for pure HTML
1606
/* Namespace accessors for an attribute (local name, prefix, URI).
** NOTE(review): only the signatures are visible here; the bodies are
** missing, and these may belong to a disabled section -- confirm.
*/
ctmbstr tidyAttrNsLocal( TidyAttr tattr )
1609
ctmbstr tidyAttrNsPrefix( TidyAttr tattr )
1612
ctmbstr tidyAttrNsUri( TidyAttr tattr )
1617
/* Return the id of an attribute: attrId starts as TidyAttr_UNKNOWN and is
** taken from attval->dict->id when both the attribute and its dictionary
** entry are non-NULL.
** NOTE(review): the return statement is not visible here.
*/
TidyAttrId TIDY_CALL tidyAttrGetId( TidyAttr tattr )
1619
AttVal* attval = tidyAttrToImpl( tattr );
1620
TidyAttrId attrId = TidyAttr_UNKNOWN;
1621
if ( attval && attval->dict )
1622
attrId = attval->dict->id;
1625
/* Report whether an attribute is proprietary. isProprietary starts as yes;
** when the attribute has a dictionary entry the result is whether
** VERS_PROPRIETARY is set in attval->dict->versions.
** NOTE(review): the value used when there is no dictionary entry and any
** NULL guard on attval are not visible here.
*/
Bool TIDY_CALL tidyAttrIsProp( TidyAttr tattr )
1627
AttVal* attval = tidyAttrToImpl( tattr );
1628
Bool isProprietary = yes;
1630
isProprietary = ( attval->dict
1631
? (attval->dict->versions & VERS_PROPRIETARY) != 0
1633
return isProprietary;