4
/* utf8.h -- convert characters to/from UTF-8
6
(c) 1998-2004 (W3C) MIT, ERCIM, Keio University
7
See tidy.h for the copyright notice.
11
$Author: terry_teague $
12
$Date: 2004/08/02 02:32:47 $
*/
20
/* UTF-8 encoding/decoding support
21
** Does not convert character "codepoints", i.e. to/from ISO/IEC 10646.
*/
24
/* Prototype only; the definition is not part of this header.
** NOTE(review): going by the names alone, this decodes one UTF-8
** sequence whose lead byte is firstByte and stores the resulting
** character in *c, with the following bytes presumably taken from
** successorBytes or read from inp, and a byte count presumably
** reported through *count -- confirm against the implementation.
** The meaning of the int return value is not visible here.
*/
int DecodeUTF8BytesToChar( uint* c, uint firstByte, ctmbstr successorBytes,
25
TidyInputSource* inp, int* count );
27
/* Prototype only; the definition is not part of this header.
** NOTE(review): going by the names alone, this encodes character c
** as UTF-8, presumably writing the bytes to encodebuf or to outp and
** reporting how many were produced through *count -- confirm against
** the implementation.  The meaning of the int return value is not
** visible here.
*/
int EncodeCharToUTF8Bytes( uint c, tmbstr encodebuf,
28
TidyOutputSink* outp, int* count );
31
/* Prototype only.  NOTE(review): presumably reads one UTF-8 encoded
** character from str and stores it in *ch; what the uint result
** represents is not visible in this header -- confirm against the
** definition.
*/
uint GetUTF8( ctmbstr str, uint *ch );
32
/* Prototype only.  NOTE(review): presumably writes character c into
** buf as UTF-8; which position the returned tmbstr refers to is not
** visible in this header -- confirm against the definition.
*/
tmbstr PutUTF8( tmbstr buf, uint c );
34
/* UNICODE byte-order mark, big-endian form (the default). */
#define UNICODE_BOM_BE 0xFEFF
35
/* Default byte-order mark: expands to UNICODE_BOM_BE. */
#define UNICODE_BOM UNICODE_BOM_BE
36
/* UNICODE byte-order mark, little-endian form. */
#define UNICODE_BOM_LE 0xFFFE
37
/* UTF-8 form of the UNICODE byte-order mark. */
#define UNICODE_BOM_UTF8 0xEFBBBF
40
/* Prototype only.  NOTE(review): by its name, a predicate on the
** UCS-4 value ucs4 concerning its validity for UTF-16; the exact
** rule is not visible in this header -- confirm against the
** definition.
*/
Bool IsValidUTF16FromUCS4( tchar ucs4 );
41
/* Prototype only.  NOTE(review): presumably reports whether ch is a
** UTF-16 high surrogate; the range tested is not visible here.
*/
Bool IsHighSurrogate( tchar ch );
42
/* Prototype only.  NOTE(review): presumably reports whether ch is a
** UTF-16 low surrogate; the range tested is not visible here.
*/
Bool IsLowSurrogate( tchar ch );
44
/* Prototype only.  NOTE(review): presumably reports whether ch is a
** value obtained by combining a surrogate pair; the criterion is
** not visible in this header -- confirm against the definition.
*/
Bool IsCombinedChar( tchar ch );
45
/* Prototype only.  NOTE(review): presumably a stricter form of the
** combined-character check that also validates ch; the criterion is
** not visible in this header -- confirm against the definition.
*/
Bool IsValidCombinedChar( tchar ch );
47
/* Prototype only.  NOTE(review): presumably returns the single
** character formed from the surrogate pair (high, low); behaviour
** for arguments that are not surrogates is not visible here.
*/
tchar CombineSurrogatePair( tchar high, tchar low );
48
/* Prototype only.  NOTE(review): presumably splits utf16 into a
** surrogate pair stored through *high and *low, with the Bool result
** signalling whether the split was possible -- confirm against the
** definition.
*/
Bool SplitSurrogatePair( tchar utf16, tchar* high, tchar* low );
52
#endif /* __UTF8_H__ */