1
////////////////////////////////////////////////////////////////////////////
2
// NoteCase notes manager project <http://notecase.sf.net>
4
// This code is licensed under BSD license. See "license.txt" for more details.
6
// File: Implements basic HTML parser class
7
////////////////////////////////////////////////////////////////////////////
10
#define HTMLParser_H__
13
// This class is a simple HTML parser, that is being fed with buffer chunks.
14
// Result is a series of events (virtual method calls) that signal the
15
// occurrence of HTML entities like: tag, tag ending, comment or ordinary text in the stream
17
// In order to use this parser, you must inherit this class and override its event handlers
18
// to perform custom processing of HTML parse events.
20
// Additionally, this class offers two methods for escaping and unescaping of some special
21
// characters (for example char '<' is "escaped" to string "&lt;" when saving HTML document to file)
22
// These methods rely on an internal character conversion table, sorted by numerical value
23
// of the character, in order to speed up the algorithm by using binary search on the table.
24
// Characters are UTF-8, and they can be multibyte - more than one byte long.
33
// Virtual destructor: this class is meant to be inherited from, so a derived
// parser can be safely destroyed through a pointer to HTMLParser.
virtual ~HTMLParser();
36
// Feeds one buffer chunk of HTML input to the parser.
//   szBuffer - pointer to the chunk data
//   len      - length of the chunk (presumably in bytes - TODO confirm)
// Parsed HTML entities are reported through the virtual event handlers
// (OnTagBegin, OnTagEnd, OnComment, OnText).
// NOTE(review): the meaning of the bool result is not visible in this header;
// presumably true on success - confirm against the implementation.
bool Parse(const char *szBuffer, int len);
38
// Escapes special characters found in 'data' (used when saving an HTML document).
// The string is taken by non-const reference and nothing is returned, so the
// result is delivered through 'data' itself. Static: needs no parser instance.
static void EscapeChars(std::string &data);
39
// Unescapes special characters found in 'data' (the counterpart of EscapeChars).
// The string is taken by non-const reference and nothing is returned, so the
// result is delivered through 'data' itself. Static: needs no parser instance.
static void UnescapeChars(std::string &data);
42
// Event handler: signals a tag in the parsed stream.
//   szTag    - presumably the tag name (TODO confirm exact content)
//   szParams - presumably the tag's parameter text; defaults to NULL
// Pure virtual: must be implemented by the derived class.
virtual void OnTagBegin(const char *szTag, const char *szParams = NULL) = 0;
43
// Event handler: signals a tag ending in the parsed stream.
//   szTag - presumably the name of the tag being closed (TODO confirm)
// Pure virtual: must be implemented by the derived class.
virtual void OnTagEnd(const char *szTag) = 0;
44
// Event handler: signals a comment in the parsed stream.
//   szText - presumably the comment text (TODO confirm whether delimiters are included)
// Pure virtual: must be implemented by the derived class.
virtual void OnComment(const char *szText) = 0;
45
// Event handler: signals ordinary (non-tag, non-comment) text in the parsed stream.
//   szText - presumably the text content (TODO confirm whether it is already unescaped)
// Pure virtual: must be implemented by the derived class.
virtual void OnText(const char *szText) = 0;
49
// String data member of the parser.
// NOTE(review): its role is not visible in this header; presumably it holds
// input accumulated across Parse() calls - confirm against the implementation.
std::string m_strData;