1
/******************************************************************************
2
* swbasicfilter.h - definition of class SWBasicFilter. An SWFilter
3
* impl that provides some basic methods that
4
* many filters will need and can use as a starting
7
* $Id: swbasicfilter.h 1984 2006-10-08 05:06:52Z scribe $
9
* Copyright 1998 CrossWire Bible Society (http://www.crosswire.org)
10
* CrossWire Bible Society
12
* Tempe, AZ 85280-2528
14
* This program is free software; you can redistribute it and/or modify it
15
* under the terms of the GNU General Public License as published by the
16
* Free Software Foundation version 2.
18
* This program is distributed in the hope that it will be useful, but
19
* WITHOUT ANY WARRANTY; without even the implied warranty of
20
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21
* General Public License for more details.
25
#ifndef SWBASICFILTER_H
26
#define SWBASICFILTER_H
34
// not a protected inner class because MSVC++ sucks and can't handle it
35
class SWDLLEXPORT BasicFilterUserData {
37
BasicFilterUserData(const SWModule *module, const SWKey *key) { this->module = module; this->key = key; suspendTextPassThru = false; supressAdjacentWhitespace = false; }
38
/** Empty virtual destructor. Presumably virtual so that derived user-data
 * objects can be destroyed through a BasicFilterUserData pointer -- confirm
 * against the code that deletes the object returned by createUserData(). */
virtual ~BasicFilterUserData() {}
39
const SWModule *module;
42
SWBuf lastSuspendSegment;
43
bool suspendTextPassThru;
44
bool supressAdjacentWhitespace;
47
/** A filter providing commonly used functionality.
48
* This filter has facilities for handling SGML/HTML/XML like tokens and
49
* escape strings (like SGML entities). It has the facility for just
50
* substituting the given tokens and escape strings to other strings and for
51
* "manual" custom token handling.
53
* In this class the functions with arguments looking as <code>char
54
* **buf</code> write a character sequence at address specified by
55
* <code>*buf</code> and change <code>*buf</code> to point past
56
* the last char of the written sequence.
58
class SWDLLEXPORT SWBasicFilter : public SWFilter {
70
bool escStringCaseSensitive;
71
bool tokenCaseSensitive;
72
bool passThruUnknownToken;
73
bool passThruUnknownEsc;
74
bool passThruNumericEsc;
82
virtual char processText(SWBuf &text, const SWKey *key = 0, const SWModule *module = 0);
83
virtual ~SWBasicFilter();
87
virtual BasicFilterUserData *createUserData(const SWModule *module, const SWKey *key) {
88
return new BasicFilterUserData(module, key);
92
static const char INITIALIZE; // flag for indicating processing before char loop
93
static const char PRECHAR; // flag for indicating processing at top in char loop
94
static const char POSTCHAR; // flag for indicating processing at bottom in char loop
95
static const char FINALIZE; // flag for indicating processing after char loop
98
/** Sets the beginning of escape sequence (by default "&").*/
99
void setEscapeStart(const char *escStart);
101
/** Sets the end of escape sequence (by default ";").*/
102
void setEscapeEnd(const char *escEnd);
104
/** Sets the token start sequence (by default "<").*/
105
void setTokenStart(const char *tokenStart);
107
/** Sets the token end sequence (by default ">").*/
108
void setTokenEnd(const char *tokenEnd);
110
/** Sets whether to pass thru an unknown token unchanged
112
* Default is false.*/
113
void setPassThruUnknownToken(bool val);
115
/** Sets whether to pass thru an unknown escape sequence unchanged
119
void setPassThruUnknownEscapeString(bool val);
121
/** Sets whether to pass thru a numeric escape sequence unchanged
122
* or allow it to be handled otherwise.
123
* Default is false.*/
124
void setPassThruNumericEscapeString(bool val);
126
/** Are escapeStrings case sensitive or not? Call this
127
* function before addEscapeStringSubstitute()
129
void setEscapeStringCaseSensitive(bool val);
131
/** Registers an esc control sequence that can pass unchanged
133
void addAllowedEscapeString(const char *findString);
135
/** Unregisters an esc control sequence that can pass unchanged
137
void removeAllowedEscapeString(const char *findString);
139
/** Registers an esc control sequence
141
void addEscapeStringSubstitute(const char *findString, const char *replaceString);
143
/** Unregisters an esc control sequence
145
void removeEscapeStringSubstitute(const char *findString);
147
/** This function performs the substitution of escapeStrings */
148
bool substituteEscapeString(SWBuf &buf, const char *escString);
150
/** This passes allowed escapeStrings */
151
bool passAllowedEscapeString(SWBuf &buf, const char *escString);
153
/** This appends escString to buf as an entity */
154
void appendEscapeString(SWBuf &buf, const char *escString);
156
/** Are tokens case sensitive (like in GBF) or not? Call this
157
* function before addTokenSubstitute()
159
void setTokenCaseSensitive(bool val);
161
/** Registers a simple token substitution. Usually called from the
162
* c-tor of a subclass
164
void addTokenSubstitute(const char *findString, const char *replaceString);
166
/** Unregisters a simple token substitute
168
void removeTokenSubstitute(const char *findString);
170
/** This function performs the substitution of tokens */
171
bool substituteToken(SWBuf &buf, const char *token);
173
/** This function is called for every token encountered in the input text.
174
* @param buf the output buffer
175
* @param token the token (e.g. <code>"p align='left'"</code>)
176
* @param userData user storage space for data transient to 1 full buffer parse
177
* @return subclasses should return true if they handled the token, or false if they did not.
179
virtual bool handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData);
181
/** Default stage hook: ignores every argument and returns false.
 * Subclasses may override it; the stage argument presumably carries one of
 * the INITIALIZE / PRECHAR / POSTCHAR / FINALIZE flags -- confirm in the
 * implementation of processText().
 * @return always false in this base implementation
 */
virtual bool processStage(char /*stage*/,
                          SWBuf & /*text*/,
                          char *& /*from*/,
                          BasicFilterUserData * /*userData*/)
{
	return false;
}
182
/** Stores the given stage flags in processStages.
 * See the static stage flags declared above (INITIALIZE, PRECHAR, POSTCHAR,
 * FINALIZE).
 * @param stages value assigned to processStages
 */
virtual void setStageProcessing(char stages)
{
	processStages = stages;
}
184
/** This function is called for every escape sequence encountered in the input text.
185
* @param buf the output buffer
186
* @param escString the escape sequence (e.g. <code>"amp"</code> for &amp;)
187
* @param userData user storage space for data transient to 1 full buffer parse
188
* @return <code>false</code> if was not handled and should be handled in
189
* @return subclasses should return true if they handled the esc seq, or false if they did not.
191
virtual bool handleEscapeString(SWBuf &buf, const char *escString, BasicFilterUserData *userData);
193
/** This function is called for all numeric escape sequences. If passThrough
194
* @param buf the output buffer
195
* @param escString the escape sequence (e.g. <code>"#235"</code> for &#235;)
196
* @return subclasses should return true if they handled the esc seq, or false if they did not.
198
virtual bool handleNumericEscapeString(SWBuf &buf, const char *escString);