3
* This file is part of BibleTime's source code, http://www.bibletime.info/.
5
* Copyright 1999-2006 by the BibleTime developers.
6
* The BibleTime source code is licensed under the GNU General Public License version 2.0.
13
#include "cswordmoduleinfo.h"
14
#include "cswordbackend.h"
15
#include "util/cpointers.h"
17
#include "bt_gbfhtml.h"
30
using namespace Filters;
32
// Sets up the filter: configures escape-string handling and registers the
// GBF token -> HTML substitutions (mostly <span>/<div> elements carrying CSS classes).
BT_GBFHTML::BT_GBFHTML() : sword::GBFHTML() {
34
setEscapeStringCaseSensitive(true);
35
setPassThruUnknownEscapeString(true); //the HTML widget will render the HTML escape codes
37
// "Rf" (end of footnote) is not a plain substitution; handleToken() has its own branch for it.
removeTokenSubstitute("Rf");
38
// addTokenSubstitute("RB", "<span>"); //start of a footnote with embedded text
40
addTokenSubstitute("FI", "<span class=\"italic\">"); // italics begin
41
addTokenSubstitute("Fi", "</span>"); // italics end
43
addTokenSubstitute("FB", "<span class=\"bold\">"); // bold begin
44
addTokenSubstitute("Fb", "</span>"); // bold end
46
addTokenSubstitute("FR", "<span class=\"jesuswords\">"); // begin of text styled with the "jesuswords" class
47
addTokenSubstitute("Fr", "</span>");
49
addTokenSubstitute("FU", "<u>"); // underline begin
50
addTokenSubstitute("Fu", "</u>"); // underline end
52
addTokenSubstitute("FO", "<span class=\"quotation\">"); // Old Testament quote begin
53
addTokenSubstitute("Fo", "</span>"); // Old Testament quote end
56
addTokenSubstitute("FS", "<span class=\"sup\">"); // superscript begin
57
addTokenSubstitute("Fs", "</span>"); // superscript end
59
addTokenSubstitute("FV", "<span class=\"sub\">"); // subscript begin
60
addTokenSubstitute("Fv", "</span>"); // subscript end
62
addTokenSubstitute("TT", "<div class=\"booktitle\">"); // book title begin
63
addTokenSubstitute("Tt", "</div>"); // book title end
65
addTokenSubstitute("TS", "<div class=\"sectiontitle\">"); // section title begin
66
addTokenSubstitute("Ts", "</div>"); // section title end
68
//addTokenSubstitute("PP", "<span class=\"poetry\">"); // poetry begin
69
//addTokenSubstitute("Pp", "</span>");
72
addTokenSubstitute("Fn", "</font>"); // font end; the matching <font face="..."> is written by handleToken() for "FN"
73
addTokenSubstitute("CL", "<br/>"); // new line
74
addTokenSubstitute("CM", "<br/>"); // paragraph mark, rendered here as a plain line break
76
// NOTE(review): "CG"/"CT" put raw '>' and '<' into the HTML output instead of
// &gt;/&lt; entities - confirm the rendering widget copes with that.
addTokenSubstitute("CG", ">"); // literal greater-than sign
77
addTokenSubstitute("CT", "<"); // literal less-than sign
79
addTokenSubstitute("JR", "<span class=\"right\">"); // right align begin
80
addTokenSubstitute("JC", "<span class=\"center\">"); // center align begin
81
addTokenSubstitute("JL", "</span>"); // align end
84
/**
* Runs the base GBFHTML filter on buf and then rewrites the GBF word markers
* that are still in the text (<WH..>, <WG..> for strong/lemma numbers, <WT..> for
* morph codes) into <span> elements carrying lemma="..." / morph="..." attributes.
*
* Returns 1 early, after the base filtering only, when
*  - the module does not process entry attributes,
*  - the module is known to the backend but has no lemmas, morph tags or strong numbers,
*  - the text contains no such marker at all.
*
* @param buf    Text to filter; the rewritten text is stored back into it as UTF-8.
* @param key    Passed through to GBFHTML::processText().
* @param module Module the text belongs to; it is dereferenced without a null check
*               (assumes callers always pass a valid module - TODO confirm).
*/
char BT_GBFHTML::processText(sword::SWBuf& buf, const sword::SWKey * key, const sword::SWModule * module) {
86
GBFHTML::processText(buf, key, module);
88
if (!module->isProcessEntryAttributes()) {
89
return 1; //no processing should be done, may happen in a search
92
CSwordModuleInfo* m = CPointers::backend()->findModuleByName( module->Name() );
94
// Note: when the backend does not know the module (m == 0) the text is still parsed below.
if (m && !(m->has(CSwordModuleInfo::lemmas) || m->has(CSwordModuleInfo::morphTags) || m->has(CSwordModuleInfo::strongNumbers))) { //only parse if the module has strongs, lemmas or morph tags
95
return 1; //WARNING: Return already here
98
//Sample input (German module text):
//Am Anfang<WH07225> schuf<WH01254><WTH8804> Gott<WH0430> Himmel<WH08064> und<WT> Erde<WH0776>.
99
//A simple word<WT> means: No entry for this word "word"
102
QString t = QString::fromUtf8(buf.c_str());
104
// Matches a run of one or more W markers, each optionally preceded by one
// punctuation character and followed by whitespace.
QRegExp tag("([.,;:]?<W[HGT][^>]*>\\s*)+");
108
int lastMatchEnd = 0;
110
int pos = tag.search(t,0);
112
if (pos == -1) { //no strong or morph code found in this text
113
return 1; //WARNING: Return already here
116
//split the text into parts which end with the GBF tag marker for strongs/lemmas
118
// each part runs from the end of the previous match to the end of the current one
list.append(t.mid(lastMatchEnd, pos+tag.matchedLength()-lastMatchEnd));
120
lastMatchEnd = pos + tag.matchedLength();
121
pos = tag.search(t, pos + tag.matchedLength());
124
//append the trailing text to the list.
125
if (!t.right(t.length() - lastMatchEnd).isEmpty()) {
126
list.append(t.right(t.length() - lastMatchEnd));
129
//list is now a list of words with 1-n Strongs at the end, which belong to this word.
131
//now create the necessary HTML in list entries and concat them to the result
132
// from here on the regexp matches one single marker: cap(1) is the type letter, cap(2) the value
tag = QRegExp("<W([HGT])([^>]*)>");
133
tag.setMinimal(true);
135
for (QStringList::iterator it = list.begin(); it != list.end(); ++it) {
136
QString e = (*it); //current entry to process
137
//qWarning(e.latin1());
139
//check if there is a word to which the strongs info belongs.
140
//If yes, wrap that word with the strongs info
141
//If not, leave out the strongs info, because it can't be tied to a text
142
//Comparing the first char with < is not enough, because the tokenReplace is done already
143
//so there might be html tags already.
144
const bool textPresent = (e.stripWhiteSpace().remove(QRegExp("[.,;:]")).left(2) != "<W");
151
int pos = tag.search(e, 0); //try to find a strong number marker
152
bool insertedTag = false; //whether the opening <span ...> was already inserted for this entry
153
bool hasLemmaAttr = false;
154
bool hasMorphAttr = false;
156
QString value = QString::null;
157
int tagAttributeStart = -1; //index in e right after "<span ", -1 as long as no span was inserted
159
while (pos != -1) { //work on all strong/lemma tags in this section, should be between 1-3 loops
160
const bool isMorph = (tag.cap(1) == "T");
161
// morph values are used as they are, lemma values keep their type letter (H or G) as prefix
value = isMorph ? tag.cap(2) : tag.cap(2).prepend( tag.cap(1) );
163
if (value.isEmpty()) {
168
if (!insertedTag) { //we have to insert a new tag end and beginning, i.e. our first loop
169
// the marker itself becomes the closing tag of the span
e.replace(pos, tag.matchedLength(), "</span>");
172
//skip blanks, commas, dots and stuff at the beginning, it doesn't belong to the morph code
173
QString rep("<span ");
174
rep.append(isMorph ? "morph" : "lemma").append("=\"").append(value).append("\">");
176
hasMorphAttr = isMorph;
177
hasLemmaAttr = !isMorph;
180
QChar c = e[startPos];
182
while ((startPos < pos) && (c.isSpace() || c.isPunct())) {
188
e.insert( startPos, rep );
189
tagAttributeStart = startPos + 6; //to point to the start of the attributes, 6 is the length of "<span "
192
else { //add the attribute to the existing tag
193
e.remove(pos, tag.matchedLength());
195
if (tagAttributeStart == -1) {
196
continue; //nothing valid found
199
if ((!isMorph && hasLemmaAttr) || (isMorph && hasMorphAttr)) { //we append another attribute value, e.g. 3000 gets 3000|5000
200
//search the existing attribute start
201
// minimal match with a lookahead: the match ends right before the closing quote of the attribute
QRegExp attrRegExp( isMorph ? "morph=\".+(?=\")" : "lemma=\".+(?=\")" );
202
attrRegExp.setMinimal(true);
203
const int foundPos = e.find(attrRegExp, tagAttributeStart);
205
if (foundPos != -1) {
206
e.insert(foundPos + attrRegExp.matchedLength(), QString("|").append(value));
207
pos += value.length() + 1; //the text grew by "|" plus the value, move the search position along
209
hasLemmaAttr = !isMorph;
210
hasMorphAttr = isMorph;
213
else { //attribute was not yet inserted
215
attr.setLatin1(isMorph ? "morph" : "lemma").append("=\"").append(value).append("\" ");
217
e.insert(tagAttributeStart, attr);
218
pos += attr.length(); //the text grew by the inserted attribute, move the search position along
220
hasMorphAttr = isMorph;
221
hasLemmaAttr = !isMorph;
224
//tagAttributeStart remains the same
228
pos = tag.search(e, pos); //look for the next marker in this entry
235
// store the rewritten text back into the caller's buffer as UTF-8
buf = (const char*)result.utf8();
241
/**
* Handles a single GBF token. Tokens with a registered substitution are replaced by
* substituteToken(); the branches below cover the word markers (WG/WH/WT), footnotes
* (RB/RF/Rf), font changes (FN) and character codes (CA). GBFHTML::handleToken() is
* called as the fallback (presumably for every other token - verify).
*
* @param buf      Output buffer the generated HTML is appended to.
* @param token    Token text; only its first two characters select the branch.
* @param userData Per-text filter state; cast to BT_UserData for the footnote bookkeeping.
*/
bool BT_GBFHTML::handleToken(sword::SWBuf &buf, const char *token, sword::BasicFilterUserData *userData) {
242
if (!substituteToken(buf, token)) { //more than a simple replace
243
const unsigned int tokenLength = strlen(token);
247
// NOTE(review): the result of the dynamic_cast is dereferenced below without a null check.
BT_UserData* myUserData = dynamic_cast<BT_UserData*>(userData);
248
sword::SWModule* myModule = const_cast<sword::SWModule*>(myUserData->module); //hack to be able to call stuff like Lang()
250
// strong/lemma (WG, WH) and morph (WT) markers
if ( !strncmp(token, "WG", 2)
251
|| !strncmp(token, "WH", 2)
252
|| !strncmp(token, "WT", 2) ) {
257
else if (!strncmp(token, "RB", 2)) {
258
// remember the open span so that the "RF" branch can close it
myUserData->hasFootnotePreTag = true;
259
buf.append("<span class=\"footnotepre\">");
261
else if (!strncmp(token, "RF", 2)) { //start of footnote
262
//we use several append calls because appendFormatted slows down filtering, which should be fast
264
if (myUserData->hasFootnotePreTag) {
265
// qWarning("inserted footnotepre end");
266
buf.append("</span>");
267
myUserData->hasFootnotePreTag = false;
270
// the note attribute is put together from the module name, the key's short text
// and a running footnote number
buf.append(" <span class=\"footnote\" note=\"");
271
buf.append(myModule->Name());
273
buf.append(myUserData->key->getShortText());
275
buf.append( QString::number(myUserData->swordFootnote++).latin1() );
276
buf.append("\">*</span> ");
278
// keep the footnote's own text out of the output until "Rf" is seen
userData->suspendTextPassThru = true;
280
else if (!strncmp(token, "Rf", 2)) { //end of footnote
281
userData->suspendTextPassThru = false;
283
else if (!strncmp(token, "FN", 2)) { //the end </font> tag is inserted in addTokenSubstitute
284
buf.append("<font face=\"");
286
// copy the font name that follows "FN", leaving out any double quotes
for (i = 2; i < tokenLength; i++) {
287
if(token[i] != '\"') {
288
buf.append( token[i] );
294
else if (!strncmp(token, "CA", 2)) { // ASCII value
295
// the digits after "CA" are read as a decimal number and appended as one char
// NOTE(review): the atoi() result is not range-checked before the cast to char.
buf.append( (char)atoi(&token[2]) );
298
return GBFHTML::handleToken(buf, token, userData);