/*
 * FBReader -- electronic book reader
 * Copyright (C) 2004-2007 Nikolay Pultsin <geometer@mawhrin.net>
 * Copyright (C) 2005 Mikhail Sobolev <mss@mawhrin.net>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */
24
#include <ZLFileImage.h>
25
#include <ZLUnicodeUtil.h>
27
#include "XHTMLReader.h"
28
#include "../util/EntityFilesCollector.h"
29
#include "../util/MiscUtil.h"
31
#include "../../bookmodel/BookReader.h"
32
#include "../../bookmodel/BookModel.h"
34
// Definition of the static table that maps a tag name to the action run for
// that tag. Entries are added by addAction() (called from fillTagTable());
// the element handlers look tags up here after lower-casing the tag name.
std::map<std::string,XHTMLTagAction*> XHTMLReader::ourTagActions;
36
// Out-of-line destructor of the tag-action base class; it has nothing to release.
XHTMLTagAction::~XHTMLTagAction() {
}
39
class XHTMLTagParagraphAction : public XHTMLTagAction {
42
void doAtStart(XHTMLReader &reader, const char **xmlattributes);
43
void doAtEnd(XHTMLReader &reader);
46
class XHTMLTagRestartParagraphAction : public XHTMLTagAction {
49
void doAtStart(XHTMLReader &reader, const char **xmlattributes);
50
void doAtEnd(XHTMLReader &reader);
53
class XHTMLTagImageAction : public XHTMLTagAction {
56
XHTMLTagImageAction(const std::string &nameAttribute);
58
void doAtStart(XHTMLReader &reader, const char **xmlattributes);
59
void doAtEnd(XHTMLReader &reader);
62
const std::string myNameAttribute;
65
class XHTMLTagItemAction : public XHTMLTagAction {
68
void doAtStart(XHTMLReader &reader, const char **xmlattributes);
69
void doAtEnd(XHTMLReader &reader);
72
class XHTMLTagHyperlinkAction : public XHTMLTagAction {
75
void doAtStart(XHTMLReader &reader, const char **xmlattributes);
76
void doAtEnd(XHTMLReader &reader);
79
std::stack<FBTextKind> myHyperlinkStack;
82
class XHTMLTagControlAction : public XHTMLTagAction {
85
XHTMLTagControlAction(FBTextKind control);
87
void doAtStart(XHTMLReader &reader, const char **xmlattributes);
88
void doAtEnd(XHTMLReader &reader);
94
class XHTMLTagParagraphWithControlAction : public XHTMLTagAction {
97
XHTMLTagParagraphWithControlAction(FBTextKind control);
99
void doAtStart(XHTMLReader &reader, const char **xmlattributes);
100
void doAtEnd(XHTMLReader &reader);
103
FBTextKind myControl;
106
class XHTMLTagPreAction : public XHTMLTagAction {
109
void doAtStart(XHTMLReader &reader, const char **xmlattributes);
110
void doAtEnd(XHTMLReader &reader);
113
// Element start: begin a new paragraph in the book model.
// The element's attributes are not inspected.
void XHTMLTagParagraphAction::doAtStart(XHTMLReader &reader, const char**) {
	BookReader &book = reader.myModelReader;
	book.beginParagraph();
}
117
// Element end: finish the paragraph in the book model.
void XHTMLTagParagraphAction::doAtEnd(XHTMLReader &reader) {
	BookReader &book = reader.myModelReader;
	book.endParagraph();
}
121
// Element start: break the text flow by ending the current paragraph and
// beginning a new one right away. Attributes are not inspected.
void XHTMLTagRestartParagraphAction::doAtStart(XHTMLReader &reader, const char**) {
	BookReader &book = reader.myModelReader;
	book.endParagraph();
	book.beginParagraph();
}
126
// Element end: nothing to do, the paragraph was already restarted in doAtStart().
void XHTMLTagRestartParagraphAction::doAtEnd(XHTMLReader&) {
}
129
// Element start: close the current paragraph, open a new one for the list
// item and prefix it with a bullet. Attributes are not inspected.
void XHTMLTagItemAction::doAtStart(XHTMLReader &reader, const char**) {
	// "\xE2\x80\xA2" is the UTF-8 encoding of U+2022 (bullet).
	// NOTE(review): the trailing "\xC0\xA0" is not the standard UTF-8 form of a
	// no-break space (that would be "\xC2\xA0") -- confirm it is intentional.
	static const std::string BULLET_PREFIX = "\xE2\x80\xA2\xC0\xA0";

	BookReader &book = reader.myModelReader;
	book.endParagraph();
	// TODO: increase left indent
	book.beginParagraph();
	// TODO: replace bullet sign by number inside OL tag
	book.addData(BULLET_PREFIX);
}
138
// Element end: finish the list item's paragraph.
void XHTMLTagItemAction::doAtEnd(XHTMLReader &reader) {
	BookReader &book = reader.myModelReader;
	book.endParagraph();
}
142
// Stores the name of the attribute from which doAtStart() reads the image file name.
XHTMLTagImageAction::XHTMLTagImageAction(const std::string &nameAttribute)
	: myNameAttribute(nameAttribute) {
}
145
// Element start: add the image named by the configured attribute to the book
// model, both as a reference in the text and as image data.
//
// reader        - the reader being driven; its model reader and path prefix are used.
// xmlattributes - the element's attribute list, as handed to the element handler.
//
// Nothing is done when the attribute is missing. If a paragraph is open it is
// closed before the image is added and a new one is opened afterwards.
void XHTMLTagImageAction::doAtStart(XHTMLReader &reader, const char **xmlattributes) {
	const char *fileName = reader.attributeValue(xmlattributes, myNameAttribute);
	// Presumably attributeValue() yields a null pointer for an absent
	// attribute; strlen() below must never see one.
	if (fileName == 0) {
		return;
	}

	const bool paragraphWasOpen = reader.myModelReader.paragraphIsOpen();
	if (paragraphWasOpen) {
		reader.myModelReader.endParagraph();
	}

	// Drop a leading "./" so the name is relative to myPathPrefix.
	if ((strlen(fileName) > 2) && (strncmp(fileName, "./", 2) == 0)) {
		fileName += 2;
	}

	reader.myModelReader.addImageReference(fileName);
	reader.myModelReader.addImage(fileName, new ZLFileImage("image/auto", reader.myPathPrefix + fileName, 0));

	// Reopen a paragraph only if one was interrupted by the image.
	if (paragraphWasOpen) {
		reader.myModelReader.beginParagraph();
	}
}
163
// Element end: nothing to do, the image was fully handled in doAtStart().
void XHTMLTagImageAction::doAtEnd(XHTMLReader&) {
}
166
// Remembers the text kind that this action switches on and off.
XHTMLTagControlAction::XHTMLTagControlAction(FBTextKind control)
	: myControl(control) {
}
169
// Element start: push the text kind and emit its opening control.
// Attributes are not inspected.
void XHTMLTagControlAction::doAtStart(XHTMLReader &reader, const char**) {
	BookReader &book = reader.myModelReader;
	book.pushKind(myControl);
	book.addControl(myControl, true);
}
174
// Element end: emit the closing control, then pop the text kind pushed by doAtStart().
void XHTMLTagControlAction::doAtEnd(XHTMLReader &reader) {
	BookReader &book = reader.myModelReader;
	book.addControl(myControl, false);
	book.popKind();
}
179
// Element start for an anchor.
//
// If the element has an "href" attribute, a hyperlink control is opened: a
// target that starts with '#' is prefixed with the current reference name,
// and the link is classified as external or internal via MiscUtil::isReference().
// If it has a "name" attribute, a hyperlink label "<referenceName>#<name>" is
// added so that other links can point here.
//
// Exactly one entry is pushed on myHyperlinkStack per element, because
// doAtEnd() pops exactly one; REGULAR records that no control was opened.
void XHTMLTagHyperlinkAction::doAtStart(XHTMLReader &reader, const char **xmlattributes) {
	// Presumably attributeValue() yields a null pointer for an absent attribute.
	const char *href = reader.attributeValue(xmlattributes, "href");
	if (href != 0) {
		const std::string link = (*href == '#') ? (reader.myReferenceName + href) : href;
		FBTextKind hyperlinkType = MiscUtil::isReference(link) ? EXTERNAL_HYPERLINK : INTERNAL_HYPERLINK;
		myHyperlinkStack.push(hyperlinkType);
		reader.myModelReader.addHyperlinkControl(hyperlinkType, link);
	} else {
		myHyperlinkStack.push(REGULAR);
	}

	const char *name = reader.attributeValue(xmlattributes, "name");
	if (name != 0) {
		reader.myModelReader.addHyperlinkLabel(reader.myReferenceName + "#" + name);
	}
}
195
// Element end for an anchor: take the kind recorded by doAtStart() off the
// stack and, unless it is REGULAR, emit the closing control for it.
void XHTMLTagHyperlinkAction::doAtEnd(XHTMLReader &reader) {
	const FBTextKind openedKind = myHyperlinkStack.top();
	myHyperlinkStack.pop();
	if (openedKind == REGULAR) {
		return;
	}
	reader.myModelReader.addControl(openedKind, false);
}
203
// Remembers the text kind applied to the paragraph this action creates.
XHTMLTagParagraphWithControlAction::XHTMLTagParagraphWithControlAction(FBTextKind control)
	: myControl(control) {
}
206
// Element start: begin a paragraph and switch the configured text kind on.
// For the TITLE kind an end-of-section paragraph is inserted first, but only
// when the book text model already holds more than one paragraph.
// Attributes are not inspected.
void XHTMLTagParagraphWithControlAction::doAtStart(XHTMLReader &reader, const char**) {
	BookReader &book = reader.myModelReader;
	if (myControl == TITLE) {
		if (book.model().bookTextModel()->paragraphsNumber() > 1) {
			book.insertEndOfSectionParagraph();
		}
	}
	book.beginParagraph();
	book.addControl(myControl, true);
}
214
// Element end: switch the text kind off again and finish the paragraph.
void XHTMLTagParagraphWithControlAction::doAtEnd(XHTMLReader &reader) {
	BookReader &book = reader.myModelReader;
	book.addControl(myControl, false);
	book.endParagraph();
}
219
// Element start: turn on the reader's preformatted mode and open a paragraph
// with the CODE kind switched on. Attributes are not inspected.
void XHTMLTagPreAction::doAtStart(XHTMLReader &reader, const char**) {
	reader.myPreformatted = true;

	BookReader &book = reader.myModelReader;
	book.beginParagraph();
	book.addControl(CODE, true);
}
225
// Element end: switch the CODE kind off, finish the paragraph and leave
// preformatted mode.
void XHTMLTagPreAction::doAtEnd(XHTMLReader &reader) {
	BookReader &book = reader.myModelReader;
	book.addControl(CODE, false);
	book.endParagraph();

	reader.myPreformatted = false;
}
231
void XHTMLReader::addAction(const std::string &tag, XHTMLTagAction *action) {
232
ourTagActions.insert(std::pair<std::string,XHTMLTagAction*>(tag,action));
235
void XHTMLReader::fillTagTable() {
236
if (ourTagActions.empty()) {
237
//addAction("html", new XHTMLTagAction());
238
addAction("body", new XHTMLTagParagraphAction());
239
//addAction("title", new XHTMLTagAction());
240
//addAction("meta", new XHTMLTagAction());
241
//addAction("script", new XHTMLTagAction());
243
//addAction("font", new XHTMLTagAction());
244
//addAction("style", new XHTMLTagAction());
246
addAction("p", new XHTMLTagParagraphAction());
247
addAction("h1", new XHTMLTagParagraphWithControlAction(H1));
248
addAction("h2", new XHTMLTagParagraphWithControlAction(H2));
249
addAction("h3", new XHTMLTagParagraphWithControlAction(H3));
250
addAction("h4", new XHTMLTagParagraphWithControlAction(H4));
251
addAction("h5", new XHTMLTagParagraphWithControlAction(H5));
252
addAction("h6", new XHTMLTagParagraphWithControlAction(H6));
254
//addAction("ol", new XHTMLTagAction());
255
//addAction("ul", new XHTMLTagAction());
256
//addAction("dl", new XHTMLTagAction());
257
addAction("li", new XHTMLTagItemAction());
259
addAction("strong", new XHTMLTagControlAction(STRONG));
260
addAction("b", new XHTMLTagControlAction(BOLD));
261
addAction("em", new XHTMLTagControlAction(EMPHASIS));
262
addAction("i", new XHTMLTagControlAction(ITALIC));
263
addAction("code", new XHTMLTagControlAction(CODE));
264
addAction("tt", new XHTMLTagControlAction(CODE));
265
addAction("kbd", new XHTMLTagControlAction(CODE));
266
addAction("var", new XHTMLTagControlAction(CODE));
267
addAction("samp", new XHTMLTagControlAction(CODE));
268
addAction("cite", new XHTMLTagControlAction(CITE));
269
addAction("sub", new XHTMLTagControlAction(SUB));
270
addAction("sup", new XHTMLTagControlAction(SUP));
271
addAction("dd", new XHTMLTagControlAction(DEFINITION_DESCRIPTION));
272
addAction("dfn", new XHTMLTagControlAction(DEFINITION));
273
addAction("strike", new XHTMLTagControlAction(STRIKETHROUGH));
275
addAction("a", new XHTMLTagHyperlinkAction());
277
addAction("img", new XHTMLTagImageAction("src"));
278
addAction("object", new XHTMLTagImageAction("data"));
280
//addAction("area", new XHTMLTagAction());
281
//addAction("map", new XHTMLTagAction());
283
//addAction("base", new XHTMLTagAction());
284
//addAction("blockquote", new XHTMLTagAction());
285
addAction("br", new XHTMLTagRestartParagraphAction());
286
//addAction("center", new XHTMLTagAction());
287
addAction("div", new XHTMLTagParagraphAction());
288
//addAction("dt", new XHTMLTagAction());
289
//addAction("head", new XHTMLTagAction());
290
//addAction("hr", new XHTMLTagAction());
291
//addAction("link", new XHTMLTagAction());
292
//addAction("param", new XHTMLTagAction());
293
//addAction("q", new XHTMLTagAction());
294
//addAction("s", new XHTMLTagAction());
296
addAction("pre", new XHTMLTagPreAction());
297
//addAction("big", new XHTMLTagAction());
298
//addAction("small", new XHTMLTagAction());
299
//addAction("u", new XHTMLTagAction());
301
//addAction("table", new XHTMLTagAction());
302
addAction("td", new XHTMLTagParagraphAction());
303
addAction("th", new XHTMLTagParagraphAction());
304
//addAction("tr", new XHTMLTagAction());
305
//addAction("caption", new XHTMLTagAction());
306
//addAction("span", new XHTMLTagAction());
310
// Binds the reader to the book model reader that receives the parsed content.
XHTMLReader::XHTMLReader(BookReader &modelReader)
	: myModelReader(modelReader) {
}
313
bool XHTMLReader::readFile(const std::string &pathPrefix, const std::string &fileName, const std::string &referenceName) {
314
myModelReader.addHyperlinkLabel(referenceName);
318
myPathPrefix = pathPrefix;
319
myReferenceName = referenceName;
321
myPreformatted = false;
323
return readDocument(pathPrefix + fileName);
327
void XHTMLReader::startElementHandler(const char *tag, const char **attributes) {
328
static const std::string HASH = "#";
329
const char *id = attributeValue(attributes, "id");
331
myModelReader.addHyperlinkLabel(myReferenceName + HASH + id);
334
XHTMLTagAction *action = ourTagActions[ZLUnicodeUtil::toLower(tag)];
336
action->doAtStart(*this, attributes);
340
void XHTMLReader::endElementHandler(const char *tag) {
341
XHTMLTagAction *action = ourTagActions[ZLUnicodeUtil::toLower(tag)];
343
action->doAtEnd(*this);
347
void XHTMLReader::characterDataHandler(const char *text, int len) {
348
if (myPreformatted) {
349
if ((*text == '\r') || (*text == '\n')) {
350
myModelReader.addControl(CODE, false);
351
myModelReader.endParagraph();
352
myModelReader.beginParagraph();
353
myModelReader.addControl(CODE, true);
355
int spaceCounter = 0;
356
while ((spaceCounter < len) && isspace((unsigned char)*text)) {
359
myModelReader.addFixedHSpace(spaceCounter);
360
text += spaceCounter;
364
myModelReader.addData(std::string(text, len));
368
// Returns the list of external DTD files for XHTML documents, as reported by
// the EntityFilesCollector singleton for the "xhtml" format key.
const std::vector<std::string> &XHTMLReader::externalDTDs() const {
369
return EntityFilesCollector::instance().externalDTDs("xhtml");