/*
 * Copyright (C) 2004-2008 Geometer Plus <contact@geometerplus.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */
22
#include <ZLFileImage.h>
23
#include <ZLUnicodeUtil.h>
25
#include "XHTMLReader.h"
26
#include "../util/EntityFilesCollector.h"
27
#include "../util/MiscUtil.h"
29
#include "../../bookmodel/BookReader.h"
30
#include "../../bookmodel/BookModel.h"
32
// Table shared by all XHTMLReader instances that maps a tag name to the action
// run for that tag. It is populated through addAction() (see fillTagTable())
// and looked up with the lower-cased tag name by the element handlers.
std::map<std::string,XHTMLTagAction*> XHTMLReader::ourTagActions;
34
// Out-of-line definition of the base tag-action destructor; the body is
// intentionally empty.
XHTMLTagAction::~XHTMLTagAction() {
}
37
class XHTMLTagParagraphAction : public XHTMLTagAction {
40
void doAtStart(XHTMLReader &reader, const char **xmlattributes);
41
void doAtEnd(XHTMLReader &reader);
44
class XHTMLTagRestartParagraphAction : public XHTMLTagAction {
47
void doAtStart(XHTMLReader &reader, const char **xmlattributes);
48
void doAtEnd(XHTMLReader &reader);
51
class XHTMLTagImageAction : public XHTMLTagAction {
54
XHTMLTagImageAction(const std::string &nameAttribute);
56
void doAtStart(XHTMLReader &reader, const char **xmlattributes);
57
void doAtEnd(XHTMLReader &reader);
60
const std::string myNameAttribute;
63
class XHTMLTagItemAction : public XHTMLTagAction {
66
void doAtStart(XHTMLReader &reader, const char **xmlattributes);
67
void doAtEnd(XHTMLReader &reader);
70
class XHTMLTagHyperlinkAction : public XHTMLTagAction {
73
void doAtStart(XHTMLReader &reader, const char **xmlattributes);
74
void doAtEnd(XHTMLReader &reader);
77
std::stack<FBTextKind> myHyperlinkStack;
80
class XHTMLTagControlAction : public XHTMLTagAction {
83
XHTMLTagControlAction(FBTextKind control);
85
void doAtStart(XHTMLReader &reader, const char **xmlattributes);
86
void doAtEnd(XHTMLReader &reader);
92
class XHTMLTagParagraphWithControlAction : public XHTMLTagAction {
95
XHTMLTagParagraphWithControlAction(FBTextKind control);
97
void doAtStart(XHTMLReader &reader, const char **xmlattributes);
98
void doAtEnd(XHTMLReader &reader);
101
FBTextKind myControl;
104
class XHTMLTagPreAction : public XHTMLTagAction {
107
void doAtStart(XHTMLReader &reader, const char **xmlattributes);
108
void doAtEnd(XHTMLReader &reader);
111
// Begins a new paragraph in the book model; the element attributes are ignored.
void XHTMLTagParagraphAction::doAtStart(XHTMLReader &reader, const char**) {
	reader.myModelReader.beginParagraph();
}
115
// Ends the paragraph that doAtStart() began.
void XHTMLTagParagraphAction::doAtEnd(XHTMLReader &reader) {
	reader.myModelReader.endParagraph();
}
119
// Breaks the text flow: ends the current paragraph and begins a fresh one.
// The element attributes are ignored.
void XHTMLTagRestartParagraphAction::doAtStart(XHTMLReader &reader, const char**) {
	reader.myModelReader.endParagraph();
	reader.myModelReader.beginParagraph();
}
124
// Nothing to do at the end tag: the paragraph switch already happened in
// doAtStart().
void XHTMLTagRestartParagraphAction::doAtEnd(XHTMLReader&) {
}
127
// Starts a list item: ends the current paragraph, begins a new one and puts a
// bullet sign in front of the item text. The element attributes are ignored.
void XHTMLTagItemAction::doAtStart(XHTMLReader &reader, const char**) {
	reader.myModelReader.endParagraph();
	// TODO: increase left indent
	reader.myModelReader.beginParagraph();
	// TODO: replace bullet sign by number inside OL tag
	// "\xE2\x80\xA2" is U+2022 BULLET in UTF-8.
	// NOTE(review): the trailing "\xC0\xA0" is an overlong (non-canonical)
	// two-byte UTF-8 sequence; if a no-break space was intended, the canonical
	// bytes are "\xC2\xA0". Left unchanged here; confirm against the text decoder.
	const std::string bullet = "\xE2\x80\xA2\xC0\xA0";
	reader.myModelReader.addData(bullet);
}
136
// Ends the paragraph that holds the list item.
void XHTMLTagItemAction::doAtEnd(XHTMLReader &reader) {
	reader.myModelReader.endParagraph();
}
140
// Stores the name of the attribute that doAtStart() reads the image file name from.
XHTMLTagImageAction::XHTMLTagImageAction(const std::string &nameAttribute) : myNameAttribute(nameAttribute) {
}
143
// Registers the image referenced by the element.
//
// The file name is read from the attribute named by myNameAttribute. If the
// attribute is missing nothing is done. An open paragraph is ended before the
// image is added and begun again afterwards, so the image is not emitted
// inside paragraph text; if no paragraph was open, none is opened.
void XHTMLTagImageAction::doAtStart(XHTMLReader &reader, const char **xmlattributes) {
	// NOTE(review): attributeValue() is assumed to return a null pointer when
	// the attribute is absent; passing that to strlen() would be undefined.
	const char *fileName = reader.attributeValue(xmlattributes, myNameAttribute.c_str());
	if (fileName == 0) {
		return;
	}

	const bool paragraphWasOpen = reader.myModelReader.paragraphIsOpen();
	if (paragraphWasOpen) {
		reader.myModelReader.endParagraph();
	}

	// Drop a leading "./" so the name is relative to myPathPrefix.
	if ((strlen(fileName) > 2) && strncmp(fileName, "./", 2) == 0) {
		fileName += 2;
	}
	reader.myModelReader.addImageReference(fileName);
	reader.myModelReader.addImage(fileName, new ZLFileImage("image/auto", reader.myPathPrefix + fileName, 0));

	if (paragraphWasOpen) {
		reader.myModelReader.beginParagraph();
	}
}
161
// Nothing to do at the end tag: the image is fully handled in doAtStart().
void XHTMLTagImageAction::doAtEnd(XHTMLReader&) {
}
164
// Stores the text kind that this action switches on and off.
XHTMLTagControlAction::XHTMLTagControlAction(FBTextKind control) : myControl(control) {
}
167
// Pushes myControl as the current text kind and emits the matching "start"
// control. The element attributes are ignored.
void XHTMLTagControlAction::doAtStart(XHTMLReader &reader, const char**) {
	reader.myModelReader.pushKind(myControl);
	reader.myModelReader.addControl(myControl, true);
}
172
// Mirror of doAtStart(): emits the "end" control for myControl, then pops the
// kind that doAtStart() pushed.
void XHTMLTagControlAction::doAtEnd(XHTMLReader &reader) {
	reader.myModelReader.addControl(myControl, false);
	reader.myModelReader.popKind();
}
177
// Handles the start of an <a> element.
//
// With an "href" attribute a hyperlink control is opened; a target starting
// with '#' is prefixed with the current reference name so it points into this
// file. Exactly one entry is pushed on myHyperlinkStack per element (the
// hyperlink kind, or REGULAR when there is no "href") so that doAtEnd() can
// pop exactly one. A "name" attribute additionally registers a hyperlink label.
void XHTMLTagHyperlinkAction::doAtStart(XHTMLReader &reader, const char **xmlattributes) {
	// NOTE(review): attributeValue() is assumed to return a null pointer when
	// the attribute is absent, hence the checks before dereferencing.
	const char *href = reader.attributeValue(xmlattributes, "href");
	if (href != 0) {
		const std::string link = (*href == '#') ? (reader.myReferenceName + href) : href;
		FBTextKind hyperlinkType = MiscUtil::isReference(link) ? EXTERNAL_HYPERLINK : INTERNAL_HYPERLINK;
		myHyperlinkStack.push(hyperlinkType);
		reader.myModelReader.addHyperlinkControl(hyperlinkType, link);
	} else {
		// No link opened; remember that so doAtEnd() emits no closing control.
		myHyperlinkStack.push(REGULAR);
	}

	const char *name = reader.attributeValue(xmlattributes, "name");
	if (name != 0) {
		reader.myModelReader.addHyperlinkLabel(reader.myReferenceName + "#" + name);
	}
}
193
// Handles the end of an <a> element: closes the hyperlink control opened by
// the matching doAtStart() (if any) and pops its stack entry.
void XHTMLTagHyperlinkAction::doAtEnd(XHTMLReader &reader) {
	// Calling top()/pop() on an empty stack is undefined; an end tag without
	// a recorded start is simply ignored.
	if (myHyperlinkStack.empty()) {
		return;
	}
	const FBTextKind kind = myHyperlinkStack.top();
	if (kind != REGULAR) {
		reader.myModelReader.addControl(kind, false);
	}
	myHyperlinkStack.pop();
}
201
// Stores the text kind that is pushed while the element is open.
XHTMLTagParagraphWithControlAction::XHTMLTagParagraphWithControlAction(FBTextKind control) : myControl(control) {
}
204
// Begins a paragraph with myControl pushed as the current text kind.
// For the TITLE kind, an end-of-section paragraph is inserted first when the
// book text model already holds more than one paragraph. The element
// attributes are ignored.
void XHTMLTagParagraphWithControlAction::doAtStart(XHTMLReader &reader, const char**) {
	if ((myControl == TITLE) && (reader.myModelReader.model().bookTextModel()->paragraphsNumber() > 1)) {
		reader.myModelReader.insertEndOfSectionParagraph();
	}
	// Push the kind and begin the paragraph unconditionally so that they
	// always pair with the endParagraph()/popKind() calls in doAtEnd().
	reader.myModelReader.pushKind(myControl);
	reader.myModelReader.beginParagraph();
}
212
// Mirror of doAtStart(): ends the paragraph, then pops the pushed text kind.
void XHTMLTagParagraphWithControlAction::doAtEnd(XHTMLReader &reader) {
	reader.myModelReader.endParagraph();
	reader.myModelReader.popKind();
}
217
// Enters preformatted mode: sets the reader's myPreformatted flag (checked in
// XHTMLReader::characterDataHandler()), begins a paragraph and switches the
// CODE control on. The element attributes are ignored.
void XHTMLTagPreAction::doAtStart(XHTMLReader &reader, const char**) {
	reader.myPreformatted = true;
	reader.myModelReader.beginParagraph();
	reader.myModelReader.addControl(CODE, true);
}
223
// Leaves preformatted mode, undoing doAtStart() in reverse order: CODE control
// off, paragraph ended, myPreformatted flag cleared.
void XHTMLTagPreAction::doAtEnd(XHTMLReader &reader) {
	reader.myModelReader.addControl(CODE, false);
	reader.myModelReader.endParagraph();
	reader.myPreformatted = false;
}
229
void XHTMLReader::addAction(const std::string &tag, XHTMLTagAction *action) {
230
ourTagActions.insert(std::pair<std::string,XHTMLTagAction*>(tag,action));
233
void XHTMLReader::fillTagTable() {
234
if (ourTagActions.empty()) {
235
//addAction("html", new XHTMLTagAction());
236
addAction("body", new XHTMLTagParagraphAction());
237
//addAction("title", new XHTMLTagAction());
238
//addAction("meta", new XHTMLTagAction());
239
//addAction("script", new XHTMLTagAction());
241
//addAction("font", new XHTMLTagAction());
242
//addAction("style", new XHTMLTagAction());
244
addAction("p", new XHTMLTagParagraphAction());
245
addAction("h1", new XHTMLTagParagraphWithControlAction(H1));
246
addAction("h2", new XHTMLTagParagraphWithControlAction(H2));
247
addAction("h3", new XHTMLTagParagraphWithControlAction(H3));
248
addAction("h4", new XHTMLTagParagraphWithControlAction(H4));
249
addAction("h5", new XHTMLTagParagraphWithControlAction(H5));
250
addAction("h6", new XHTMLTagParagraphWithControlAction(H6));
252
//addAction("ol", new XHTMLTagAction());
253
//addAction("ul", new XHTMLTagAction());
254
//addAction("dl", new XHTMLTagAction());
255
addAction("li", new XHTMLTagItemAction());
257
addAction("strong", new XHTMLTagControlAction(STRONG));
258
addAction("b", new XHTMLTagControlAction(BOLD));
259
addAction("em", new XHTMLTagControlAction(EMPHASIS));
260
addAction("i", new XHTMLTagControlAction(ITALIC));
261
addAction("code", new XHTMLTagControlAction(CODE));
262
addAction("tt", new XHTMLTagControlAction(CODE));
263
addAction("kbd", new XHTMLTagControlAction(CODE));
264
addAction("var", new XHTMLTagControlAction(CODE));
265
addAction("samp", new XHTMLTagControlAction(CODE));
266
addAction("cite", new XHTMLTagControlAction(CITE));
267
addAction("sub", new XHTMLTagControlAction(SUB));
268
addAction("sup", new XHTMLTagControlAction(SUP));
269
addAction("dd", new XHTMLTagControlAction(DEFINITION_DESCRIPTION));
270
addAction("dfn", new XHTMLTagControlAction(DEFINITION));
271
addAction("strike", new XHTMLTagControlAction(STRIKETHROUGH));
273
addAction("a", new XHTMLTagHyperlinkAction());
275
addAction("img", new XHTMLTagImageAction("src"));
276
addAction("object", new XHTMLTagImageAction("data"));
278
//addAction("area", new XHTMLTagAction());
279
//addAction("map", new XHTMLTagAction());
281
//addAction("base", new XHTMLTagAction());
282
//addAction("blockquote", new XHTMLTagAction());
283
addAction("br", new XHTMLTagRestartParagraphAction());
284
//addAction("center", new XHTMLTagAction());
285
addAction("div", new XHTMLTagParagraphAction());
286
//addAction("dt", new XHTMLTagAction());
287
//addAction("head", new XHTMLTagAction());
288
//addAction("hr", new XHTMLTagAction());
289
//addAction("link", new XHTMLTagAction());
290
//addAction("param", new XHTMLTagAction());
291
//addAction("q", new XHTMLTagAction());
292
//addAction("s", new XHTMLTagAction());
294
addAction("pre", new XHTMLTagPreAction());
295
//addAction("big", new XHTMLTagAction());
296
//addAction("small", new XHTMLTagAction());
297
//addAction("u", new XHTMLTagAction());
299
//addAction("table", new XHTMLTagAction());
300
addAction("td", new XHTMLTagParagraphAction());
301
addAction("th", new XHTMLTagParagraphAction());
302
//addAction("tr", new XHTMLTagAction());
303
//addAction("caption", new XHTMLTagAction());
304
//addAction("span", new XHTMLTagAction());
308
// Binds the reader to the BookReader that receives all parsed content.
// myPathPrefix, myReferenceName and myPreformatted are set in readFile().
XHTMLReader::XHTMLReader(BookReader &modelReader) : myModelReader(modelReader) {
}
311
bool XHTMLReader::readFile(const std::string &pathPrefix, const std::string &fileName, const std::string &referenceName) {
312
myModelReader.addHyperlinkLabel(referenceName);
316
myPathPrefix = pathPrefix;
317
myReferenceName = referenceName;
319
myPreformatted = false;
321
return readDocument(pathPrefix + fileName);
325
void XHTMLReader::startElementHandler(const char *tag, const char **attributes) {
326
static const std::string HASH = "#";
327
const char *id = attributeValue(attributes, "id");
329
myModelReader.addHyperlinkLabel(myReferenceName + HASH + id);
332
XHTMLTagAction *action = ourTagActions[ZLUnicodeUtil::toLower(tag)];
334
action->doAtStart(*this, attributes);
338
void XHTMLReader::endElementHandler(const char *tag) {
339
XHTMLTagAction *action = ourTagActions[ZLUnicodeUtil::toLower(tag)];
341
action->doAtEnd(*this);
345
void XHTMLReader::characterDataHandler(const char *text, int len) {
346
if (myPreformatted) {
347
if ((*text == '\r') || (*text == '\n')) {
348
myModelReader.addControl(CODE, false);
349
myModelReader.endParagraph();
350
myModelReader.beginParagraph();
351
myModelReader.addControl(CODE, true);
353
int spaceCounter = 0;
354
while ((spaceCounter < len) && isspace((unsigned char)*text)) {
357
myModelReader.addFixedHSpace(spaceCounter);
358
text += spaceCounter;
362
myModelReader.addData(std::string(text, len));
366
const std::vector<std::string> &XHTMLReader::externalDTDs() const {
367
return EntityFilesCollector::instance().externalDTDs("xhtml");