1
/***************************************************************************
2
khtmlreader.cpp - description
5
copyright : (C) 2001 by Frank Dekervel
6
email : Frank.Dekervel@student.kuleuven.ac.be
7
***************************************************************************/
9
/***************************************************************************
11
* This program is free software; you can redistribute it and/or modify *
12
* it under the terms of the GNU lesser General Public License as *
14
* the Free Software Foundation; either version 2 of the License, or *
15
* (at your option) any later version. *
17
***************************************************************************/
19
#include "khtmlreader.h"
21
#include "khtmlreader.moc"
23
#include <dom/dom_text.h>
24
#include <dom/dom2_views.h>
25
#include <dom/dom_doc.h>
27
#include <dom/dom_element.h>
28
#include <dom/html_table.h>
29
#include <khtmlview.h>
31
#include <kapplication.h>
32
#include <dom/html_misc.h>
35
// Creates the reader for the given writer and instantiates the KHTMLPart
// (_html) that filter() later uses to load the document.
// NOTE(review): presumably `w` is kept as _writer -- confirm.
KHTMLReader::KHTMLReader(KWDWriter *w){
36
_html=new KHTMLPart();
41
// Hand-written prototypes for the two Qt functions that filter() calls on a
// dummy modal widget; they are declared here rather than taken from a header.
// if a troll ever sees this, he can't kill me anyway. (and he should kill dfaure first)
42
void qt_enter_modal( QWidget *widget );
43
void qt_leave_modal( QWidget *widget );
46
// Entry point of the import: loads `url` into the KHTMLPart and hooks the
// part's completed() signal up to our completed() slot, which does the
// actual conversion.
bool KHTMLReader::filter(KURL url) {
47
kdDebug() << "KHTMLReader::filter" << endl;
48
// completed() is where the loaded document gets parsed and written out.
QObject::connect(_html,SIGNAL(completed()),this,SLOT(completed()));
53
// Give the view a fixed size and switch off everything that is not needed
// for a pure text import: images, JavaScript, plugins and Java.
_html->view()->resize(600,530);
54
_html->setAutoloadImages(false);
55
_html->setJScriptEnabled(false);
56
_html->setPluginsEnabled(false);
57
_html->setJavaEnabled(false);
58
// Start loading; a false result from openURL() is only logged here.
if (_html->openURL(url) == false) {
59
kdWarning() << "openURL returned false" << endl;
63
//FIXME use synchronous IO instead of this hack if possible.
64
// The hack: a dummy dialog widget is pushed as modal via the Qt functions
// declared above and popped again afterwards.
QWidget dummy(0,0,WType_Dialog | WShowModal);
65
qt_enter_modal(&dummy);
67
qt_leave_modal(&dummy);
71
// Accessor for the current parser state. If the state stack (_state) is
// still empty, an initial HTMLReader_state is built first: it refers to the
// writer's main frameset, gets a freshly added paragraph in that frameset,
// and takes over that paragraph's current format and layout.
HTMLReader_state *KHTMLReader::state() {
72
if (_state.count() == 0) {
73
HTMLReader_state *s=new HTMLReader_state;
74
s->frameset=_writer->mainFrameset();
75
s->paragraph = _writer->addParagraph(s->frameset);
76
s->format=_writer->currentFormat(s->paragraph,true);
77
s->layout=_writer->currentLayout(s->paragraph);
83
// Creates a new HTMLReader_state initialised as a copy of the current one
// (frameset, paragraph, format and layout are taken from state()) and asks
// the writer to clean up the paragraph it refers to.
HTMLReader_state *KHTMLReader::pushNewState() {
84
HTMLReader_state *s=new HTMLReader_state;
85
s->frameset=state()->frameset;
86
s->paragraph=state()->paragraph;
87
s->format=state()->format;
88
s->layout=state()->layout;
89
_writer->cleanUpParagraph(s->paragraph);
95
// Removes the top entry from the state stack and reconciles the state that
// becomes current with what happened while the popped state was active:
// the paragraph, the layout and the format are carried over or restarted
// as described by the statements below.
void KHTMLReader::popState() {
97
// `s` is the state that was active until now; state() now yields its parent.
HTMLReader_state *s=_state.pop();
100
the recursion trough html is somewhat clumsy still, i'm working on a better method.
101
popState gets called when a tag is closed, but since a closed tag doesn't mean the end
102
of a (kword) "tag" we have to copy some things over from the closed tag:
103
- the paragraph (after a </B>, we still are in the same paragraph, but
104
inside the <B></B> , there might have been a <BR>)
105
if we go back into another frameset, we start a new paragraph.
107
// Same frameset as before: keep writing into the paragraph the popped state
// ended with.
if (s->frameset == state()->frameset)
109
state()->paragraph=s->paragraph;
110
// The layout changed while the popped state was active: restart with the
// layout of the state we returned to.
if ((state()->layout != s->layout)) {
111
startNewLayout(false,state()->layout);
113
// Re-open the format of the state we returned to on the current paragraph.
state()->format=_writer->startFormat(state()->paragraph, state()->format);
118
// Convenience overload: forwards to startNewLayout(bool, QDomElement),
// passing `layout` as the layout element.
void KHTMLReader::startNewLayout(bool startNewFormat) {
120
startNewLayout(startNewFormat,layout);
123
// Applies `layout` to the current paragraph and records the result as the
// current state's layout. If the current paragraph already contains text,
// a new paragraph is started first (with startnewlayout == true);
// `startNewFormat` is passed through to startNewParagraph().
void KHTMLReader::startNewLayout(bool startNewFormat, QDomElement layout) {
124
if (!(_writer->getText(state()->paragraph).isEmpty())) {
125
startNewParagraph(startNewFormat,true);
127
state()->layout=_writer->setLayout(state()->paragraph,layout);
131
// Slot connected in filter() to the KHTMLPart's completed() signal.
// Looks up the first <body> and <head> elements of the loaded document,
// then cleans up the current paragraph and lets the writer emit the
// document; the writer's result is stored in _it_worked.
void KHTMLReader::completed() {
132
kdDebug() << "KHTMLReader::completed" << endl;
134
DOM::Document doc=_html->document(); // FIXME parse <HEAD> too
135
// Only the first <body> element is considered.
DOM::NodeList list=doc.getElementsByTagName("body");
136
DOM::Node docbody=list.item(0);
138
if (docbody.isNull()) {
139
kdWarning() << "no <BODY>, giving up" << endl;
147
// Same lookup for <head>; a missing head only produces a warning.
list = doc.getElementsByTagName("head");
148
DOM::Node dochead=list.item(0);
149
if (!dochead.isNull())
152
kdWarning() << "WARNING: no html <HEAD> section" << endl;
154
// Finish the paragraph that is still open and write out the result.
_writer->cleanUpParagraph(state()->paragraph);
155
_it_worked=_writer->writeDoc();
159
// Converts one DOM node. Text nodes have their character data appended to
// the current paragraph and end the recursion. For other nodes the current
// format/layout are refreshed, parseTag() is consulted, and its result
// (go_recursive) is kept as the flag for descending into the children,
// which are walked via firstChild()/nextSibling().
void KHTMLReader::parseNode(DOM::Node node) {
161
// check if this is a text node.
164
_writer->addText(state()->paragraph,t.data().string());
165
return; // no children anymore...
168
// is this really needed ? it can't do harm anyway.
169
state()->format=_writer->currentFormat(state()->paragraph,true);
170
state()->layout=_writer->currentLayout(state()->paragraph);
175
// Default is to descend; parseTag() may veto this for tags that handle
// (or have no) children themselves.
bool go_recursive=true;
178
// get the CSS information
180
// get the tag information
181
go_recursive=parseTag(e);
184
for (DOM::Node q=node.firstChild(); !q.isNull(); q=q.nextSibling()) {
193
// Scans the direct children of the <head> element `e`. For a child whose
// tag name is "title" (compared case-insensitively), the text of its first
// child is passed to the writer's createDocInfo() together with the fixed
// string "HTML import filter".
void KHTMLReader::parse_head(DOM::Element e) {
194
for (DOM::Element items=e.firstChild();!items.isNull();items=items.nextSibling()) {
195
if (items.tagName().string().lower() == "title") {
196
DOM::Text t=items.firstChild();
198
_writer->createDocInfo("HTML import filter",t.data().string());
205
if (e.tagName().lower() == #x) \
206
return parse_##x(e); \
209
// _PF(x,a,b,c): helper for parseTag(). If the lower-cased tag name of the
// element `e` equals the stringified `x`, the format attribute given by the
// stringified a/b/c is set on the current paragraph via formatAttribute().
#define _PF(x,a,b,c) { \
210
if (e.tagName().lower() == #x) \
212
_writer->formatAttribute(state()->paragraph, #a,#b,#c); \
217
// _PL(x,a,b,c): helper for parseTag(). If the lower-cased tag name of the
// element `e` equals the stringified `x`, the layout is re-set on the current
// paragraph, a new paragraph is started when the current one already has
// text, and the layout attribute given by the stringified a/b/c is applied
// via layoutAttribute().
// the state->layout=_writer->setLayout is meant to tell popState something changed in the layout, and a new
218
// layout should probably be started after closing.
220
#define _PL(x,a,b,c) { \
221
if (e.tagName().lower() == #x) \
223
state()->layout=_writer->setLayout(state()->paragraph,state()->layout);\
224
if (!(_writer->getText(state()->paragraph).isEmpty())) \
225
startNewParagraph(false,false); \
226
_writer->layoutAttribute(state()->paragraph, #a,#b,#c); \
232
// Dispatches on the tag name of `e` using the helper macros defined above.
// The boolean result is used by parseNode() as its go_recursive flag.
bool KHTMLReader::parseTag(DOM::Element e) {
235
// _PP(table); <- disabled for beta.
242
// FIXME we can get rid of these, make things tons more simple
243
// when khtml finally implements getComputedStyle
244
// Character formatting: <b>/<strong> set WEIGHT to 75, <u> sets UNDERLINE
// and <i> sets ITALIC.
_PF(b,WEIGHT,value,75);
245
_PF(strong,WEIGHT,value,75);
246
_PF(u,UNDERLINE,value,1);
247
_PF(i,ITALIC,value,1);
249
// Paragraph alignment tags map onto the FLOW/align layout attribute.
_PL(center,FLOW,align,center);
250
_PL(right,FLOW,align,right);
251
_PL(left,FLOW,align,left);
253
// Headings set the layout's NAME value to the tag name (h1 .. h6).
_PL(h1,NAME,value,h1);
254
_PL(h2,NAME,value,h2);
255
_PL(h3,NAME,value,h3);
256
_PL(h4,NAME,value,h4);
257
_PL(h5,NAME,value,h5);
258
_PL(h6,NAME,value,h6);
264
// CSS handling for element `e`. The code below is compiled out with
// `#if 0`; it is kept as a sketch for when computed styles become usable
// (see the FIXME).
void KHTMLReader::parseStyle(DOM::Element e) {
265
#if 0 // styles are broken broken broken broken broken broken.
266
// s1: the element's own style declaration; s2: the computed style obtained
// from the document's default view.
DOM::CSSStyleDeclaration s1=e.style();
267
DOM::Document doc=_html->document();
268
DOM::CSSStyleDeclaration s2=doc.defaultView().getComputedStyle(e,"");
269
//FIXME: wait until getComputedStyle is more than
270
// 'return 0' in khtml
272
// A font-weight of "bolder" sets WEIGHT to 75, the same value the <b> tag uses.
if (PROPV("font-weight") == "bolder")
273
_writer->formatAttribute(state()->paragraph,"WEIGHT","value","75");
276
// Debug dump of every property/value pair in both declarations.
kdDebug() << "e.style()" << endl;
277
for (unsigned int i=0;i<s1.length();i++) {
278
kdDebug() << QString("%1: %2").arg(s1.item(i).string()).arg(s1.getPropertyValue(s1.item(i)).string()) << endl;
280
kdDebug() << "override style" << endl;
281
for (unsigned int i=0;i<s2.length();i++) {
282
kdDebug() << QString("%1: %2").arg(s2.item(i).string()).arg(s2.getPropertyValue(s2.item(i)).string()) << endl;
287
// Closes the current paragraph and opens a new one in the current frameset.
//   startnewformat - when true (or when there is no current format), a
//                    fresh format is started instead of continuing the old one.
//   startnewlayout - when true (or when there is no current layout), the new
//                    paragraph is added without passing a layout; the other
//                    addParagraph() call passes the state's layout.
// Afterwards, a COUNTER type that is set and not "0" is reset and the
// COUNTER depth is raised by one (see the list note below).
void KHTMLReader::startNewParagraph(bool startnewformat, bool startnewlayout) {
289
// Remember format and layout of the paragraph that is being closed.
QDomElement qf=state()->format;
290
QDomElement ql=state()->layout;
292
_writer->cleanUpParagraph(state()->paragraph);
294
if ((startnewlayout==true) || ql.isNull())
295
{state()->paragraph=_writer->addParagraph(state()->frameset);}
298
_writer->addParagraph(state()->frameset,state()->layout);}
302
if (qf.isNull() || (startnewformat==true)) {
303
state()->format=_writer->startFormat(state()->paragraph/*,state()->format*/);
305
state()->format=_writer->startFormat(state()->paragraph,qf);
309
support lists: if we are in a list, and we start a new paragraph,
310
we don't want to start a new item, but we want to retain the list state.
311
we do this by incrementing the 'environment depth' and changing the numbering type to 'no numbering'
313
// `ct` is the COUNTER type of the new paragraph; anything set and not "0"
// is handled as described above.
QString ct=_writer->getLayoutAttribute(state()->paragraph,"COUNTER","type");
314
if ((!ct.isNull()) && (ct != "0")) {
315
_writer->layoutAttribute(state()->paragraph,"COUNTER","type","0");
316
_writer->layoutAttribute(state()->paragraph,"COUNTER","numberingtype","0");
317
_writer->layoutAttribute(state()->paragraph,"COUNTER","righttext","");
318
int currdepth=(_writer->getLayoutAttribute(state()->paragraph,"COUNTER","depth")).toInt();
319
_writer->layoutAttribute(state()->paragraph,"COUNTER","depth",QString("%1").arg(currdepth+1));
323
// Destructor.
// NOTE(review): _html is allocated with `new` in the constructor -- confirm
// that it is released here or owned elsewhere.
KHTMLReader::~KHTMLReader(){
331
//==============================================================
333
//==============================================================
336
// Handles attributes shared by several tags. Currently this reads the
// `align` attribute of `e`; its value is handed to the writer as the
// FLOW/align attribute of the current paragraph.
bool KHTMLReader::parse_CommonAttributes(DOM::Element e) {
337
QString s=e.getAttribute("align").string();
339
_writer->formatAttribute(state()->paragraph,"FLOW","align",s);
346
// Handler for <p>: applies the common attributes (see
// parse_CommonAttributes()) of the element.
bool KHTMLReader::parse_p(DOM::Element e) {
348
parse_CommonAttributes(e);
353
// Handler for <hr>: asks the writer to create a horizontal rule for the
// current paragraph. The element itself is not inspected here.
bool KHTMLReader::parse_hr(DOM::Element e) {
355
_writer->createHR(state()->paragraph);
360
// Handler for <br>: a line break is mapped onto a new paragraph that
// continues with the current format and layout. Returns false; the caller
// uses the handler result as its "descend into children" flag.
bool KHTMLReader::parse_br(DOM::Element e) {
361
startNewParagraph(false,false); //keep the current format and layout
362
return false; // a BR tag has no childs.
365
// Converts an HTML colour specification into a QColor.
//  - A string starting with '#' is read as three two-digit hexadecimal
//    components taken from positions 1-2, 3-4 and 5-6 (i.e. "#RRGGBB").
//  - Otherwise the lower-cased string is compared against a fixed set of
//    colour names (black, white, silver, gray, red, lime, blue, yellow,
//    fuchsia, aqua, maroon, green, navy, olive, purple, teal).
//  - Anything else is handed to QColor::setNamedColor().
// Note: toInt() is called without an ok-flag, so malformed hex digits are
// not detected here.
QColor parsecolor(QString colorstring) {
367
if (colorstring[0]=='#') {
369
colorstring.mid(1,2).toInt(0,16),
370
colorstring.mid(3,2).toInt(0,16),
371
colorstring.mid(5,2).toInt(0,16)
374
// Named colours are matched case-insensitively.
QString colorlower=colorstring.lower();
376
if (colorlower=="black")
378
else if (colorlower=="white")
379
color.setRgb(255,255,255);
380
else if (colorlower=="silver")
381
color.setRgb(0xc0,0xc0,0xc0);
382
else if (colorlower=="gray")
383
color.setRgb(128,128,128);
385
else if (colorlower=="red")
386
color.setRgb(255,0,0);
387
else if (colorlower=="lime")
388
color.setRgb(0,255,0);
389
else if (colorlower=="blue")
390
color.setRgb(0,0,255);
391
else if (colorlower=="yellow")
392
color.setRgb(255,255,0);
393
else if (colorlower=="fuchsia")
394
color.setRgb(255,0,255);
395
else if (colorlower=="aqua")
396
color.setRgb(0,255,255);
398
else if (colorlower=="maroon")
399
color.setRgb(128,0,0);
400
else if (colorlower=="green")
401
color.setRgb(0,128,0);
402
else if (colorlower=="navy")
403
color.setRgb(0,0,128);
404
else if (colorlower=="olive")
405
color.setRgb(128,128,0);
406
else if (colorlower=="purple")
407
color.setRgb(128,0,128);
408
else if (colorlower=="teal")
409
color.setRgb(0,128,128);
411
// H'm, we have still not found the color!
412
// Let us see if QT can do better!
413
color.setNamedColor(colorstring);
420
// Handler for <table>. Creates a table in the writer, then walks the rows
// (<tr>) and cells (<td>) below the table's first child element, creating
// one table cell frameset per <td> with the effective background colour.
// Finally the table is finished and inserted inline into a new paragraph.
// Returns false because the children are processed here rather than by
// the generic recursion in parseNode().
bool KHTMLReader::parse_table(DOM::Element e) {
421
int tableno=_writer->createTable();
424
int has_borders=false;
425
// Background colour defaults to white and can be overridden, in turn, by
// the bgcolor attribute of the table body, the row and the cell.
QColor bgcolor=parsecolor("#FFFFFF");
426
// NOTE(review): assumes the first child of <table> is the element holding
// the rows (e.g. <tbody>) -- confirm.
DOM::Element table_body=e.firstChild();
427
if (!table_body.getAttribute("bgcolor").string().isEmpty())
428
bgcolor=parsecolor(table_body.getAttribute("bgcolor").string());
429
if ((e.getAttribute("border").string().toInt() > 0))
432
// fixme rewrite this proper
433
//(maybe using computed sizes from khtml if thats once exported)
434
for (DOM::Element rows=table_body.firstChild();!rows.isNull();rows=rows.nextSibling()) {
435
if (rows.tagName().string().lower() == "tr") {
437
// Keep the colour in effect before the row override is applied.
QColor obgcolor=bgcolor;
438
if (!rows.getAttribute("bgcolor").string().isEmpty())
439
bgcolor=parsecolor(rows.getAttribute("bgcolor").string());
442
for (DOM::Element cols=rows.firstChild();!cols.isNull();cols=cols.nextSibling()) {
443
if (cols.tagName().string().lower() == "td") {
444
// Keep the colour in effect before the cell override is applied.
QColor bbgcolor=bgcolor;
445
if (!cols.getAttribute("bgcolor").string().isEmpty())
446
bgcolor=parsecolor(cols.getAttribute("bgcolor").string());
449
// The cell geometry is taken from the rectangle khtml reports for the <td>.
QRect colrect=cols.getRect();
450
state()->frameset=_writer->createTableCell(tableno,nrow,ncol,1,colrect);
451
// Background colour components are written to the first child element of
// the new cell frameset.
state()->frameset.firstChild().toElement().setAttribute("bkRed",bgcolor.red());
452
state()->frameset.firstChild().toElement().setAttribute("bkGreen",bgcolor.green());
453
state()->frameset.firstChild().toElement().setAttribute("bkBlue",bgcolor.blue());
455
// Border widths (left, right, bottom, top) of 1 on the same element.
state()->frameset.firstChild().toElement().setAttribute("lWidth",1);
456
state()->frameset.firstChild().toElement().setAttribute("rWidth",1);
457
state()->frameset.firstChild().toElement().setAttribute("bWidth",1);
458
state()->frameset.firstChild().toElement().setAttribute("tWidth",1);
461
// fixme don't guess. get it right.
462
state()->paragraph=_writer->addParagraph(state()->frameset);
464
_writer->cleanUpParagraph(state()->paragraph);
474
_writer->finishTable(tableno/*,0,0,r.right()-r.left(),r.bottom()-r.top()*/); // FIXME find something better.
475
// The table is anchored inline via its cell (0,0), in a paragraph of its own.
startNewParagraph(false,false);
476
_writer->createInline(state()->paragraph,_writer->fetchTableCell(tableno,0,0));
477
startNewParagraph(false,false);
478
return false; // we do our own recursion
481
// Handler for <img>. The only statement visible here is commented out.
bool KHTMLReader::parse_img(DOM::Element e) {
482
//QRect e=e.getRect();
487
// Handler for <pre>. Only the first child of the element is used and it is
// treated as a text node: its data is split at newline characters and each
// piece is added to the current paragraph. Returns false both when there is
// no such child and after processing, so the caller does not descend into
// the children.
bool KHTMLReader::parse_pre(DOM::Element e) {
488
DOM::Text prething=e.firstChild();
489
if (prething.isNull()) return false;
491
// QStringList::split() is called without its allowEmptyEntries argument.
QStringList k=QStringList::split("\n",prething.data().string());
495
for (QStringList::Iterator b=k.begin();b!=k.end();++b) {
496
_writer->addText(state()->paragraph,*b);
499
return false; // FIXME no support for tags in <PRE> sections ATM.
502
// Handler for <ol>.
// NOTE(review): parse_ul() tests for an "ol" tag name, so this presumably
// shares its implementation -- confirm.
bool KHTMLReader::parse_ol(DOM::Element e) {
506
// Handler for <font>. Translates the face, color and size attributes into
// FONT, COLOR and SIZE format attributes on the current paragraph.
// The colour defaults to black when no color attribute is given. Sizes are
// computed relative to a hard-coded base of 12: "+N" gives 12+N, "-N" gives
// 12-N and the remaining assignment uses 12+size. A SIZE attribute is only
// written when the result is non-negative and differs from 12.
bool KHTMLReader::parse_font(DOM::Element e) {
507
// fixme don't hardcode 12 font size ...
508
QString face=e.getAttribute("face").string();
509
QColor color=parsecolor("#000000");
510
if (!e.getAttribute("color").string().isEmpty())
511
color=parsecolor(e.getAttribute("color").string());
512
QString size=e.getAttribute("size").string();
514
// size.right(length-1) strips the leading sign character before conversion.
if (size.startsWith("+"))
515
isize=12+size.right(size.length()-1).toInt();
516
else if (size.startsWith("-"))
517
isize=12-size.right(size.length()-1).toInt();
519
isize=12+size.toInt();
521
_writer->formatAttribute(state()->paragraph,"FONT","name",face);
522
if ((isize>=0) && (isize != 12))
523
_writer->formatAttribute(state()->paragraph,"SIZE","value",QString("%1").arg(isize));
525
// The colour is written as three separate decimal components.
_writer->formatAttribute(state()->paragraph,"COLOR","red",QString("%1").arg(color.red()));
526
_writer->formatAttribute(state()->paragraph,"COLOR","green",QString("%1").arg(color.green()));
527
_writer->formatAttribute(state()->paragraph,"COLOR","blue",QString("%1").arg(color.blue()));
531
bool KHTMLReader::parse_ul(DOM::Element e) {
533
for (DOM::Element items=e.firstChild();!items.isNull();items=items.nextSibling()) {
534
if (items.tagName().string().lower() == "li") {
537
_writer->layoutAttribute(state()->paragraph,"COUNTER","numberingtype","1");
538
_writer->layoutAttribute(state()->paragraph,"COUNTER","righttext",".");
539
if (e.tagName().string().lower() == "ol")
541
_writer->layoutAttribute(state()->paragraph,"COUNTER","type","1");
542
_writer->layoutAttribute(state()->paragraph,"COUNTER","numberingtype","1");
543
_writer->layoutAttribute(state()->paragraph,"COUNTER","righttext",".");
547
_writer->layoutAttribute(state()->paragraph,"COUNTER","type","10");
548
_writer->layoutAttribute(state()->paragraph,"COUNTER","numberingtype","");
549
_writer->layoutAttribute(state()->paragraph,"COUNTER","righttext","");
551
_writer->layoutAttribute(state()->paragraph,"COUNTER","depth",QString("%1").arg(_list_depth-1));