2
* Copyright (c) 2002-2003 Nicolas HADACEK (hadacek@kde.org)
4
* This program is free software; you can redistribute it and/or modify
5
* it under the terms of the GNU General Public License as published by
6
* the Free Software Foundation; either version 2 of the License, or
7
* (at your option) any later version.
9
* This program is distributed in the hope that it will be useful,
10
* but WITHOUT ANY WARRANTY; without even the implied warranty of
11
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
* GNU General Public License for more details.
14
* You should have received a copy of the GNU General Public License
15
* along with this program; if not, write to the Free Software
16
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19
#include "FilterPage.h"
27
#include "transform.h"
30
// Debug timing helpers: TIME_START logs a label, TIME_END logs the elapsed time.
// NOTE(review): TIME_END reads `_time`, which is not declared in the visible
// lines of TIME_START; the macro body appears truncated in this excerpt
// (bare numeric lines, no closing brace) - confirm against the full file.
#define TIME_START(str) { \
31
kdDebug(30516) << str << endl; \
34
#define TIME_END { kdDebug(30516) << "elapsed=" << _time.elapsed() << endl; }
40
//-----------------------------------------------------------------------------
41
// Builds a page bound to the shared import `data`.
// The TextPage base is constructed with `false`, the last-string pointer
// starts null and _rects is sized to Nb_ParagraphTypes entries.
Page::Page(Data &data)
42
: TextPage(false), _data(data), _lastStr(0), _rects(Nb_ParagraphTypes)
44
// Enable auto-delete on the link list.
_links.setAutoDelete(true);
55
// Starts a new text string at (x0, y0) for the given graphics state by
// allocating a String and storing it in curStr.
// NOTE(review): lines are missing from this excerpt between the Type 3
// comment and the allocation, and `fontSize` is not declared in the visible
// lines - check the full file before editing.
void Page::beginString(GfxState *state, double x0, double y0)
57
// This check is needed because Type 3 characters can contain
58
// text-drawing operations.
64
// _data.checkTextFrameset();
65
// The new string is tagged with the current text index from _data.
curStr = new String(state, x0, y0, fontSize, _data.textIndex());
66
// kdDebug(30516) << "---" << endl;
69
// Ends the current string; the only live statement forwards to
// TextPage::endString(). The remaining lines are disabled debug traces.
void Page::endString()
71
// kdDebug(30516) << "endString..." << " len=" << curStr->len
73
// << " len=" << (_lastStr ? _lastStr->len : -1) << endl;
74
TextPage::endString();
75
// kdDebug(30516) << " ...endString done" << endl;
78
// Adds a finished string to the page.
// Before handing `str` to TextPage::addString(), the previously added string
// (if any) is given the chance to combine with it, and _lastStr is updated.
void Page::addString(TextString *str)
80
// kdDebug(30516) << "addString..." << endl;
81
// if ( str->len==0 ) kdDebug(30516) << "empty string !" << endl;
82
// Let the previous non-empty string inspect the new one.
if (_lastStr) _lastStr->checkCombination(str);
83
// An empty string is never remembered as "last string".
_lastStr = (str->len==0 ? 0 : static_cast<String *>(str));
85
// for (int i=0; i<str->len; i++) s += QChar(str->text[i]);
86
// kdDebug(30516) << "string: " << s << " ("
87
// << (str->len>0 ? s[0].unicode() : 0) << ")" << endl;
88
TextPage::addString(str);
89
// kdDebug(30516) << " ...addString done" << endl;
92
// Returns a block of `line` selected by `index`.
// NOTE(review): the declarations of `k` and `i` and any handling of `index`
// are not visible in this excerpt; callers in this file pass -1, presumably
// to get the last block - confirm against the full file.
TextBlock *Page::block(TextLine *line, int index)
96
// First pass: count the blocks of the line into k.
for (TextBlock *block = line->blocks; block; block = block->next) k++;
99
// Second pass: walk the list again and return the block once i equals k.
for (TextBlock *block = line->blocks; block; block = block->next) {
100
if ( i==k ) return block;
106
//-----------------------------------------------------------------------------
107
// Decides whether `line` closes the paragraph currently being assembled.
//
// line: the candidate line (must not be null); its successor is line->next.
// par:  the paragraph built so far; only its bounding rectangle is read.
// Returns true when a paragraph break should follow `line`, false when the
// next line belongs to the same paragraph (or when the data is inconsistent).
bool Page::isLastParagraphLine(TextLine *line, const Paragraph &par)
{
    // last line of the page
    TextLine *next = line->next;
    if ( next==0 ) return true;

    // next line far below
    const double dy = next->yMin - line->yMax;
    const double ndy = next->yMax - next->yMin;
    if ( dy>0.5*ndy ) return true;

    // Both block lists are dereferenced below, so they are validated first.
    // (Previously blocks->strings was read before these null checks ran.)
    if ( line->blocks==0 || next->blocks==0 ) return false; // should not happen

    // image displayed before next line (?)
    String *str = static_cast<String *>(line->blocks->strings);
    String *nStr = static_cast<String *>(next->blocks->strings);
    if ( str->frameIndex()!=nStr->frameIndex() ) return true;

    // this line or the next one contains one or more inside tabs
    if ( line->blocks->next ) return true;
    if ( next->blocks->next ) return true;

    TextBlock *b = block(line, -1);
    if ( b==0 || b->len==0 ) return false; // should not happen

    // last line char is not '.' or ':'
    QChar c = QChar(b->text[b->len-1]);
    if ( c!='.' && c!=':' ) return false;

    // if at line end and block aligned : same paragraph
    return ( !equal(b->xMax, par.rect().right()) );
}
134
// Splits the page's lines into paragraphs and appends them to _pars.
// NOTE(review): the bookkeeping of `nbLines` and the reset of `first` after
// a paragraph is stored are not visible in this excerpt.
void Page::createParagraphs()
136
TextLine *first = lines;
138
for (TextLine *line = lines; line; line = line->next) {
140
// Candidate paragraph starting at `first` and spanning nbLines lines.
Paragraph par(first, nbLines);
141
// Store the paragraph once `line` is recognised as its last line.
if ( isLastParagraphLine(line, par) ) {
142
_pars.push_back(par);
149
// Checks whether the first paragraph qualifies as a page header and, if so,
// records its rectangle in _rects[Header].
// NOTE(review): any guard for an empty _pars and the assignment of the
// paragraph type are not visible in this excerpt.
void Page::checkHeader()
151
uint s = _pars.size();
153
Paragraph &par = _pars[0];
154
// Only a single-line paragraph can be a header.
if ( par.lines().count()!=1 ) return;
155
const TextLine *first = par.lines().first();
156
const TextLine *second = (s>1 ? _pars[1].lines().first() : 0);
157
// The header must end within the top 20% of the page height...
double limit = 0.2 * _data.pageRect().height();
158
// ...and be separated from the next paragraph by at least twice its own
// line height, with that height capped at 12.0.
double delta = 2 * kMin(first->yMax - first->yMin, 12.0);
159
// kdDebug(30516) << "first: " << first->yMax << " (" << limit << ")" << endl;
160
// if (second) kdDebug(30516) << "second: " << second->yMin << " "
161
// << second->yMin-first->yMax << " (" << delta
163
if ( first->yMax>limit ) return;
164
if ( second && (second->yMin-first->yMax)<delta ) return;
166
_rects[Header] = par.rect();
169
bool Page::hasHeader() const
171
return (_pars.size()>0 ? _pars[0].type==Header : false);
174
// Checks whether the last paragraph qualifies as a page footer and, if so,
// records its rectangle in _rects[Footer].
// NOTE(review): any guard for an empty _pars (s-1 would wrap for s==0) and
// the assignment of the paragraph type are not visible in this excerpt.
void Page::checkFooter()
176
uint s = _pars.size();
178
Paragraph &par = _pars[s-1];
179
// Only a single-line paragraph can be a footer.
if ( par.lines().count()!=1 ) return;
180
// The paragraph has exactly one line here, so first() is that line.
const TextLine *last = par.lines().first();
181
// Last line of the paragraph before it, when there is one.
const TextLine *blast = (s>1 ? _pars[s-2].lines().last() : 0);
182
// The footer must start within the bottom 20% of the page height...
double limit = 0.8 * _data.pageRect().height();
183
// ...and be separated from the previous paragraph by at least twice its own
// line height, with that height capped at 12.0.
double delta = 2 * kMin(last->yMax-last->yMin, 12.0);
184
// kdDebug(30516) << "last: " << last->yMax << " (" << limit << ")" << endl;
185
// if (blast) kdDebug(30516) << "blast: " << blast->yMin << " "
186
// << last->yMin-blast->yMax << " (" << delta
188
if ( last->yMin<limit ) return;
189
if ( blast && (last->yMin-blast->yMax)<delta ) return;
191
_rects[Footer] = par.rect();
194
// Reports whether the page's last paragraph has been typed as a footer.
// NOTE(review): the tail of the conditional expression is cut off in this
// excerpt; presumably it yields false when there are no paragraphs - confirm.
bool Page::hasFooter() const
196
return (_pars.size()>0 ? _pars[_pars.size()-1].type==Footer
202
TIME_START("coalesce strings");
203
TextPage::coalesce();
208
// check header and footer
210
// if ( hasHeader() ) kdDebug(30516) << "has header" << endl;
212
// if ( hasFooter() ) kdDebug(30516) << "has footer" << endl;
215
uint begin = (hasHeader() ? 1 : 0);
216
uint end = _pars.size() - (hasFooter() ? 1 : 0);
217
for (uint i=begin; i<end; i++)
218
_rects[Body].unite(_pars[i].rect());
222
//-----------------------------------------------------------------------------
223
// Computes layout attributes of a paragraph: its tabulators, its first-line
// and left indents, and its alignment.
// Positions are measured against the rectangle stored in _rects for the
// paragraph's type.
// NOTE(review): several short lines (declarations such as `tab`, closing
// braces, some assignments) are missing from this excerpt.
void Page::initParagraph(Paragraph &par) const
225
// Every alignment is assumed possible until a line contradicts it.
bool rightAligned = true, centered = true, leftAligned = true;
226
const double pleft = _rects[par.type].left();
227
const double pright = _rects[par.type].right();
228
const double pmean = (pleft + pright) / 2;
230
QValueList<TextLine *>::const_iterator it;
231
// First pass over the lines: tabulators and indents.
for (it = par.lines().begin(); it!=par.lines().end(); ++it) {
233
// compute tabulations
235
for (TextBlock *blk = (*it)->blocks; blk; blk = blk->next) {
236
// if tabulated text is aligned on right edge: put a tab
237
// on right edge and the tab type will be right aligned...
238
// NOTE(review): the result of equal() is stored in a double but only used
// as a truth value - a bool looks intended.
double tabRightAligned = equal(blk->xMax, pright);
239
double dx = (tabRightAligned ? pright : blk->xMin) - pleft;
240
// #### if the tab is just at the frame edge:
241
// the text is sent to next line ???
242
if (tabRightAligned) dx -= 0.1;
243
int res = par.findTab(dx, *it);
246
if (tabRightAligned) {
247
tab.alignment = Tabulator::Right;
248
kdDebug(30516) << "tabulated text right aligned.." << endl;
249
} else tab.alignment = Tabulator::Left;
250
par.tabs.push_back(tab);
253
qHeapSort2(par.tabs);
256
// Indent of this line relative to the frame's left edge.
double left = (*it)->blocks->xMin - pleft;
257
if ( par.isFirst(*it) ) {
258
par.firstIndent = left;
259
par.leftIndent = left;
260
} else if ( par.isSecond(*it) ) par.leftIndent = left;
261
// From the third line on, the left indent is the smallest one seen.
else par.leftIndent = kMin(par.leftIndent, left);
265
// Second pass over the lines: rule out alignments that do not fit.
for (it = par.lines().begin(); it!=par.lines().end(); ++it) {
266
double left = (*it)->blocks->xMin;
267
double right = block(*it, -1)->xMax;
268
double mean = (left + right) / 2;
270
// for (int i=0; i<kMin(4, (*it)->blocks->len); i++)
271
// text += QChar((*it)->blocks->text[i]);
272
// kdDebug(30516) << text << " left=" << left
273
// << " pleft=" << pleft + par.leftIndent
274
// << " indent=" << par.leftIndent
275
// << " findent=" << par.firstIndent << endl;
276
if ( centered && !equal(mean, pmean) ) centered = false;
277
// The first line is exempt from the left test unless it is the only line.
if ( leftAligned && (!par.isFirst(*it) || par.hasOneLine())
278
&& !equal(left, pleft + par.leftIndent, 0.05) ) {
279
kdDebug(30516) << "not left aligned" << endl;
282
// The last line is exempt from the right test unless it is the only line.
if ( rightAligned && (!par.isLast(*it) || par.hasOneLine())
283
&& !equal(right, pright, 0.05) ) {
284
kdDebug(30516) << "not right aligned" << endl;
285
rightAligned = false;
289
// finalize alignment
290
if (rightAligned) par.align = (leftAligned ? AlignBlock : AlignRight);
291
else if (centered) par.align = AlignCenter;
294
// Fills par.blocks with the text of the paragraph's lines (line joins, tabs
// and string contents) and sets par.offset.
// `offset` is an in/out running vertical position: it is advanced by the
// paragraph's positive offset and by the height of each processed line.
// NOTE(review): many short lines (declarations of `b`, `bi`, `pbi`,
// `lineHeight`, `next`, closing braces) are missing from this excerpt.
void Page::fillParagraph(Paragraph &par, double &offset) const
296
const double pleft = _rects[par.type].left();
297
const double pright = _rects[par.type].right();
298
// Gap between the running position and the top of the first line.
par.offset = par.lines().first()->yMin - offset;
299
// kdDebug(30516) << "offset=" << offset
300
// << " yMin=" << par.lines().first()->yMin
301
// << " paroffset=" << par.offset << endl;
302
if ( par.offset>0 ) offset += par.offset;
304
QValueList<TextLine *>::const_iterator it;
305
for (it = par.lines().begin(); it!=par.lines().end(); ++it) {
306
// end of previous line (inside a paragraph)
307
if ( !par.isFirst(*it) ) {
309
// In smart mode, look for a hyphen at the end of the text gathered so far.
if (_data.options().smart) {
312
int si = par.charFromEnd(0, bi);
314
QChar c = par.blocks[bi].text[si];
315
int psi = par.charFromEnd(1, pbi);
316
QChar prev = (psi<0 ? QChar::null : par.blocks[pbi].text[psi]);
317
if ( !prev.isNull() && type(c.unicode())==Hyphen )
318
kdDebug(30516) << "hyphen ? " << QString(prev)
319
<< " type=" << type(prev.unicode())
322
((*it)->next ? (*it)->next->blocks->strings : 0);
323
// Drop the hyphen when it follows a letter and the tested character of
// `next` is a letter too.
// NOTE(review): the test reads the LAST character of `next`
// (text[len-1]) - confirm that this is intended.
if ( !prev.isNull() && type(c.unicode())==Hyphen
324
&& isLetter( type(prev.unicode()) )
325
&& next && next->len>0
326
&& isLetter( type(next->text[next->len-1]) ) ) {
327
kdDebug(30516) << "found hyphen" << endl;
329
par.blocks[bi].text.remove(si, 1);
334
// Join with the previous line using a space (smart mode) or a newline.
bool remove = _data.options().smart;
335
// For non-justified paragraphs the space is used only when the paragraph
// reaches beyond 90% of the frame's right edge.
if ( remove && par.align!=AlignBlock )
336
remove = ( par.rect().right()>0.9*pright );
337
b.text = (remove ? ' ' : '\n');
338
b.font = static_cast<String *>((*it)->blocks->strings)->font();
339
par.blocks.push_back(b);
344
TextBlock *prevBlk = 0;
345
for (TextBlock *blk = (*it)->blocks; blk; blk = blk->next) {
348
// NOTE(review): truth value stored in a double, as in initParagraph.
double tabRightAligned = equal(blk->xMax, pright);
349
double dx = (tabRightAligned ? pright : blk->xMin) - pleft;
350
int res = par.findTab(dx, *it);
353
double xMax = prevBlk->xMax - pleft;
354
res = par.findNbTabs(res, xMax);
355
if ( res==0 ) continue;
357
// no tabs for first block in AlignCenter and AlignRight
359
if ( prevBlk || !_data.options().smart
360
|| (par.align!=AlignCenter && par.align!=AlignRight) ) {
362
b.font = static_cast<String *>(blk->strings)->font();
363
// Emit `res` tab characters as their own block.
for (uint k=0; k<(uint)res; k++) b.text += '\t';
364
par.blocks.push_back(b);
369
// Append the strings of the block, one Block per string.
for (TextString *str = blk->strings; str; str = str->next) {
371
for (uint k = 0; k<uint(str->len); k++)
372
b.text += QChar(str->text[k]);
373
if (str->spaceAfter) b.text += ' ';
374
String *fstr = static_cast<String *>(str);
375
b.font = fstr->font();
377
par.blocks.push_back(b);
378
// Track the tallest font on the line.
lineHeight = kMax(lineHeight, b.font.height());
384
offset += lineHeight;
388
// Classifies character `c` through PDFImport::checkSpecial() and reports
// which font family should be used for it.
// NOTE(review): the case labels, the declaration of `res`, the writes to `c`
// and the return statements are missing from this excerpt; only the debug
// output of each branch is visible.
FontFamily Page::checkSpecial(QChar &c, const Font &font) const
391
switch ( PDFImport::checkSpecial(c.unicode(), res) ) {
393
kdDebug(30516) << "found bullet" << endl;
394
// #### FIXME : if list, use a COUNTER
395
// temporarily replace by symbol
399
kdDebug(30516) << "found superscript" << endl;
403
// This branch goes further only for LaTeX fonts.
if ( !font.isLatex() ) break;
404
kdDebug(30516) << "found latex special" << endl;
407
kdDebug(30516) << "found symbol=" << c.unicode() << endl;
417
// Rebuilds the paragraph's block list: characters for which checkSpecial()
// returns a real font family are placed in their own block with that family,
// while ordinary characters are accumulated in `res` and emitted as text.
// NOTE(review): the declarations of `res` and `c`, the push_back calls and
// the final assignment back to par.blocks are missing from this excerpt.
void Page::checkSpecialChars(Paragraph &par) const
419
QValueList<Block> blocks;
420
for (uint k=0; k<par.blocks.size(); k++) {
421
const Block &b = par.blocks[k];
423
// kdDebug(30516) << "check \"" << b.text << "\"" << endl;
424
for (uint l=0; l<b.text.length(); l++) {
426
FontFamily family = checkSpecial(c, b.font);
427
// Nb_Family means "not special": keep accumulating plain text.
if ( family==Nb_Family ) res += c;
429
// Emit the plain text gathered before a special character.
if ( !res.isEmpty() ) {
431
blocks.back().text = res;
435
// The special character gets its own block with the substituted family.
blocks.back().font.setFamily(family);
436
blocks.back().text = c;
439
// Emit the plain text left over at the end of the source block.
if ( !res.isEmpty() ) {
441
blocks.back().text = res;
447
// Merges consecutive blocks of a paragraph that share the same link and the
// same font into a single block.
// NOTE(review): par.blocks[0] is read with no emptiness check in the visible
// lines, and the assignment of the result back to par.blocks is missing from
// this excerpt.
void Page::coalesce(Paragraph &par) const
449
QValueList<Block> blocks;
450
blocks.push_back(par.blocks[0]);
451
for (uint k=1; k<par.blocks.size(); k++) {
452
const Block &b = par.blocks[k];
453
// Same link and font as the previous block: append the text to it.
if ( b.link==blocks.back().link && b.font==blocks.back().font )
454
blocks.back().text += b.text;
455
else blocks.push_back(b);
462
TIME_START("associate links");
463
for (Link *link=_links.first(); link; link=_links.next()) {
464
const DRect &r = link->rect();
465
// kdDebug(30516) << "link " << r.toString() << endl;
466
for (TextLine *line = lines; line; line = line->next)
467
for (TextBlock *blk = line->blocks; blk; blk = blk->next)
468
for (TextString *str = blk->strings; str; str = str->next) {
469
String *fstr = static_cast<String *>(str);
470
DRect sr = fstr->rect();
471
// kdDebug(30516) << "str " << sr.toString() << " "
472
// << r.isInside(sr) << endl;
473
if ( r.isInside(sr) ) fstr->link = link;
478
TIME_START("init paragraphs");
479
for (uint i=0; i<_pars.size(); i++) {
480
initParagraph(_pars[i]);
482
// special case for wide and centered one liner without tab
483
if ( _pars[i].align==AlignBlock && _pars[i].hasOneLine()
484
&& _pars[i].tabs.size()==0
486
|| (i!=0 && _pars[i-1].align==AlignCenter)
487
|| ((i+1)!=_pars.size() && _pars[i+1].align==AlignCenter)) )
488
_pars[i].align = AlignCenter;
492
TIME_START("fill paragraphs");
495
double offset = _rects[Header].top();
496
fillParagraph(_pars[0], offset);
499
uint end = _pars.size();
501
double offset = _rects[Footer].top();
503
fillParagraph(_pars[end], offset);
505
double offset = _rects[Body].top();
506
for (uint i=begin; i<end; i++)
507
fillParagraph(_pars[i], offset);
510
TIME_START("check for special chars");
511
for (uint i=0; i<_pars.size(); i++)
512
checkSpecialChars(_pars[i]);
515
// this is not really required...
516
TIME_START("coalesce formats");
517
for (uint i=0; i<_pars.size(); i++)
521
// if no paragraph : add an empty one
522
if ( _pars.size()==0 ) {
525
par.blocks.push_back(b);
526
_pars.push_back(par);
530
// Emits one paragraph to the output document: collects layout elements
// (tabulators, INDENTS, OFFSETS, FLOW) and per-block FORMAT elements, then
// passes them with the paragraph text to _data.createParagraph().
// NOTE(review): the declarations of `flow`, `text`, `pos`, the switch header
// and closing braces are missing from this excerpt.
void Page::dump(const Paragraph &par)
532
QValueVector<QDomElement> layouts;
533
QValueVector<QDomElement> formats;
536
// One layout element per tabulator.
for (uint k=0; k<par.tabs.size(); k++) {
537
QDomElement element = par.tabs[k].createElement(_data);
538
layouts.push_back(element);
542
// Indents are written unless smart mode found a centred paragraph.
if ( !_data.options().smart || par.align!=AlignCenter ) {
543
QDomElement element = _data.createElement("INDENTS");
544
element.setAttribute("left", par.leftIndent);
545
double delta = par.firstIndent - par.leftIndent;
546
// "first" is written only when the first-line indent differs.
if ( !equal(delta, 0) ) element.setAttribute("first", delta);
547
layouts.push_back(element);
551
// Space before the paragraph, only when positive.
if ( par.offset>0 ) {
552
QDomElement element = _data.createElement("OFFSETS");
553
element.setAttribute("before", par.offset);
554
layouts.push_back(element);
558
// The alignment is exported in smart mode only.
if (_data.options().smart) {
560
// kdDebug(30516) << "flow=" << par.align << endl;
562
case AlignLeft: break;
563
case AlignRight: flow = "right"; break;
564
case AlignCenter: flow = "center"; break;
565
case AlignBlock: flow = "justify"; break;
567
if ( !flow.isEmpty() ) {
568
QDomElement element = _data.createElement("FLOW");
569
element.setAttribute("align", flow.utf8());
570
layouts.push_back(element);
577
for (uint k=0; k<par.blocks.size(); k++) {
578
const Block &b = par.blocks[k];
579
// A linked block is represented by a single "#" in the paragraph text.
text += (b.link ? "#" : b.text);
580
uint len = (b.link ? 1 : b.text.length());
581
QDomElement element = _data.createElement("FORMAT");
582
QDomDocument document = _data.document();
583
bool r = b.font.format(document, element, pos, len);
584
if (b.link) b.link->format(document, element, pos, b.text);
585
// Keep the FORMAT element when the font call returned true or a link exists.
if ( r || b.link ) formats.push_back(element);
589
_data.createParagraph(text, par.type, layouts, formats);
596
TIME_START("dump XML");
597
for (uint i=0; i<_pars.size(); i++)