2
* Copyright (C) 2011-2012 Charlie Sharpsteen, Stefan Löffler
4
* This program is free software; you can redistribute it and/or modify it
5
* under the terms of the GNU General Public License as published by the Free
6
* Software Foundation; either version 2, or (at your option) any later
9
* This program is distributed in the hope that it will be useful, but WITHOUT
10
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15
#include <PDFBackend.h>
17
#include <QApplication>
23
// TODO: Find a better place to put this
24
QBrush * pageDummyBrush = NULL;
26
QDateTime fromPDFDate(QString pdfDate)
33
int hourOffset, minuteOffset;
36
// "D:" prefix is strongly recommended, but optional; we don't need it here
37
if (pdfDate.startsWith(QString::fromUtf8("D:")))
41
if (pdfDate.length() < 4)
43
format = QString::fromUtf8("yyyy");
44
if (pdfDate.length() >= 6)
45
format += QString::fromUtf8("MM");
46
if (pdfDate.length() >= 8)
47
format += QString::fromUtf8("dd");
48
date = QDate::fromString(pdfDate.left(format.length()), format);
49
pdfDate.remove(0, format.length());
52
if (pdfDate.length() < 2)
53
return QDateTime(date, time);
54
format = QString::fromUtf8("hh");
55
if (pdfDate.length() >= 4)
56
format += QString::fromUtf8("mm");
57
if (pdfDate.length() >= 6)
58
format += QString::fromUtf8("ss");
59
time = QTime::fromString(pdfDate.left(format.length()), format);
60
pdfDate.remove(0, format.length());
62
// Parse time zone data
63
if (pdfDate.length() == 0)
64
return QDateTime(date, time);
65
switch (pdfDate[0].toAscii()) {
67
return QDateTime(date, time, Qt::UTC).toLocalTime();
69
// Note: A `+` signifies that pdfDate is later than UTC. Since we will
70
// specify the QDateTime in UTC below, we have to _subtract_ the offset
77
return QDateTime(date, time);
80
if (pdfDate.length() < 3 || pdfDate[2] != QChar::fromAscii('\''))
81
return QDateTime(date, time);
82
hourOffset = pdfDate.left(2).toInt(&ok);
84
return QDateTime(date, time);
86
if (pdfDate.length() >= 3 && pdfDate[2] == QChar::fromAscii('\''))
87
minuteOffset = pdfDate.left(2).toInt();
88
return QDateTime(date, time, Qt::UTC).addSecs(sign * (hourOffset * 3600 + minuteOffset * 60)).toLocalTime();
92
void PDFPageProcessingThread::dumpWorkStack(const QStack<PageProcessingRequest*> & ws)
96
for (i = 0; i < ws.size(); ++i) {
97
PageProcessingRequest * request = ws[i];
99
strList << QString::fromUtf8("NULL");
114
PDFFontDescriptor::PDFFontDescriptor(const QString fontName /* = QString() */) :
116
_stretch(FontStretch_Normal),
132
bool PDFFontDescriptor::isSubset() const
134
// Subset fonts have a tag of 6 upper-case letters, followed by a '+',
135
// prefixed to the font name
136
if (_name.length() < 7 || _name[6] != QChar::fromAscii('+'))
138
for (int i = 0; i < 6; ++i) {
139
if (!_name[i].isUpper())
145
QString PDFFontDescriptor::pureName() const
156
// The `PDFPageProcessingThread` is a thread that processes background jobs.
157
// Each job is represented by a subclass of `PageProcessingRequest` and
158
// contains an `execute` method that performs the actual work.
159
PDFPageProcessingThread::PDFPageProcessingThread() :
165
PDFPageProcessingThread::~PDFPageProcessingThread()
169
_waitCondition.wakeAll();
174
void PDFPageProcessingThread::addPageProcessingRequest(PageProcessingRequest * request)
180
// `request` must live in the main (GUI) thread, or else destroying it later
182
Q_ASSERT(request->thread() == QApplication::instance()->thread());
184
QMutexLocker locker(&(this->_mutex));
185
// Note: Commenting the "remove identical requests in the stack" code for now.
186
// This should be handled by the caching routine elsewhere automatically. If
187
// in doubt, it's better to render a tile twice than to not render it at all
188
// (thereby leaving the dummy image in the cache indefinitely)
190
// remove any instances of the given request type before adding the new one to
191
// avoid processing it several times
192
// **TODO:** Could it be that we require several concurrent versions of the
195
for (i = _workStack.size() - 1; i >= 0; --i) {
196
if (*(_workStack[i]) == *request) {
197
// Using deleteLater() doesn't work because we have no event queue in this
198
// thread. However, since the object is still on the stack, it is still
199
// sleeping and directly deleting it should therefore be safe.
200
delete _workStack[i];
201
_workStack.remove(i);
206
_workStack.push(request);
208
qDebug() << "new request:" << *request;
215
_waitCondition.wakeOne();
218
void PDFPageProcessingThread::run()
220
PageProcessingRequest * workItem;
225
// mutex must be locked at start of loop
226
if (_workStack.size() > 0) {
227
workItem = _workStack.pop();
231
qDebug() << "processing work item" << *workItem << "; remaining items:" << _workStack.size();
232
_renderTimer.start();
237
switch (workItem->type()) {
238
case PageProcessingRequest::LoadLinks:
239
jobDesc = QString::fromUtf8("loading links");
241
case PageProcessingRequest::PageRendering:
242
jobDesc = QString::fromUtf8("rendering page");
245
qDebug() << "finished " << jobDesc << "for page" << workItem->page->pageNum() << ". Time elapsed: " << _renderTimer.elapsed() << " ms.";
248
// Delete the work item as it has fulfilled its purpose
249
// Note that we can't delete it here or we might risk that some emitted
250
// signals are invalidated; to ensure they reach their destination, we
251
// need to call deleteLater().
252
// Note: workItem *must* live in the main (GUI) thread for this!
253
Q_ASSERT(workItem->thread() == QApplication::instance()->thread());
254
workItem->deleteLater();
260
qDebug() << "going to sleep";
262
_idleCondition.wakeAll();
264
_waitCondition.wait(&_mutex);
267
qDebug() << "waking up";
273
void PDFPageProcessingThread::clearWorkStack()
277
foreach(PageProcessingRequest * workItem, _workStack) {
280
Q_ASSERT(workItem->thread() == QApplication::instance()->thread());
281
workItem->deleteLater();
286
// Wait until the current operation finishes
287
_idleCondition.wait(&_mutex);
293
// Asynchronous Page Operations
294
// ----------------------------
296
// The `execute` functions here are called by the processing thread to perform
297
// background jobs such as page rendering or link loading. This allows the GUI
298
// thread to stay unblocked and responsive. The results of background jobs are
299
// posted as events to a `listener` which can be any subclass of `QObject`. The
300
// `listener` will need a custom `event` function that is capable of picking up
303
bool PageProcessingRequest::operator==(const PageProcessingRequest & r) const
305
// TODO: Should we care about the listener here as well?
306
return (type() == r.type() && page == r.page);
309
bool PageProcessingRenderPageRequest::operator==(const PageProcessingRequest & r) const
311
if (!PageProcessingRequest::operator==(r))
313
const PageProcessingRenderPageRequest * rr = static_cast<const PageProcessingRenderPageRequest*>(&r);
314
// TODO: Should we care about the listener here as well?
315
return (xres == rr->xres && yres == rr->yres && render_box == rr->render_box && cache == rr->cache);
319
PageProcessingRenderPageRequest::operator QString() const
321
return QString::fromUtf8("RP:%1.%2_%3").arg(page->pageNum()).arg(render_box.topLeft().x()).arg(render_box.topLeft().y());
325
// ### Custom Event Types
326
// These are the events posted by `execute` functions.
327
// Event type id used for PDFPageRenderedEvent. The value is requested from Qt
// through QEvent::registerEventType() and cast to QEvent::Type.
const QEvent::Type PDFPageRenderedEvent::PageRenderedEvent = static_cast<QEvent::Type>( QEvent::registerEventType() );
328
// Event type id used for PDFLinksLoadedEvent. The value is requested from Qt
// through QEvent::registerEventType() and cast to QEvent::Type.
const QEvent::Type PDFLinksLoadedEvent::LinksLoadedEvent = static_cast<QEvent::Type>( QEvent::registerEventType() );
330
bool PageProcessingRenderPageRequest::execute()
332
// TODO: Aborting renders doesn't really work right now---the backend knows
333
// nothing about the PDF scenes.
335
// Idea: Perhaps allow page render requests to provide a pointer to a function
336
// that returns a `bool` value indicating if the request is still valid? Then
337
// the `PDFPageGraphicsItem` could have a function that indicates if the item
338
// is anywhere near a viewport.
339
QImage rendered_page = page->renderToImage(xres, yres, render_box, cache);
340
QCoreApplication::postEvent(listener, new PDFPageRenderedEvent(xres, yres, render_box, rendered_page));
345
bool PageProcessingLoadLinksRequest::execute()
347
QCoreApplication::postEvent(listener, new PDFLinksLoadedEvent(page->loadLinks()));
352
PageProcessingLoadLinksRequest::operator QString() const
354
return QString::fromUtf8("LL:%1").arg(page->pageNum());
359
PDFPageTile::operator QString() const
361
return QString::fromUtf8("p%1,%2x%3,r%4|%5x%6|%7").arg(page_num).arg(xres).arg(yres).arg(render_box.x()).arg(render_box.y()).arg(render_box.width()).arg(render_box.height());
365
// Taken from Qt 4.7.2 sources (<Qt>/src/corelib/tools/qhash.cpp)
366
static uint hash(const uchar *p, int n)
372
h ^= (h & 0xf0000000) >> 23;
378
inline uint qHash(const QRect &key) {
380
QPair< QPair< int, int >, QPair< int, int > >(
381
QPair< int, int >(key.x(), key.y()),
382
QPair< int, int >(key.width(), key.height())
387
inline uint qHash(const double &d)
389
// We interpret the double as an array of bytes and use the hash() function on
391
// NOTE: Due to rounding errors, this is not 100% reliable - two doubles that
392
// _look_ the same may actually differ in their bit representations (e.g., if
393
// the same value was calculated in two different ways). So this function may
394
// report different hashes for doubles that look the same (which should not be
395
// a problem in our case, however).
396
// Note also that the QDataStream approach used previously also works on the
397
// binary representation of doubles internally and so the same problem would
398
// occur there as well.
399
return hash((const uchar*)&d, sizeof(d));
402
// ### Cache for Rendered Images
403
inline uint qHash(const PDFPageTile &tile)
405
uint h1 = qHash(QPair<uint, uint>(qHash(tile.xres), qHash(tile.yres)));
406
uint h2 = qHash(QPair<uint,int>(qHash(tile.render_box), tile.page_num));
407
return qHash(QPair<uint, uint>(h1, h2));
410
QSharedPointer<QImage> PDFPageCache::getImage(const PDFPageTile & tile) const
413
QSharedPointer<QImage> * retVal = object(tile);
417
return QSharedPointer<QImage>();
420
QSharedPointer<QImage> PDFPageCache::setImage(const PDFPageTile & tile, QImage * image, const bool overwrite /* = true */)
422
_lock.lockForWrite();
423
QSharedPointer<QImage> retVal;
425
retVal = *object(tile);
426
// If the key is not in the cache yet add it. Otherwise overwrite the cached
427
// image but leave the pointer intact as that can be held/used elsewhere
429
QSharedPointer<QImage> * toInsert = new QSharedPointer<QImage>(image);
430
insert(tile, toInsert, (image ? image->byteCount() : 0));
434
// TODO: overwriting an image with a different one can change its size (and
435
// therefore its cost in the cache). There doesn't seem to be a method to
436
// handle that in QCache, though, and since we only use one tile size this
437
// shouldn't pose a problem.
441
QSharedPointer<QImage> * toInsert = new QSharedPointer<QImage>;
442
insert(tile, toInsert, 0);
457
// This class is thread-safe. Data access is governed by the QReadWriteLock
459
Document::Document(QString fileName):
462
_meta_trapped(Trapped_Unknown),
463
_docLock(new QReadWriteLock(QReadWriteLock::Recursive))
465
Q_ASSERT(_docLock != NULL);
468
// qDebug() << "Document::Document(" << fileName << ")";
471
// Set cache for rendered pages to be 1GB. This is enough for 256 RGBA tiles
472
// (1024 x 1024 pixels x 4 bytes per pixel).
474
// NOTE: The application seems to exceed 1 GB---usage plateaus at around 2GB. No idea why. Perhaps freed
475
// blocks are not garbage collected?? Perhaps my math is off??
476
_pageCache.setMaxSize(1024 * 1024 * 1024);
479
Document::~Document()
482
// qDebug() << "Document::~Document()";
484
QWriteLocker docLocker(_docLock.data());
488
// Returns the stored page count (_numPages). A read lock on the document lock
// is held while the value is read.
int Document::numPages()
{
  QReadLocker readGuard(_docLock.data());
  return _numPages;
}
489
// Returns a reference to the document's background processing thread object.
// The read lock is only held for the duration of this call; it is released
// when the function returns and does not guard later use of the reference.
PDFPageProcessingThread &Document::processingThread()
{
  QReadLocker readGuard(_docLock.data());
  return _processingThread;
}
490
// Returns a reference to the document's page cache.
// The read lock is only held for the duration of this call; it is released
// when the function returns and does not guard later use of the reference.
PDFPageCache &Document::pageCache()
{
  QReadLocker readGuard(_docLock.data());
  return _pageCache;
}
492
QList<SearchResult> Document::search(QString searchText, int startPage)
494
QReadLocker docLocker(_docLock.data());
495
QList<SearchResult> results;
498
for (i = startPage; i < _numPages; ++i) {
499
QSharedPointer<Page> page(_pages[i]);
502
results << page->search(searchText);
504
for (i = 0; i < startPage; ++i) {
505
QSharedPointer<Page> page(_pages[i]);
508
results << page->search(searchText);
514
void Document::clearPages()
516
QWriteLocker docLocker(_docLock.data());
517
foreach(QSharedPointer<Page> page, _pages) {
520
page->detachFromParent();
522
// Note: clear() releases all QSharedPointer to pages, thereby destroying them
523
// (if they are not used elsewhere)
527
void Document::clearMetaData()
529
QWriteLocker docLocker(_docLock.data());
530
_meta_title = QString();
531
_meta_author = QString();
532
_meta_subject = QString();
533
_meta_keywords = QString();
534
_meta_creator = QString();
535
_meta_producer = QString();
537
_meta_creationDate = QDateTime();
538
_meta_modDate = QDateTime();
539
_meta_trapped = Trapped_Unknown;
546
// This class is thread-safe. Data access is governed by the QReadWriteLock
547
// _pageLock. When accessing the parent document directly (i.e., not via public
548
// member functions), the QSharedPointer<QReadWriteLock> _docLock must also be
549
// acquired. Note that if _docLock and _pageLock are to be acquired, _docLock
550
// must be acquired first.
551
// Note that the Page may exist in a detached state, i.e., _parent == NULL. This
552
// is typically the case when the document discarded the page object but some
553
// other object (typically in another thread) still holds a QSharedPointer to it.
554
Page::Page(Document *parent, int at, QSharedPointer<QReadWriteLock> docLock):
558
_pageLock(new QReadWriteLock(QReadWriteLock::Recursive)),
564
// qDebug() << "Page::Page(" << parent << ", " << at << ")";
567
if (!pageDummyBrush) {
568
pageDummyBrush = new QBrush();
570
// Make a texture brush which can be used to print "rendering page" all over
571
// the dummy tiles that are shown while the rendering thread is doing its
573
QImage brushTex(1024, 1024, QImage::Format_ARGB32);
577
p.fillRect(brushTex.rect(), Qt::white);
578
p.setPen(Qt::lightGray);
579
p.drawText(brushTex.rect(), Qt::AlignCenter | Qt::AlignVCenter | Qt::TextSingleLine, QCoreApplication::translate("QtPDF::PDFDocumentScene", "rendering page"), &textRect);
581
textRect.adjust(-textRect.width() * .05, -textRect.height() * .1, textRect.width() * .05, textRect.height() * .1);
582
brushTex = brushTex.copy(textRect.toAlignedRect());
584
pageDummyBrush->setTextureImage(brushTex);
585
pageDummyBrush->setTransform(QTransform().rotate(-45));
592
// qDebug() << "Page::~Page(" << _n << ")";
596
// Returns this page's stored number (_n). A read lock on the page lock is held
// while the value is read.
int Page::pageNum()
{
  QReadLocker readGuard(_pageLock);
  return _n;
}
598
void Page::detachFromParent()
600
QWriteLocker pageLocker(_pageLock);
604
QSharedPointer<QImage> Page::getCachedImage(double xres, double yres, QRect render_box)
606
QReadLocker docLocker(_docLock.data());
607
QReadLocker pageLocker(_pageLock);
609
return QSharedPointer<QImage>();
610
return _parent->pageCache().getImage(PDFPageTile(xres, yres, render_box, _n));
613
void Page::asyncRenderToImage(QObject *listener, double xres, double yres, QRect render_box, bool cache)
615
QReadLocker docLocker(_docLock.data());
616
QReadLocker pageLocker(_pageLock);
619
_parent->processingThread().addPageProcessingRequest(new PageProcessingRenderPageRequest(this, listener, xres, yres, render_box, cache));
622
bool higherResolutionThan(const PDFPageTile & t1, const PDFPageTile & t2)
624
// Note: We silently assume that xres and yres behave the same way
625
return t1.xres > t2.xres;
628
QSharedPointer<QImage> Page::getTileImage(QObject * listener, const double xres, const double yres, QRect render_box /* = QRect() */)
630
QReadLocker docLocker(_docLock.data());
631
QReadLocker pageLocker(_pageLock);
633
// If the render_box is empty, use the whole page
634
if (render_box.isNull())
635
render_box = QRectF(0, 0, pageSizeF().width() * xres / 72., pageSizeF().height() * yres / 72.).toAlignedRect();
637
// If the tile is cached, return it
638
QSharedPointer<QImage> retVal = getCachedImage(xres, yres, render_box);
643
// Render asynchronously, but add a dummy image to the cache first and return
645
// Note: Start the rendering in the background before constructing the image
646
// to take advantage of multi-core CPUs. Since we hold the write lock here
647
// there's nothing to worry about
648
asyncRenderToImage(listener, xres, yres, render_box, true);
650
QImage * tmpImg = new QImage(render_box.width(), render_box.height(), QImage::Format_ARGB32);
652
p.fillRect(tmpImg->rect(), *pageDummyBrush);
654
// Look through the cache to find tiles we can reuse (by scaling) for our
656
// TODO: Benchmark this. If it is actually too slow (i.e., just keeping the
657
// rendered image from popping up due to the write lock we hold) disable it
659
QList<PDFPageTile> tiles = _parent->pageCache().tiles();
660
for (QList<PDFPageTile>::iterator it = tiles.begin(); it != tiles.end(); ) {
661
if (it->page_num != pageNum()) {
662
it = tiles.erase(it);
665
// See if it->render_box intersects with render_box (after proper scaling)
666
QRect scaledRect = QTransform::fromScale(xres / it->xres, yres / it->yres).mapRect(it->render_box);
667
if (!scaledRect.intersects(render_box)) {
668
it = tiles.erase(it);
673
// Sort the remaining tiles by size, high-res first
674
qSort(tiles.begin(), tiles.end(), higherResolutionThan);
675
// Finally, crop, scale and paint each image until the whole area is
676
// filled or no images are left in the list
677
QPainterPath clipPath;
678
clipPath.addRect(0, 0, render_box.width(), render_box.height());
679
foreach (PDFPageTile tile, tiles) {
680
QSharedPointer<QImage> tileImg = _parent->pageCache().getImage(tile);
684
// cropRect is the part of `tile` that overlaps the tile-to-paint (after
686
// paintRect is the part `tile` fills of the area we paint to (after
688
QRect cropRect = QTransform::fromScale(tile.xres / xres, tile.yres / yres).mapRect(render_box).intersected(tile.render_box).translated(-tile.render_box.left(), -tile.render_box.top());
689
QRect paintRect = QTransform::fromScale(xres / tile.xres, yres / tile.yres).mapRect(tile.render_box).intersected(render_box).translated(-render_box.left(), -render_box.top());
691
// Get the actual image and paint it onto the dummy tile
692
QImage tmp(tileImg->copy(cropRect).scaled(paintRect.size()));
693
p.setClipPath(clipPath);
694
p.drawImage(paintRect.topLeft(), tmp);
696
// Confine the clipping path to the part we have not painted to yet.
698
pp.addRect(paintRect);
699
clipPath = clipPath.subtracted(pp);
700
if (clipPath.isEmpty())
704
// stop painting or else we couldn't (possibly) delete tmpImg below
707
// Add the dummy tile to the cache
708
// Note: In the meantime the asynchronous rendering could have finished and
709
// inserted the final image in the cache---we must handle that case and delete
710
// our temporary image
711
retVal = _parent->pageCache().setImage(PDFPageTile(xres, yres, render_box, _n), tmpImg, false);
712
if (retVal != tmpImg)
717
renderToImage(xres, yres, render_box, true);
718
return getCachedImage(xres, yres, render_box);
722
void Page::asyncLoadLinks(QObject *listener)
724
QReadLocker docLocker(_docLock.data());
725
QReadLocker pageLocker(_pageLock);
728
_parent->processingThread().addPageProcessingRequest(new PageProcessingLoadLinksRequest(this, listener));
732
QList<SearchResult> Page::executeSearch(SearchRequest request)
734
QSharedPointer<Document> doc(request.doc.toStrongRef());
736
return QList<SearchResult>();
737
QSharedPointer<Page> page = doc->page(request.pageNum).toStrongRef();
739
return QList<SearchResult>();
740
return page->search(request.searchString);
743
} // namespace Backend
747
// vim: set sw=2 ts=2 et