1
//========================================================================
5
// Copyright 1996-2003 Glyph & Cog, LLC
7
//========================================================================
9
//========================================================================
11
// Modified under the Poppler project - http://poppler.freedesktop.org
13
// All changes made under the Poppler project to this file are licensed
14
// under GPL version 2 or later
16
// Copyright (C) 2005, 2006, 2008 Brad Hards <bradh@frogmouth.net>
17
// Copyright (C) 2005, 2007-2009 Albert Astals Cid <aacid@kde.org>
18
// Copyright (C) 2008 Julien Rebetez <julienr@svn.gnome.org>
19
// Copyright (C) 2008, 2010 Pino Toscano <pino@kde.org>
20
// Copyright (C) 2008, 2010 Carlos Garcia Campos <carlosgc@gnome.org>
21
// Copyright (C) 2009 Eric Toombs <ewtoombs@uwaterloo.ca>
22
// Copyright (C) 2009 Kovid Goyal <kovid@kovidgoyal.net>
23
// Copyright (C) 2009 Axel Struebing <axel.struebing@freenet.de>
24
// Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
25
// Copyright (C) 2010 Jakub Wilk <ubanus@users.sf.net>
26
// Copyright (C) 2010 Ilya Gorenbein <igorenbein@finjan.com>
27
// Copyright (C) 2010 Srinivas Adicherla <srinivas.adicherla@geodesic.com>
28
// Copyright (C) 2010 Philip Lorenz <lorenzph+freedesktop@gmail.com>
30
// To see a description of the changes please see the Changelog file that
31
// came with your tarball or type make ChangeLog if you are building from git
33
//========================================================================
37
#ifdef USE_GCC_PRAGMAS
38
#pragma implementation
53
#include "goo/gstrtod.h"
54
#include "goo/GooString.h"
55
#include "poppler-config.h"
56
#include "GlobalParams.h"
61
#include "Linearization.h"
63
#include "OutputDev.h"
65
#include "ErrorCodes.h"
68
#include "SecurityHandler.h"
70
#ifndef DISABLE_OUTLINE
76
//------------------------------------------------------------------------
78
// Size limits used when scanning the document stream for structural markers.
#define headerSearchSize 1024 // read this many bytes at beginning of
79
// file to look for '%PDF'
80
// Length of an ID once hex-encoded by get_id(): 16 raw bytes are printed as
// two hex digits each, giving 32 characters.
#define pdfIdLength 32 // PDF Document IDs (PermanentId, UpdateId) length
82
#define linearizationSearchSize 1024 // read this many bytes at beginning of
83
// file to look for linearization
86
#define xrefSearchSize 1024 // read this many bytes at end of file
87
// to look for 'startxref'
89
//------------------------------------------------------------------------
91
//------------------------------------------------------------------------
101
linearization = NULL;
104
#ifndef DISABLE_OUTLINE
107
startXRefPos = ~(Guint)0;
117
// Build a document from a file on disk. The GooString pointer passed in
// fileNameA is stored directly in fileName (no copy is made here), the file
// is opened for binary reading and wrapped in a FileStream, and setup() is
// then run with the supplied passwords; its result is stored in ok.
PDFDoc::PDFDoc(GooString *fileNameA, GooString *ownerPassword,
118
GooString *userPassword, void *guiDataA) {
124
fileName = fileNameA;
128
// stat() fills buf; NOTE(review): presumably the size handed to FileStream
// below is taken from it -- confirm.
if (stat(fileName->getCString(), &buf) == 0) {
134
// Two alternative fopen() calls; the three-argument form is non-standard,
// so they are presumably selected by a platform conditional -- confirm.
file = fopen(fileName->getCString(), "rb", "ctx=stm");
136
file = fopen(fileName->getCString(), "rb");
139
// fopen() has failed.
140
// Keep a copy of the errno returned by fopen so that it can be
141
// referred to later.
143
error(-1, "Couldn't open file '%s': %s.", fileName->getCString(),
145
errCode = errOpenFile;
151
// Wrap the opened FILE in the document's base stream.
str = new FileStream(file, 0, gFalse, size, &obj);
153
ok = setup(ownerPassword, userPassword);
157
// Build a document from a wide-character file name (Windows API variant:
// uses OSVERSIONINFO, GetVersionEx, _wstat and _wfopen). The name is given
// as fileNameLen wide characters in fileNameA. The opened file is wrapped in
// a FileStream and setup() is run; its result is stored in ok.
PDFDoc::PDFDoc(wchar_t *fileNameA, int fileNameLen, GooString *ownerPassword,
158
GooString *userPassword, void *guiDataA) {
159
OSVERSIONINFO version;
160
wchar_t fileName2[MAX_PATH + 1];
168
//~ file name should be stored in Unicode (?)
169
// Each wide character is narrowed with a (char) cast, so any character that
// does not fit in a char is truncated in the stored fileName.
fileName = new GooString();
170
for (i = 0; i < fileNameLen; ++i) {
171
fileName->append((char)fileNameA[i]);
174
// zero-terminate the file name string
175
// The copy is bounded by MAX_PATH so it fits in fileName2[MAX_PATH + 1].
for (i = 0; i < fileNameLen && i < MAX_PATH; ++i) {
176
fileName2[i] = fileNameA[i];
181
// NB: _wfopen is only available in NT
184
version.dwOSVersionInfoSize = sizeof(version);
185
GetVersionEx(&version);
186
// On NT platforms the wide-character name is used for stat/open; the other
// visible path uses the narrowed fileName with _stat/fopen.
if (version.dwPlatformId == VER_PLATFORM_WIN32_NT) {
187
if (_wstat(fileName2, &buf) == 0) {
190
file = _wfopen(fileName2, L"rb");
192
if (_stat(fileName->getCString(), &buf) == 0) {
195
file = fopen(fileName->getCString(), "rb");
198
error(-1, "Couldn't open file '%s'", fileName->getCString());
199
errCode = errOpenFile;
205
str = new FileStream(file, 0, gFalse, size, &obj);
207
ok = setup(ownerPassword, userPassword);
211
// Build a document on top of an existing BaseStream. If the stream reports
// a file name, a copy of it is stored in fileName; setup() is then run with
// the supplied passwords and its result stored in ok.
PDFDoc::PDFDoc(BaseStream *strA, GooString *ownerPassword,
212
GooString *userPassword, void *guiDataA) {
216
if (strA->getFileName()) {
217
// copy() is used, so fileName does not alias the stream's own string.
fileName = strA->getFileName()->copy();
222
ok = setup(ownerPassword, userPassword);
225
// Shared initialisation used by the constructors: checks that the base
// stream is seekable, builds the XRef table, checks encryption with the
// given passwords and builds the Catalog. The visible error paths set
// errCode to the XRef's error code, errEncrypted or errBadCatalog.
GBool PDFDoc::setup(GooString *ownerPassword, GooString *userPassword) {
227
// A negative stream position is treated as "not seekable".
if (str->getPos() < 0)
229
error(-1, "Document base stream is not seekable");
236
// Adobe does not seem to enforce %%EOF, so we do the same
237
// if (!checkFooter()) return gFalse;
242
// Passed to the XRef constructor by address; read below to decide whether
// a second attempt at building the catalog is worthwhile.
GBool wasReconstructed = false;
245
xref = new XRef(str, getStartXRef(), getMainXRefEntriesOffset(), &wasReconstructed);
247
error(-1, "Couldn't read xref table");
248
errCode = xref->getErrorCode();
252
// check for encryption
253
if (!checkEncryption(ownerPassword, userPassword)) {
254
errCode = errEncrypted;
259
catalog = new Catalog(xref);
260
if (catalog && !catalog->isOk()) {
261
if (!wasReconstructed)
263
// try one more time to construct the Catalog, maybe the problem is a damaged XRef
266
// NOTE(review): the trailing 'true' presumably forces XRef reconstruction
// -- confirm against the XRef constructor.
xref = new XRef(str, 0, 0, NULL, true);
267
catalog = new Catalog(xref);
270
if (catalog && !catalog->isOk()) {
271
error(-1, "Couldn't read page catalog");
272
errCode = errBadCatalog;
283
for (int i = 0; i < getNumPages(); i++) {
291
#ifndef DISABLE_OUTLINE
306
delete linearization;
320
// Check for a %%EOF at the end of this stream.
// On failure an error is reported and errCode is set to errDamaged.
// setup() currently has its call to this function commented out.
321
GBool PDFDoc::checkFooter() {
322
// we look in the last 1024 chars because Adobe does the same
323
// 1025 bytes: up to 1024 characters read below plus one spare byte.
char *eof = new char[1025];
324
// Remember the current position; NOTE(review): presumably restored before
// returning -- the restoring call is not visible here.
int pos = str->getPos();
325
str->setPos(1024, -1);
327
for (i = 0; i < 1024; i++)
337
// Scan backwards, starting 5 characters before the end of what was read,
// for the 5-character marker.
for (i = i - 5; i >= 0; i--) {
338
if (strncmp (&eof[i], "%%EOF", 5) == 0) {
345
error(-1, "Document has not the mandatory ending %%EOF");
346
errCode = errDamaged;
355
// Check for a PDF header on this stream. Garbage before the header is
// tolerated: the first headerSearchSize bytes are searched for "%PDF-".
// The version that follows the marker is parsed into pdfMajorVersion and
// pdfMinorVersion. A missing header is only reported, not treated as fatal
// (see the "continuing anyway" messages).
357
void PDFDoc::checkHeader() {
358
// One extra byte so the buffer can be NUL-terminated for the string calls.
char hdrBuf[headerSearchSize+1];
365
for (i = 0; i < headerSearchSize; ++i) {
366
hdrBuf[i] = str->getChar();
368
hdrBuf[headerSearchSize] = '\0';
369
// Stop 5 characters short of the end so the 5-byte compare stays in bounds.
for (i = 0; i < headerSearchSize - 5; ++i) {
370
if (!strncmp(&hdrBuf[i], "%PDF-", 5)) {
374
// Reaching the loop bound means the marker was not found.
if (i >= headerSearchSize - 5) {
375
error(-1, "May not be a PDF file (continuing anyway)");
379
// Take the first whitespace-delimited token after "%PDF-" as the version.
if (!(p = strtok_r(&hdrBuf[i+5], " \t\n\r", &tokptr))) {
380
error(-1, "May not be a PDF file (continuing anyway)");
383
sscanf(p, "%d.%d", &pdfMajorVersion, &pdfMinorVersion);
384
// We don't do the version check. Don't add it back in.
387
// Look up the trailer's "Encrypt" entry and, when it is a dictionary, try
// to authorise access with the given passwords through a SecurityHandler.
// Sets the members 'encrypted' and 'secHdlr'. On successful authorisation
// the handler's permission flags, key and algorithm parameters are
// installed on the XRef.
GBool PDFDoc::checkEncryption(GooString *ownerPassword, GooString *userPassword) {
392
xref->getTrailerDict()->dictLookup("Encrypt", &encrypt);
393
// The document counts as encrypted exactly when "Encrypt" is a dictionary.
if ((encrypted = encrypt.isDict())) {
394
// make() yields a null handler when no matching handler exists (see the
// "couldn't find the matching security handler" branch below).
if ((secHdlr = SecurityHandler::make(this, &encrypt))) {
395
if (secHdlr->checkEncryption(ownerPassword, userPassword)) {
396
// authorization succeeded
397
xref->setEncryption(secHdlr->getPermissionFlags(),
398
secHdlr->getOwnerPasswordOk(),
399
secHdlr->getFileKey(),
400
secHdlr->getFileKeyLength(),
401
secHdlr->getEncVersion(),
402
secHdlr->getEncRevision(),
403
secHdlr->getEncAlgorithm());
406
// authorization failed
410
// couldn't find the matching security handler
414
// document is not encrypted
421
// Render one page to an output device. When the global print-commands flag
// is set, a banner with the page number is first written to stdout. The
// call is then forwarded to display() on the Page returned by
// getPage(page), passing this document's catalog and both callbacks through.
// NOTE(review): getPage() returns NULL for an out-of-range page; no null
// check is visible before the dereference below.
void PDFDoc::displayPage(OutputDev *out, int page,
422
double hDPI, double vDPI, int rotate,
423
GBool useMediaBox, GBool crop, GBool printing,
424
GBool (*abortCheckCbk)(void *data),
425
void *abortCheckCbkData,
426
GBool (*annotDisplayDecideCbk)(Annot *annot, void *user_data),
427
void *annotDisplayDecideCbkData) {
428
if (globalParams->getPrintCommands()) {
429
printf("***** page %d *****\n", page);
433
getPage(page)->display(out, hDPI, vDPI,
434
rotate, useMediaBox, crop, printing, catalog,
435
abortCheckCbk, abortCheckCbkData,
436
annotDisplayDecideCbk, annotDisplayDecideCbkData);
440
// Render the inclusive page range [firstPage, lastPage] by calling
// displayPage() once per page with the same rendering parameters and
// callbacks.
void PDFDoc::displayPages(OutputDev *out, int firstPage, int lastPage,
441
double hDPI, double vDPI, int rotate,
442
GBool useMediaBox, GBool crop, GBool printing,
443
GBool (*abortCheckCbk)(void *data),
444
void *abortCheckCbkData,
445
GBool (*annotDisplayDecideCbk)(Annot *annot, void *user_data),
446
void *annotDisplayDecideCbkData) {
449
for (page = firstPage; page <= lastPage; ++page) {
450
displayPage(out, page, hDPI, vDPI, rotate, useMediaBox, crop, printing,
451
abortCheckCbk, abortCheckCbkData,
452
annotDisplayDecideCbk, annotDisplayDecideCbkData);
456
// Render a rectangular slice of one page. The slice is described by
// sliceX, sliceY, sliceW and sliceH, which are forwarded unchanged to
// displaySlice() on the Page returned by getPage(page).
// NOTE(review): getPage() returns NULL for an out-of-range page; no null
// check is visible before the dereference below.
void PDFDoc::displayPageSlice(OutputDev *out, int page,
457
double hDPI, double vDPI, int rotate,
458
GBool useMediaBox, GBool crop, GBool printing,
459
int sliceX, int sliceY, int sliceW, int sliceH,
460
GBool (*abortCheckCbk)(void *data),
461
void *abortCheckCbkData,
462
GBool (*annotDisplayDecideCbk)(Annot *annot, void *user_data),
463
void *annotDisplayDecideCbkData) {
465
getPage(page)->displaySlice(out, hDPI, vDPI,
466
rotate, useMediaBox, crop,
467
sliceX, sliceY, sliceW, sliceH,
469
abortCheckCbk, abortCheckCbkData,
470
annotDisplayDecideCbk, annotDisplayDecideCbkData);
473
// Return the links of the given page. There are two return paths: a newly
// allocated Links built from a local object, and the page's own
// getLinks(catalog) result.
// NOTE(review): the first path is presumably the fallback for a NULL page
// (getPage() returns NULL when the page is out of range) -- the guarding
// condition is not visible here.
Links *PDFDoc::getLinks(int page) {
474
Page *p = getPage(page);
478
return new Links (&obj, NULL);
480
return p->getLinks(catalog);
483
// Forward to processLinks() on the Page returned by getPage(page), passing
// the output device and this document's catalog.
// NOTE(review): getPage() returns NULL for an out-of-range page; no null
// check is visible before the dereference below.
void PDFDoc::processLinks(OutputDev *out, int page) {
485
getPage(page)->processLinks(out, catalog);
488
// Return the document's Linearization object, creating it from the base
// stream on first use and keeping it in the 'linearization' member.
Linearization *PDFDoc::getLinearization()
490
if (!linearization) {
491
linearization = new Linearization(str);
493
return linearization;
496
// Report whether the document is linearized. The visible test requires a
// non-zero stream length that equals the length recorded in the
// Linearization object. The return statements themselves are not visible.
GBool PDFDoc::isLinearized() {
497
if ((str->getLength()) &&
498
(getLinearization()->getLength() == str->getLength()))
505
// Convert a raw 16-byte document ID into its 32-character lowercase hex
// form and store it in 'id'.
//   encodedidstring  raw ID; its length must be exactly pdfIdLength / 2
//   id               receives the pdfIdLength hex characters
// The results of the two checks below are not visible here; getID() treats
// a false return from this function as an invalid ID.
get_id (GooString *encodedidstring, GooString *id) {
506
const char *encodedid = encodedidstring->getCString();
507
// pdfIdLength hex characters plus the terminating NUL written by sprintf.
char pdfid[pdfIdLength + 1];
510
if (encodedidstring->getLength() != pdfIdLength / 2)
513
// "& 0xff" keeps each byte in 0..255 so a negative char is not
// sign-extended into more than two hex digits.
n = sprintf(pdfid, "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x",
514
encodedid[0] & 0xff, encodedid[1] & 0xff, encodedid[2] & 0xff, encodedid[3] & 0xff,
515
encodedid[4] & 0xff, encodedid[5] & 0xff, encodedid[6] & 0xff, encodedid[7] & 0xff,
516
encodedid[8] & 0xff, encodedid[9] & 0xff, encodedid[10] & 0xff, encodedid[11] & 0xff,
517
encodedid[12] & 0xff, encodedid[13] & 0xff, encodedid[14] & 0xff, encodedid[15] & 0xff);
518
if (n != pdfIdLength)
521
id->Set(pdfid, pdfIdLength);
525
// Read the trailer's "ID" entry. It must be an array of exactly two
// elements; when an element is a string it is converted with get_id():
// element 0 into permanent_id and element 1 into update_id. A conversion
// that fails is reported with error().
GBool PDFDoc::getID(GooString *permanent_id, GooString *update_id) {
527
xref->getTrailerDict()->dictLookup ("ID", &obj);
529
if (obj.isArray() && obj.arrayGetLength() == 2) {
533
if (obj.arrayGet(0, &obj2)->isString()) {
534
if (!get_id (obj2.getString(), permanent_id)) {
539
error(-1, "Invalid permanent ID");
547
if (obj.arrayGet(1, &obj2)->isString()) {
548
if (!get_id (obj2.getString(), update_id)) {
553
error(-1, "Invalid update ID");
569
// Create the Hints object on first use, and only when the document is
// linearized. It is built from the base stream, the Linearization object,
// the XRef and the security handler, and kept in the 'hints' member.
Hints *PDFDoc::getHints()
571
if (!hints && isLinearized()) {
572
hints = new Hints(str, getLinearization(), getXRef(), secHdlr);
578
// Save the document to the file called 'name'. The file is opened with
// mode "wb", wrapped in a FileOutStream, and the work is delegated to the
// OutStream overload of saveAs(), whose result is kept in 'res'. A failed
// fopen() is reported with error().
int PDFDoc::saveAs(GooString *name, PDFWriteMode mode) {
583
if (!(f = fopen(name->getCString(), "wb"))) {
584
error(-1, "Couldn't open file '%s'", name->getCString());
587
outStr = new FileOutStream(f,0);
588
res = saveAs(outStr, mode);
594
// Save the document to an output stream, choosing the strategy from 'mode':
//   writeForceRewrite      -> saveCompleteRewrite()
//   writeForceIncremental  -> saveIncrementalUpdate()
//   any other value        -> decided here: the XRef entries are scanned
//                             for an 'updated' flag, and the visible calls
//                             are saveIncrementalUpdate() or a plain copy
//                             via saveWithoutChangesAs().
int PDFDoc::saveAs(OutStream *outStr, PDFWriteMode mode) {
596
// we don't support files with Encrypt at the moment
598
xref->getTrailerDict()->getDict()->lookupNF("Encrypt", &obj);
606
if (mode == writeForceRewrite) {
607
saveCompleteRewrite(outStr);
608
} else if (mode == writeForceIncremental) {
609
saveIncrementalUpdate(outStr);
610
} else { // let poppler decide
611
// find if we have updated objects
612
GBool updated = gFalse;
613
for(int i=0; i<xref->getNumObjects(); i++) {
614
if (xref->getEntry(i)->updated) {
620
saveIncrementalUpdate(outStr);
622
// simply copy the original file
623
saveWithoutChangesAs (outStr);
630
// Write the document to the file called 'name' using the OutStream overload
// of saveWithoutChangesAs(). The file is opened with mode "wb" and wrapped
// in a FileOutStream; a failed fopen() is reported with error().
int PDFDoc::saveWithoutChangesAs(GooString *name) {
635
if (!(f = fopen(name->getCString(), "wb"))) {
636
error(-1, "Couldn't open file '%s'", name->getCString());
640
outStr = new FileOutStream(f,0);
641
res = saveWithoutChangesAs(outStr);
649
// Copy the original document to outStr. The visible loop reads the base
// stream one character at a time until EOF; the statement that writes each
// character is not visible here.
int PDFDoc::saveWithoutChangesAs(OutStream *outStr) {
653
while ((c = str->getChar()) != EOF) {
661
// Save as an incremental update: the original file is copied first, then
// every XRef entry flagged 'updated' is written as a new object and
// recorded in a secondary XRef (uxref), followed by that XRef and a trailer
// written with incrUpdate = gTrue.
void PDFDoc::saveIncrementalUpdate (OutStream* outStr)
665
//copy the original file
667
while ((c = str->getChar()) != EOF) {
673
// Entry for object 0 with generation 65535, added with the last argument
// gFalse.
uxref->add(0, 65535, 0, gFalse);
674
for(int i=0; i<xref->getNumObjects(); i++) {
675
if ((xref->getEntry(i)->type == xrefEntryFree) &&
676
(xref->getEntry(i)->gen == 0)) //we skip the irrelevant free objects
679
if (xref->getEntry(i)->updated) { //we have an updated object
683
// Compressed entries are written with generation 0.
ref.gen = xref->getEntry(i)->type == xrefEntryCompressed ? 0 : xref->getEntry(i)->gen;
684
xref->fetch(ref.num, ref.gen, &obj1);
685
// writeObject() returns the output offset at which the object starts.
Guint offset = writeObject(&obj1, &ref, outStr);
686
uxref->add(ref.num, ref.gen, offset, gTrue);
690
if (uxref->getSize() == 0) { //we have nothing to update
695
// The new XRef section starts at the current output position.
Guint uxrefOffset = outStr->getPos();
696
uxref->writeToFile(outStr, gFalse /* do not write unnecessary entries */);
698
// The trailer size comes from the original XRef, not from uxref.
writeTrailer(uxrefOffset, xref->getSize(), outStr, gTrue);
703
// Save by rewriting the whole file: a fresh "%PDF-major.minor" header is
// written, every uncompressed and compressed object is fetched and written
// again, and a new XRef (uxref) plus a trailer with incrUpdate = gFalse are
// emitted at the end.
void PDFDoc::saveCompleteRewrite (OutStream* outStr)
705
outStr->printf("%%PDF-%d.%d\r\n",pdfMajorVersion,pdfMinorVersion);
706
XRef *uxref = new XRef();
707
// Entry for object 0 with generation 65535, added with the last argument
// gFalse.
uxref->add(0, 65535, 0, gFalse);
708
for(int i=0; i<xref->getNumObjects(); i++) {
711
XRefEntryType type = xref->getEntry(i)->type;
712
if (type == xrefEntryFree) {
714
ref.gen = xref->getEntry(i)->gen;
715
/* the XRef class adds a lot of irrelevant free entries, we only want the significant one
716
and we don't want the one with num=0 because it has already been added (gen = 65535)*/
717
if (ref.gen > 0 && ref.num > 0)
718
uxref->add(ref.num, ref.gen, 0, gFalse);
719
} else if (type == xrefEntryUncompressed){
721
ref.gen = xref->getEntry(i)->gen;
722
xref->fetch(ref.num, ref.gen, &obj1);
723
// writeObject() returns the output offset at which the object starts.
Guint offset = writeObject(&obj1, &ref, outStr);
724
uxref->add(ref.num, ref.gen, offset, gTrue);
726
} else if (type == xrefEntryCompressed) {
728
ref.gen = 0; //compressed entries have gen == 0
729
xref->fetch(ref.num, ref.gen, &obj1);
730
Guint offset = writeObject(&obj1, &ref, outStr);
731
uxref->add(ref.num, ref.gen, offset, gTrue);
735
// The new XRef section starts at the current output position.
Guint uxrefOffset = outStr->getPos();
736
uxref->writeToFile(outStr, gTrue /* write all entries */);
738
// Unlike the incremental path, the trailer size comes from the new XRef.
writeTrailer(uxrefOffset, uxref->getSize(), outStr, gFalse);
745
// Serialise a dictionary to outStr as "<<" /Key value ... ">> ". Each key
// is passed through sanitizedName() before printing, and each value is
// written with writeObject() using the unresolved value from getValNF().
void PDFDoc::writeDictionnary (Dict* dict, OutStream* outStr)
748
outStr->printf("<<");
749
for (int i=0; i<dict->getLength(); i++) {
750
GooString keyName(dict->getKey(i));
751
// sanitizedName() returns a new GooString, which is deleted after printing.
GooString *keyNameToPrint = keyName.sanitizedName(gFalse /* non ps mode */);
752
outStr->printf("/%s ", keyNameToPrint->getCString());
753
delete keyNameToPrint;
754
// A NULL ref is passed, as for the array elements in writeObject().
writeObject(dict->getValNF(i, &obj1), NULL, outStr);
757
outStr->printf(">> ");
760
// Write a stream body to outStr: the "stream" keyword, every character
// returned by str->getChar() until EOF, then "endstream". Note that the
// parameter 'str' shadows the member of the same name used elsewhere in
// this class.
void PDFDoc::writeStream (Stream* str, OutStream* outStr)
762
outStr->printf("stream\r\n");
764
for (int c=str->getChar(); c!= EOF; c=str->getChar()) {
765
outStr->printf("%c", c);
767
outStr->printf("\r\nendstream\r\n");
770
// Write a stream body to outStr without decoding it: exactly "Length"
// characters (taken from the stream dictionary) are read with
// getUnfilteredChar() after unfilteredReset() and written between the
// "stream" and "endstream" keywords. A missing Length is reported with
// error().
void PDFDoc::writeRawStream (Stream* str, OutStream* outStr)
773
str->getDict()->lookup("Length", &obj1);
775
error (-1, "PDFDoc::writeRawStream, no Length in stream dict");
779
const int length = obj1.getInt();
782
outStr->printf("stream\r\n");
783
str->unfilteredReset();
784
// NOTE(review): the value returned by getUnfilteredChar() is written
// unconditionally; no EOF check is visible inside this loop.
for (int i=0; i<length; i++) {
785
int c = str->getUnfilteredChar();
786
outStr->printf("%c", c);
789
outStr->printf("\r\nendstream\r\n");
792
// Write a string object to outStr, escaping '(', ')' and '\' with a
// backslash and ending with ") ". There are two visible variants: one for
// strings that carry a Unicode marker and one for all other strings.
void PDFDoc::writeString (GooString* s, OutStream* outStr)
794
if (s->hasUnicodeMarker()) {
795
// Unicode strings don't necessarily end with \0, so iterate by length
796
const char* c = s->getCString();
798
for(int i=0; i<s->getLength(); i++) {
799
char unescaped = *(c+i)&0x000000ff;
801
if (unescaped == '(' || unescaped == ')' || unescaped == '\\')
802
outStr->printf("%c", '\\');
803
outStr->printf("%c", unescaped);
805
outStr->printf(") ");
807
const char* c = s->getCString();
809
for(int i=0; i<s->getLength(); i++) {
810
// NOTE(review): this reads *c rather than *(c+i); presumably c is advanced
// by a statement not visible here -- confirm.
char unescaped = (*c)&0x000000ff;
812
if (unescaped == '(' || unescaped == ')' || unescaped == '\\')
813
outStr->printf("%c", '\\');
814
outStr->printf("%c", unescaped);
817
outStr->printf(") ");
821
// Serialise one object to outStr, dispatching on obj->getType(). The output
// position at entry is captured in 'offset'; callers store the returned
// value as the object's XRef offset. Arrays and dictionaries recurse into
// this function with a NULL ref.
// NOTE(review): ref is dereferenced for the "N G obj" header below, yet the
// recursive calls pass NULL; presumably the header and the closing "endobj"
// are guarded by a null check that is not visible here -- confirm.
Guint PDFDoc::writeObject (Object* obj, Ref* ref, OutStream* outStr)
825
Guint offset = outStr->getPos();
829
outStr->printf("%i %i obj ", ref->num, ref->gen);
831
switch (obj->getType()) {
833
outStr->printf("%s ", obj->getBool()?"true":"false");
836
outStr->printf("%i ", obj->getInt());
841
// Reals are formatted with the "{0:.10g}" GooString format before printing.
s.appendf("{0:.10g}", obj->getReal());
842
outStr->printf("%s ", s.getCString());
846
writeString(obj->getString(), outStr);
850
GooString name(obj->getName());
851
GooString *nameToPrint = name.sanitizedName(gFalse /* non ps mode */);
852
outStr->printf("/%s ", nameToPrint->getCString());
857
outStr->printf( "null ");
860
array = obj->getArray();
862
for (int i=0; i<array->getLength(); i++) {
863
writeObject(array->getNF(i, &obj1), NULL,outStr);
866
outStr->printf("] ");
869
writeDictionnary (obj->getDict(),outStr);
873
// We can't modify streams with the current implementation (no write functions in the Stream API)
874
// => the only streams that can have been modified are internal streams (=strWeird)
875
Stream *stream = obj->getStream();
876
if (stream->getKind() == strWeird) {
877
//we write the stream unencoded => TODO: write stream encoder
879
//recalculate stream length
881
for (int c=stream->getChar(); c!=EOF; c=stream->getChar()) {
885
stream->getDict()->set("Length", &obj1);
887
//Remove Stream encoding
888
stream->getDict()->remove("Filter");
889
stream->getDict()->remove("DecodeParms");
891
writeDictionnary (stream->getDict(),outStr);
892
writeStream (stream,outStr);
896
// Streams of any other kind are written raw.
// NOTE(review): the result of dynamic_cast is used on the next visible
// line; any null check on it is not visible here.
FilterStream *fs = dynamic_cast<FilterStream*>(stream);
898
BaseStream *bs = fs->getBaseStream();
901
// When the XRef reports where the stream ends, Length is set to the
// distance between the base stream's start and that end.
if (xref->getStreamEnd(bs->getStart(), &streamEnd)) {
903
val.initInt(streamEnd - bs->getStart());
904
stream->getDict()->set("Length", &val);
908
writeDictionnary (stream->getDict(), outStr);
909
writeRawStream (stream, outStr);
914
outStr->printf("%i %i R ", obj->getRef().num, obj->getRef().gen);
917
outStr->printf("cmd\r\n");
920
outStr->printf("error\r\n");
923
outStr->printf("eof\r\n");
926
outStr->printf("none\r\n");
929
error(-1,"Unhandled objType : %i, please report a bug with a testcase\r\n", obj->getType());
933
outStr->printf("endobj\r\n");
937
// Build and write the file trailer followed by "startxref", the XRef
// offset and the end-of-file marker.
//   uxrefOffset  offset of the XRef section just written; printed after
//                "startxref"
//   uxrefSize    value stored under the trailer's "Size" key
//   outStr       destination stream
//   incrUpdate   passed as gTrue by saveIncrementalUpdate() and gFalse by
//                saveCompleteRewrite(); its use is not visible here
// A new ID is derived from an MD5 digest over the current time, the file
// name (or a placeholder), the file size and the string values of the
// document information dictionary.
void PDFDoc::writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr, GBool incrUpdate)
939
Dict *trailerDict = new Dict(xref);
941
obj1.initInt(uxrefSize);
942
trailerDict->set("Size", &obj1);
946
//build a new ID, as recommended in the reference, uses:
950
// - values of entry in information dictionary
953
sprintf(buffer, "%i", (int)time(NULL));
954
message.append(buffer);
956
message.append(fileName);
958
// Placeholder name appended to the message in place of a real file name.
message.append("streamwithoutfilename.pdf");
960
unsigned int fileSize = 0;
963
while ((c = str->getChar()) != EOF) {
967
// NOTE(review): "%i" is used with an unsigned int argument.
sprintf(buffer, "%i", fileSize);
968
message.append(buffer);
970
//info dict -- only use text string
971
if (xref->getDocInfo(&obj1)->isDict()) {
972
for(int i=0; i<obj1.getDict()->getLength(); i++) {
974
obj1.getDict()->getVal(i, &obj2);
975
if (obj2.isString()) {
976
message.append(obj2.getString());
983
//calculate md5 digest
985
Decrypt::md5((Guchar*)message.getCString(), message.getLength(), digest);
986
// The first 16 bytes of the digest become the new ID string.
obj1.initString(new GooString((const char*)digest, 16));
989
Object obj2,obj3,obj4,obj5;
990
obj2.initArray(xref);
993
//only update the second part of the array
994
if(xref->getTrailerDict()->getDict()->lookup("ID", &obj4) != NULL) {
995
if (!obj4.isArray()) {
996
error(-1, "PDFDoc::writeTrailer original file's ID entry isn't an array. Trying to continue");
998
//Get the first part of the ID
999
obj4.arrayGet(0,&obj3);
1001
// New ID array = [ original first element, freshly computed digest ].
obj2.arrayAdd(&obj3);
1002
obj2.arrayAdd(&obj1);
1003
trailerDict->set("ID", &obj2);
1007
//new file => same values for the two identifiers
1008
obj2.arrayAdd(&obj1);
1009
// A second string object is created from the same digest bytes for the
// second array element.
obj1.initString(new GooString((const char*)digest, 16));
1010
obj2.arrayAdd(&obj1);
1011
trailerDict->set("ID", &obj2);
1015
obj1.initRef(xref->getRootNum(), xref->getRootGen());
1016
trailerDict->set("Root", &obj1);
1019
// "Prev" is set to the original file's startxref position.
// NOTE(review): presumably only done for incremental updates -- the
// guarding condition is not visible here.
obj1.initInt(getStartXRef());
1020
trailerDict->set("Prev", &obj1);
1023
// The unresolved document-info value is copied into the trailer if present.
xref->getDocInfoNF(&obj5);
1024
if (!obj5.isNull()) {
1025
trailerDict->set("Info", &obj5);
1028
outStr->printf( "trailer\r\n");
1029
writeDictionnary(trailerDict, outStr);
1030
outStr->printf( "\r\nstartxref\r\n");
1031
// NOTE(review): uxrefOffset is a Guint but is printed with "%i".
outStr->printf( "%i\r\n", uxrefOffset);
1032
// NOTE(review): assuming standard printf escaping, "%%%%EOF" emits "%%EOF".
outStr->printf( "%%%%EOF\r\n");
1037
// Only compiled when DISABLE_OUTLINE is not defined.
#ifndef DISABLE_OUTLINE
1038
// Build the Outline from the catalog's outline object and the XRef, and
// store it in the 'outline' member.
// NOTE(review): presumably created only on first use -- the guarding
// condition and the return statement are not visible here.
Outline *PDFDoc::getOutline()
1042
outline = new Outline(catalog->getOutline(), xref);
1049
// Factory for a document that only carries an error: a default-constructed
// PDFDoc is allocated, its errCode is set to errorCode and the fileNameA
// pointer is stored in fileName (the pointer is stored, not copied).
PDFDoc *PDFDoc::ErrorPDFDoc(int errorCode, GooString *fileNameA)
1051
PDFDoc *doc = new PDFDoc();
1052
doc->errCode = errorCode;
1053
doc->fileName = fileNameA;
1058
// Parse the leading decimal digits of s into an unsigned value. Parsing
// stops at the first non-digit, at the end of the string, or after 10
// digits, whichever comes first.
// NOTE(review): no overflow check is visible; a 10-digit number can exceed
// the range of a 32-bit unsigned -- confirm the width of Guint.
Guint PDFDoc::strToUnsigned(char *s) {
1064
for (p = s, i = 0; *p && isdigit(*p) && i < 10; ++p, ++i) {
1065
x = 10 * x + (*p - '0');
1070
// Read the 'startxref' position.
// The result is cached in startXRefPos; the all-ones value ~(Guint)0 means
// "not computed yet". Two strategies are visible: for linearized documents
// the start of the file is scanned for the end of the first object;
// otherwise the last xrefSearchSize bytes are scanned backwards for the
// "startxref" keyword and the number that follows it is parsed.
1071
Guint PDFDoc::getStartXRef()
1073
if (startXRefPos == ~(Guint)0) {
1075
if (isLinearized()) {
1076
char buf[linearizationSearchSize+1];
1080
// Read up to linearizationSearchSize characters, stopping early at EOF.
for (n = 0; n < linearizationSearchSize; ++n) {
1081
if ((c = str->getChar()) == EOF) {
1088
// find end of first obj
1090
for (i = 0; i < n; i++) {
1091
if (!strncmp("endobj", &buf[i], 6)) {
1097
char buf[xrefSearchSize+1];
1101
// read last xrefSearchSize bytes
1102
str->setPos(xrefSearchSize, -1);
1103
for (n = 0; n < xrefSearchSize; ++n) {
1104
if ((c = str->getChar()) == EOF) {
1112
// Search backwards; "startxref" is 9 characters long, so the last possible
// starting index is n - 9.
for (i = n - 9; i >= 0; --i) {
1113
if (!strncmp(&buf[i], "startxref", 9)) {
1120
// Skip the whitespace between the keyword and the number.
for (p = &buf[i+9]; isspace(*p); ++p) ;
1121
startXRefPos = strToUnsigned(p);
1126
return startXRefPos;
1129
// Return the offset of the main XRef entries as reported by the
// Linearization object when the document is linearized, and 0 otherwise.
Guint PDFDoc::getMainXRefEntriesOffset()
1131
Guint mainXRefEntriesOffset = 0;
1133
if (isLinearized()) {
1134
mainXRefEntriesOffset = getLinearization()->getMainXRefEntriesOffset();
1137
return mainXRefEntriesOffset;
1140
// Return the number of pages. For a linearized document the page count from
// the Linearization object is consulted first, and a non-zero value enters
// the inner branch. The final visible fallback is the catalog's page count.
// NOTE(review): the inner branch presumably returns n -- that return is not
// visible here.
int PDFDoc::getNumPages()
1142
if (isLinearized()) {
1144
if ((n = getLinearization()->getNumPages())) {
1149
return catalog->getNumPages();
1152
// Build a Page object for the given page number using the hint tables. The
// page's object number comes from getHints()->getPageObjectNum(page), is
// range-checked against the XRef, and the fetched object must be a
// dictionary. Each failure is reported with error().
Page *PDFDoc::parsePage(int page)
1159
pageRef.num = getHints()->getPageObjectNum(page);
1161
error(-1, "Failed to get object num from hint tables for page %d", page);
1165
// check for bogus ref - this can happen in corrupted PDF files
1166
if (pageRef.num < 0 || pageRef.num >= xref->getNumObjects()) {
1167
error(-1, "Invalid object num (%d) for page %d", pageRef.num, page);
1171
// The generation number is taken from the XRef entry for that object.
pageRef.gen = xref->getEntry(pageRef.num)->gen;
1172
xref->fetch(pageRef.num, pageRef.gen, &obj);
1173
if (!obj.isDict()) {
1175
error(-1, "Object (%d %d) is not a pageDict", pageRef.num, pageRef.gen);
1178
pageDict = obj.getDict();
1180
// PageAttrs is constructed with NULL as its first argument and the page
// dictionary as its second.
p = new Page(xref, page, pageDict, pageRef,
1181
new PageAttrs(NULL, pageDict), catalog->getForm());
1187
// Return the Page for a 1-based page number, or NULL when the number is
// outside 1..getNumPages(). For linearized documents pages are parsed on
// demand with parsePage() and kept in pageCache; the final visible fallback
// is the catalog's getPage().
Page *PDFDoc::getPage(int page)
1189
if ((page < 1) || page > getNumPages()) return NULL;
1191
if (isLinearized()) {
1193
// One cache slot per page, all initialised to NULL.
// NOTE(review): presumably allocated only when pageCache is not yet set --
// the guarding condition is not visible here.
pageCache = (Page **) gmallocn(getNumPages(), sizeof(Page *));
1194
for (int i = 0; i < getNumPages(); i++) {
1195
pageCache[i] = NULL;
1198
// Cache slots are 0-based while page numbers are 1-based.
if (!pageCache[page-1]) {
1199
pageCache[page-1] = parsePage(page);
1201
if (pageCache[page-1]) {
1202
return pageCache[page-1];
1204
error(-1, "Failed parsing page %d using hint tables", page);
1208
return catalog->getPage(page);