1
//========================================================================
6
// Copyright 1999-2000 G. Ovtcharov
7
//========================================================================
14
#include <poppler-config.h>
16
#include "parseargs.h"
17
#include "goo/GooString.h"
27
#include "HtmlOutputDev.h"
28
#include "PSOutputDev.h"
29
#include "GlobalParams.h"
32
#include "goo/gfile.h"
35
// Name of the Ghostscript executable; main() starts its Ghostscript command
// line with this string.
# define GHOSTSCRIPT "gs"
38
// Command-line option storage. Each variable below is filled in through the
// argDesc table unless noted otherwise.
// -f: first page to convert.
static int firstPage = 1;
39
// -l: last page to convert. main() replaces values below 1 or above the
// document's page count with the page count.
static int lastPage = 0;
40
// The -raw option that would set this is commented out in argDesc; main()
// overwrites it with complexMode.
static GBool rawOrder = gTrue;
41
// Not bound to an option in argDesc; main() sets it to gFalse.
GBool printCommands = gTrue;
42
// -h / -help: print usage information.
static GBool printHelp = gFalse;
43
// -p: exchange .pdf links by .html.
GBool printHtml = gFalse;
44
// -c: generate complex document.
GBool complexMode=gFalse;
46
//char extension[5]=".png";
48
// -noframes: generate no frames.
GBool noframes=gFalse;
51
// -q: passed to globalParams->setErrQuiet() and also suppresses the
// "Failed to launch Ghostscript" message in main().
GBool errQuiet=gFalse;
54
// -hidden: output hidden text.
GBool showHidden = gFalse;
55
// -nomerge: do not merge paragraphs.
GBool noMerge = gFalse;
56
// -opw / -upw: passwords for encrypted files. The buffer size is handed to the
// argument parser via sizeof() in argDesc.
static char ownerPassword[33] = "";
57
static char userPassword[33] = "";
58
// -dev: Ghostscript output device name; also searched by main() to pick the
// image file extension.
static char gsDevice[33] = "png16m";
59
// -v: print copyright and version info.
static GBool printVersion = gFalse;
61
// Helpers defined after main() that read entries from the document info
// dictionary.
static GooString* getInfoString(Dict *infoDict, char *key);
62
static GooString* getInfoDate(Dict *infoDict, char *key);
64
// -enc: output text encoding name; an empty string means the option was not
// given (main() tests textEncName[0]).
static char textEncName[128] = "";
66
// Command-line option table. main() passes it to parseArgs() to fill in the
// option variables and to printUsage() to print the help text.
// Each visible entry is: option name, argument kind, address of the target
// variable, buffer size (0 for non-string options), help string.
// NOTE(review): ignore, stout, scale, xml and noDrm are referenced here but
// declared outside the lines visible in this view, and the end of the table
// is not visible either.
static ArgDesc argDesc[] = {
67
{"-f", argInt, &firstPage, 0,
68
"first page to convert"},
69
{"-l", argInt, &lastPage, 0,
70
"last page to convert"},
71
/*{"-raw", argFlag, &rawOrder, 0,
72
"keep strings in content stream order"},*/
73
{"-q", argFlag, &errQuiet, 0,
74
"don't print any messages or errors"},
75
{"-h", argFlag, &printHelp, 0,
76
"print usage information"},
77
{"-help", argFlag, &printHelp, 0,
78
"print usage information"},
79
{"-p", argFlag, &printHtml, 0,
80
"exchange .pdf links by .html"},
81
{"-c", argFlag, &complexMode, 0,
82
"generate complex document"},
83
// NOTE(review): the help string for -i is not visible in this view; main()
// skips the PostScript/Ghostscript image step when this flag is set.
{"-i", argFlag, &ignore, 0,
85
{"-noframes", argFlag, &noframes, 0,
86
"generate no frames"},
87
{"-stdout" ,argFlag, &stout, 0,
88
"use standard output"},
89
// main() clamps the zoom value to the range 0.5 .. 3.0.
{"-zoom", argFP, &scale, 0,
90
"zoom the pdf document (default 1.5)"},
91
{"-xml", argFlag, &xml, 0,
92
"output for XML post-processing"},
93
{"-hidden", argFlag, &showHidden, 0,
94
"output hidden text"},
95
{"-nomerge", argFlag, &noMerge, 0,
96
"do not merge paragraphs"},
97
{"-enc", argString, textEncName, sizeof(textEncName),
98
"output text encoding name"},
99
{"-dev", argString, gsDevice, sizeof(gsDevice),
100
"output device name for Ghostscript (png16m, jpeg etc)"},
101
{"-v", argFlag, &printVersion, 0,
102
"print copyright and version info"},
103
{"-opw", argString, ownerPassword, sizeof(ownerPassword),
104
"owner password (for encrypted files)"},
105
{"-upw", argString, userPassword, sizeof(userPassword),
106
"user password (for encrypted files)"},
107
{"-nodrm", argFlag, &noDrm, 0,
108
"override document DRM settings"},
112
// Entry point of pdftohtml.
// Parses the options in argDesc, opens the PDF named by argv[1], renders pages
// firstPage..lastPage through an HtmlOutputDev and, in complex mode (unless
// -xml or -i is given), also writes a PostScript file and runs Ghostscript on
// it to produce the page images.
// NOTE(review): this view omits lines of the function (closing braces, some
// declarations such as ok, doc, p, info and i, and several else-branches), so
// the comments below describe only the statements that are visible.
int main(int argc, char *argv[]) {
114
GooString *fileName = NULL;
115
GooString *docTitle = NULL;
116
GooString *author = NULL, *keywords = NULL, *subject = NULL, *date = NULL;
117
GooString *htmlFileName = NULL;
118
GooString *psFileName = NULL;
119
HtmlOutputDev *htmlOut = NULL;
120
PSOutputDev *psOut = NULL;
123
// Extension of the generated image files; replaced below when gsDevice
// contains one of the names in extsList.
char extension[16] = "png";
124
// NOTE(review): not initialized here and only assigned inside the visible
// if-branches below; presumably the elided else-branches set them to NULL -
// confirm.
GooString *ownerPW, *userPW;
126
char * extsList[] = {"png", "jpeg", "bmp", "pcx", "tiff", "pbm", NULL};
129
ok = parseArgs(argDesc, &argc, argv);
130
// Print the banner and usage on a parse failure, a wrong number of positional
// arguments, or an explicit -h / -help / -v.
if (!ok || argc < 2 || argc > 3 || printHelp || printVersion) {
131
fprintf(stderr, "pdftohtml version %s http://pdftohtml.sourceforge.net/, based on Xpdf version %s\n", "0.36", xpdfVersion);
132
fprintf(stderr, "%s\n", "Copyright 1999-2003 Gueorgui Ovtcharov and Rainer Dorsch");
133
fprintf(stderr, "%s\n\n", xpdfCopyright);
135
printUsage("pdftohtml", "<PDF-file> [<html-file> <xml-file>]", argDesc);
144
globalParams = new GlobalParams("");
147
globalParams->setErrQuiet(errQuiet);
148
printCommands = gFalse; // I'm not 100% sure what the difference between them is
151
// Apply the -enc option only when it was given.
if (textEncName[0]) {
152
globalParams->setTextEncoding(textEncName);
153
if( !globalParams->getTextEncoding() ) {
159
// Wrap the passwords in GooStrings only when they were given.
if (ownerPassword[0]) {
160
ownerPW = new GooString(ownerPassword);
164
if (userPassword[0]) {
165
userPW = new GooString(userPassword);
170
fileName = new GooString(argv[1]);
172
doc = new PDFDoc(fileName, ownerPW, userPW);
183
// check for copy permission
184
if (!doc->okToCopy()) {
186
error(-1, "Copying of text from this document is not allowed.");
189
fprintf(stderr, "Document has copy-protection bit set.\n");
192
// construct text file name
194
// NOTE(review): argv[2] is only valid when a second positional argument was
// given; the guarding condition is not visible in this view.
GooString* tmp = new GooString(argv[2]);
195
// p points at the last five characters of the output name.
// NOTE(review): for names shorter than five characters this points before the
// start of the buffer - confirm whether that case is handled elsewhere.
p=tmp->getCString()+tmp->getLength()-5;
197
// Strip a trailing ".html" / ".HTML" so htmlFileName holds the base name.
if (!strcmp(p, ".html") || !strcmp(p, ".HTML"))
198
htmlFileName = new GooString(tmp->getCString(),
199
tmp->getLength() - 5);
200
else htmlFileName =new GooString(tmp);
202
// NOTE(review): p still points five characters before the end in the visible
// code, while ".xml" is four characters long and five characters are
// stripped; unless an elided line adjusts p, this comparison cannot match -
// confirm.
if (!strcmp(p, ".xml") || !strcmp(p, ".XML"))
203
htmlFileName = new GooString(tmp->getCString(),
204
tmp->getLength() - 5);
205
else htmlFileName =new GooString(tmp);
209
// Derive the base name from the input file name by dropping a trailing
// ".pdf" / ".PDF".
p = fileName->getCString() + fileName->getLength() - 4;
210
if (!strcmp(p, ".pdf") || !strcmp(p, ".PDF"))
211
htmlFileName = new GooString(fileName->getCString(),
212
fileName->getLength() - 4);
214
htmlFileName = fileName->copy();
215
// htmlFileName->append(".html");
218
// Clamp the zoom factor to the range 0.5 .. 3.0.
if (scale>3.0) scale=3.0;
219
if (scale<0.5) scale=0.5;
241
// An unset or out-of-range last page means "up to the end of the document".
if (lastPage < 1 || lastPage > doc->getNumPages())
242
lastPage = doc->getNumPages();
244
doc->getDocInfo(&info);
246
// Read the metadata that is handed to HtmlOutputDev below.
docTitle = getInfoString(info.getDict(), "Title");
247
author = getInfoString(info.getDict(), "Author");
248
keywords = getInfoString(info.getDict(), "Keywords");
249
subject = getInfoString(info.getDict(), "Subject");
250
date = getInfoDate(info.getDict(), "ModDate");
252
// NOTE(review): presumably a fallback used only when ModDate is missing; the
// guarding line is not visible in this view.
date = getInfoDate(info.getDict(), "CreationDate");
255
// Fall back to the output base name when the document has no title.
if( !docTitle ) docTitle = new GooString(htmlFileName);
257
/* determine extension of output background images */
259
for(i = 0; extsList[i]; i++)
261
if( strstr(gsDevice, extsList[i]) != (char *) NULL )
263
// Every entry of extsList is far shorter than extension[], so the copy is
// always NUL-terminated.
strncpy(extension, extsList[i], sizeof(extension));
268
rawOrder = complexMode; // todo: figure out what exactly rawOrder does :)
271
// author, keywords, subject and date may be NULL and are passed through as
// NULL in that case.
htmlOut = new HtmlOutputDev(htmlFileName->getCString(),
272
docTitle->getCString(),
273
author ? author->getCString() : NULL,
274
keywords ? keywords->getCString() : NULL,
275
subject ? subject->getCString() : NULL,
276
date ? date->getCString() : NULL,
280
doc->getCatalog()->getOutline()->isDict());
301
doc->displayPages(htmlOut, firstPage, lastPage, 72, 72, static_cast<int>(72*scale), 0, gTrue);
304
htmlOut->dumpDocOutline(doc->getCatalog());
308
// Image generation: write the pages to a PostScript file and have
// Ghostscript turn it into one image file per page.
if( complexMode && !xml && !ignore ) {
309
int h=xoutRound(htmlOut->getPageHeight()/scale);
310
int w=xoutRound(htmlOut->getPageWidth()/scale);
311
//int h=xoutRound(doc->getPageHeight(1)/scale);
312
//int w=xoutRound(doc->getPageWidth(1)/scale);
314
// The intermediate PostScript file is "<base name>.ps"; it is removed with
// unlink() further down.
psFileName = new GooString(htmlFileName->getCString());
315
psFileName->append(".ps");
317
globalParams->setPSPaperWidth(w);
318
globalParams->setPSPaperHeight(h);
320
// globalParams->setPSNoText(gTrue);
321
psOut = new PSOutputDev(psFileName->getCString(), doc->getXRef(),
322
doc->getCatalog(), firstPage, lastPage, psModePS);
323
doc->displayPages(psOut, firstPage, lastPage, 72, 72,
324
static_cast<int>(72*scale), 0, gFalse);
327
/*sprintf(buf, "%s -sDEVICE=png16m -dBATCH -dNOPROMPT -dNOPAUSE -r72 -sOutputFile=%s%%03d.png -g%dx%d -q %s", GHOSTSCRIPT, htmlFileName->getCString(), w, h,
328
psFileName->getCString());*/
330
// Build the Ghostscript command line piece by piece.
// NOTE(review): the file names and the -dev value come from the command line
// and are only wrapped in double quotes in the visible code; a name
// containing a quote character would change the command - confirm how
// executeCommand() runs it.
GooString *gsCmd = new GooString(GHOSTSCRIPT);
331
GooString *tw, *th, *sc;
332
gsCmd->append(" -sDEVICE=");
333
gsCmd->append(gsDevice);
334
gsCmd->append(" -dBATCH -dNOPROMPT -dNOPAUSE -r");
335
sc = GooString::fromInt(static_cast<int>(72*scale));
337
gsCmd->append(" -sOutputFile=");
339
gsCmd->append(htmlFileName);
340
// "%03d" is left in the command text as-is; it is not expanded here.
gsCmd->append("%03d.");
341
gsCmd->append(extension);
342
gsCmd->append("\" -g");
343
tw = GooString::fromInt(static_cast<int>(scale*w));
346
th = GooString::fromInt(static_cast<int>(scale*h));
348
gsCmd->append(" -q \"");
349
gsCmd->append(psFileName);
351
// printf("running: %s\n", gsCmd->getCString());
352
// Report a failed launch unless -q was given.
if( !executeCommand(gsCmd->getCString()) && !errQuiet) {
353
error(-1, "Failed to launch Ghostscript!\n");
355
unlink(psFileName->getCString());
368
if(globalParams) delete globalParams;
370
if(htmlFileName) delete htmlFileName;
373
// check for memory leaks
374
Object::memCheck(stderr);
380
// Looks up `key` in the document info dictionary and, when the entry is a
// string, copies it into a newly allocated GooString.
// NOTE(review): the rest of the function is not visible in this view;
// presumably it returns s1, which stays NULL when the entry is missing or not
// a string (main() tests the results for NULL) - confirm.
static GooString* getInfoString(Dict *infoDict, char *key) {
382
GooString *s1 = NULL;
384
if (infoDict->lookup(key, &obj)->isString()) {
385
s1 = new GooString(obj.getString());
391
// Looks up `key` in the document info dictionary and, when the entry is a
// string, tries to convert it to the form "YYYY-MM-DDTHH:MM:SS+00:00".
// Where the visible code cannot convert the value, it stores a copy of the
// raw string instead.
// NOTE(review): several lines of this function (declarations of obj, s,
// tmStruct and buf, else-branches, the return statement) are not visible in
// this view.
static GooString* getInfoDate(Dict *infoDict, char *key) {
394
int year, mon, day, hour, min, sec;
396
GooString *result = NULL;
399
if (infoDict->lookup(key, &obj)->isString()) {
400
s = obj.getString()->getCString();
401
// Check for the "D:" prefix of a date string.
// NOTE(review): presumably an elided line skips the prefix before the
// sscanf() below - confirm.
if (s[0] == 'D' && s[1] == ':') {
404
// Only the six numeric fields are read; anything after the seconds is not
// parsed.
if (sscanf(s, "%4d%2d%2d%2d%2d%2d",
405
&year, &mon, &day, &hour, &min, &sec) == 6) {
406
// struct tm counts years from 1900 and months from 0.
tmStruct.tm_year = year - 1900;
407
tmStruct.tm_mon = mon - 1;
408
tmStruct.tm_mday = day;
409
tmStruct.tm_hour = hour;
410
tmStruct.tm_min = min;
411
tmStruct.tm_sec = sec;
412
tmStruct.tm_wday = -1;
413
tmStruct.tm_yday = -1;
414
tmStruct.tm_isdst = -1;
415
mktime(&tmStruct); // compute the tm_wday and tm_yday fields
416
// The "+00:00" suffix is a fixed part of the format string, not derived from
// the input.
if (strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%S+00:00", &tmStruct)) {
417
result = new GooString(buf);
419
// Fallbacks: keep the raw string. The conditions guarding these two
// assignments are not visible in this view.
result = new GooString(s);
422
result = new GooString(s);