1
//========================================================================
5
// Copyright 1997-2002 Glyph & Cog, LLC
7
// Changed 1999-2000 by G.Ovtcharov
9
// Changed 2002 by Mikhail Kruk
11
//========================================================================
13
//========================================================================
15
// Modified under the Poppler project - http://poppler.freedesktop.org
17
// All changes made under the Poppler project to this file are licensed
18
// under GPL version 2 or later
20
// Copyright (C) 2005-2010 Albert Astals Cid <aacid@kde.org>
21
// Copyright (C) 2008 Kjartan Maraas <kmaraas@gnome.org>
22
// Copyright (C) 2008 Boris Toloknov <tlknv@yandex.ru>
23
// Copyright (C) 2008 Haruyuki Kawabe <Haruyuki.Kawabe@unisys.co.jp>
24
// Copyright (C) 2008 Tomas Are Haavet <tomasare@gmail.com>
25
// Copyright (C) 2009 Warren Toomey <wkt@tuhs.org>
26
// Copyright (C) 2009 Carlos Garcia Campos <carlosgc@gnome.org>
27
// Copyright (C) 2009 Reece Dunn <msclrhd@gmail.com>
28
// Copyright (C) 2010 Adrian Johnson <ajohnson@redneon.com>
29
// Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
30
// Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac@cdacmumbai.in) and Onkar Potdar (onkar@cdacmumbai.in)
32
// To see a description of the changes please see the Changelog file that
33
// came with your tarball or type make ChangeLog if you are building from git
35
//========================================================================
38
#pragma implementation
48
#include "goo/GooString.h"
49
#include "goo/GooList.h"
50
#include "UnicodeMap.h"
55
#include "PNGWriter.h"
57
#include "DCTStream.h"
59
#include "GlobalParams.h"
60
#include "HtmlOutputDev.h"
61
#include "HtmlFonts.h"
63
// Definitions of the static class members used by the HTML output code:
// a page counter, a per-page image counter (starts at 1) and the list that
// collects the names of images written for the current page.
int HtmlPage::pgNum=0;
64
int HtmlOutputDev::imgNum=1;
65
// NOTE(review): allocated during static initialization; no matching delete
// is visible in this excerpt.
GooList *HtmlOutputDev::imgList=new GooList();
67
// Option flags declared extern here; their definitions are not part of this
// excerpt.
extern GBool complexMode;
68
extern GBool singleHtml;
70
extern GBool printCommands;
71
extern GBool printHtml;
72
extern GBool noframes;
75
extern GBool showHidden;
78
// Returns a newly allocated GooString holding the tail of str that follows
// index i of a backward scan, or a copy of the whole string when the scan
// finishes without returning. The caller receives the new object.
// NOTE(review): the embedded line numbers jump from 82 to 84, so the
// condition guarding the return inside the loop is missing from this
// excerpt -- presumably a path-separator test; confirm against the full file.
static GooString* basename(GooString* str){
80
char *p=str->getCString();
81
int len=str->getLength();
82
// scan backwards from the last character
for (int i=len-1;i>=0;i--)
84
// everything after position i
return new GooString((p+i+1),len-i-1);
85
// fallback: copy of the complete input
return new GooString(str);
89
// Returns a newly allocated GooString holding the first i+1 bytes of str
// (i.e. up to and including index i of a backward scan), or an empty
// GooString when the scan finishes without returning.
// NOTE(review): the embedded line numbers jump from 93 to 95, so the
// condition guarding the return inside the loop is missing from this
// excerpt -- presumably a path-separator test; confirm against the full file.
static GooString* Dirname(GooString* str){
91
char *p=str->getCString();
92
int len=str->getLength();
93
// scan backwards from the last character
for (int i=len-1;i>=0;i--)
95
// prefix up to and including position i
return new GooString(p,i+1);
96
// fallback: empty string
return new GooString();
100
//------------------------------------------------------------------------
102
//------------------------------------------------------------------------
104
// Starts a new text string at the current position of `state`.
//   state    - graphics state supplying position, font and fill colour
//   fontSize - size used to scale ascent/descent into a vertical extent
//   fonts    - font accumulator in which the string's HtmlFont is registered
// Sets x, y, yMin, yMax, fontpos, htext (empty) and dir (unknown).
// NOTE(review): several original lines (the ascent/descent clamping bodies,
// the else header and the trailing sanity check) are missing from this
// excerpt; only the visible statements are described.
HtmlString::HtmlString(GfxState *state, double fontSize, HtmlFontAccu* fonts) {
108
// map the current position through the state's transform into x, y
state->transform(state->getCurX(), state->getCurY(), &x, &y);
109
if ((font = state->getFont())) {
110
double ascent = font->getAscent();
111
double descent = font->getDescent();
113
//printf( "ascent=%.15g is too high, descent=%.15g\n", ascent, descent );
116
if( descent < -0.4 ){
117
//printf( "descent %.15g is too low, ascent=%.15g\n", descent, ascent );
120
// vertical extent derived from the font metrics
yMin = y - ascent * fontSize;
121
yMax = y - descent * fontSize;
123
state->getFillRGB(&rgb);
124
GooString *name = state->getFont()->getName();
125
if (!name) name = HtmlFont::getDefaultFont(); //new GooString("default");
126
// the HTML font is created one unit smaller than fontSize
HtmlFont hfont=HtmlFont(name, static_cast<int>(fontSize-1), rgb);
127
fontpos = fonts->AddFont(hfont);
129
// this means that the PDF file draws text without a current font,
130
// which should never happen
131
yMin = y - 0.95 * fontSize;
132
yMax = y + 0.35 * fontSize;
136
// this is a sanity check for a case that shouldn't happen -- but
137
// if it does happen, we want to avoid dividing by zero later
148
htext=new GooString();
149
dir = textDirUnknown;
153
// Destructor.
// NOTE(review): the body is missing from this excerpt, so what is released
// cannot be stated here -- confirm against the full file.
HtmlString::~HtmlString() {
159
// Appends one character to the string.
//   x, dx - position and advance; x + dx becomes the new right edge
//   u     - the Unicode value to add
// NOTE(review): the lines that grow `size`, store `u` and advance `len` are
// missing from this excerpt.
void HtmlString::addChar(GfxState *state, double x, double y,
160
double dx, double dy, Unicode u) {
161
if (dir == textDirUnknown) {
162
//dir = UnicodeMap::getDirection(u);
163
// direction detection is disabled above; left-to-right is assumed
dir = textDirLeftRight;
168
// resize both parallel arrays to `size` elements
text = (Unicode *)grealloc(text, size * sizeof(Unicode));
169
xRight = (double *)grealloc(xRight, size * sizeof(double));
175
// record the right edge of this character and of the whole string
xMax = xRight[len] = x + dx;
176
//printf("added char: %f %f xright = %f\n", x, dx, x+dx);
180
// Finishes the string: when its direction is right-to-left and it holds more
// than one character, the text array is reversed in place.
void HtmlString::endString()
182
if( dir == textDirRightLeft && len > 1 )
184
//printf("will reverse!\n");
185
// swap symmetric pairs from both ends towards the middle
for (int i = 0; i < len / 2; i++)
187
Unicode ch = text[i];
188
text[i] = text[len - i - 1];
189
text[len - i - 1] = ch;
194
//------------------------------------------------------------------------
196
//------------------------------------------------------------------------
198
// Creates an empty page.
//   rawOrder  - stored in the member of the same name
//   imgExtVal - image file extension, copied into imgExt
// Also allocates a fresh font accumulator and link list.
// NOTE(review): some member initializations are missing from this excerpt.
HtmlPage::HtmlPage(GBool rawOrder, char *imgExtVal) {
199
this->rawOrder = rawOrder;
203
yxCur1 = yxCur2 = NULL;
204
fonts=new HtmlFontAccu();
205
links=new HtmlLinks();
211
imgExt = new GooString(imgExtVal);
214
// Releases the document name, font accumulator, link list and image
// extension owned by the page.
// NOTE(review): one original line (215) is missing from this excerpt.
// The null checks are redundant because delete on a null pointer is a no-op.
HtmlPage::~HtmlPage() {
216
if (DocName) delete DocName;
217
if (fonts) delete fonts;
218
if (links) delete links;
219
if (imgExt) delete imgExt;
222
// Recomputes fontSize from the graphics state, with a heuristic correction
// for Type 3 fonts.
// NOTE(review): the local declarations and the lines that apply the 'm'
// width and guard the font-matrix scaling are missing from this excerpt.
void HtmlPage::updateFont(GfxState *state) {
229
// adjust the font size
230
fontSize = state->getTransformedFontSize();
231
if ((font = state->getFont()) && font->getType() == fontType3) {
232
// This is a hack which makes it possible to deal with some Type 3
233
// fonts.  The problem is that it's impossible to know what the
234
// base coordinate system used in the font is without actually
235
// rendering the font.  This code tries to guess by looking at the
236
// width of the character 'm' (which breaks if the font is a
237
// subset that doesn't contain 'm').
238
// look for a glyph whose name is exactly "m"
for (code = 0; code < 256; ++code) {
239
if ((name = ((Gfx8BitFont *)font)->getCharName(code)) &&
240
name[0] == 'm' && name[1] == '\0') {
245
w = ((Gfx8BitFont *)font)->getWidth(code);
247
// 600 is a generic average 'm' width -- yes, this is a hack
251
fm = font->getFontMatrix();
253
// scale by the ratio of the font matrix's vertical to horizontal entry
fontSize *= fabs(fm[3] / fm[0]);
258
// Begins a new current string built from the graphics state, the page's
// current font size and its font accumulator. The parameter `s` is not used
// by the visible statement.
void HtmlPage::beginString(GfxState *state, GooString *s) {
259
curStr = new HtmlString(state, fontSize, fonts);
263
// Walks the y-major string list and, for each string, rebuilds its HTML text
// via HtmlFont::simple and attaches the link whose area contains the
// string's bounding box, if any.
// NOTE(review): the declarations of tmp, h and linkIndex and the closing
// lines (including the end of the commented-out block) are missing from
// this excerpt.
void HtmlPage::conv(){
268
for(tmp=yxStrings;tmp;tmp=tmp->yxNext){
269
int pos=tmp->fontpos;
270
//  printf("%d\n",pos);
273
// replace any previously built HTML text
if (tmp->htext) delete tmp->htext;
274
tmp->htext=HtmlFont::simple(h,tmp->text,tmp->len);
276
if (links->inLink(tmp->xMin,tmp->yMin,tmp->xMax,tmp->yMax, linkIndex)){
277
tmp->link = links->getLink(linkIndex);
278
/*GooString *t=tmp->htext;
279
tmp->htext=links->getLink(k)->Link(tmp->htext);
287
// Adds the Unicode sequence u[0..uLen) for one glyph to the current string,
// starting a new string first when the glyph is too far from the end of the
// current one.
//   x, y   - glyph position in user space (transformed below)
//   dx, dy - glyph advance
//   u,uLen - Unicode characters mapped from the glyph
// NOTE(review): parts of the distance test, the character-spacing
// adjustment and the division of the advance by uLen are missing from this
// excerpt.
void HtmlPage::addChar(GfxState *state, double x, double y,
288
double dx, double dy,
289
double ox, double oy, Unicode *u, int uLen) {
290
double x1, y1, w1, h1, dx2, dy2;
292
state->transform(x, y, &x1, &y1);
295
// check that new character is in the same direction as current string
296
// and is not too far away from it before adding
297
//if ((UnicodeMap::getDirection(u[0]) != curStr->dir) ||
301
fabs(x1 - curStr->xRight[n-1]) > 0.1 * (curStr->yMax - curStr->yMin))) {
303
beginString(state, NULL);
305
state->textTransformDelta(state->getCharSpace() * state->getHorizScaling(),
309
state->transformDelta(dx, dy, &w1, &h1);
314
// place each Unicode character one step (w1, h1) after the previous one
for (i = 0; i < uLen; ++i) {
315
curStr->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, u[i]);
319
// Finishes the current string and links it into the y-major list
// (yxStrings), using yxCur1/yxCur2 as a cache of the last insertion point.
// NOTE(review): the bodies of most branches (discarding the string, the
// actual list insertion, the cache update) are missing from this excerpt;
// only the visible tests are annotated.
void HtmlPage::endString() {
323
// throw away zero-length strings -- they don't have valid xMin/xMax
324
// values, and they're useless anyway
325
if (curStr->len == 0) {
334
// strings taller than 20 units get special handling (body not visible)
if (curStr->yMax - curStr->yMin > 20) {
341
// insert string in y-major list
342
h = curStr->yMax - curStr->yMin;
343
// reference heights at 50% and 80% of the string's vertical extent
y1 = curStr->yMin + 0.5 * h;
344
y2 = curStr->yMin + 0.8 * h;
348
// fast path: the new string sorts between the cached neighbours
} else if ((!yxCur1 ||
349
(y1 >= yxCur1->yMin &&
350
(y2 >= yxCur1->yMax || curStr->xMax >= yxCur1->xMin))) &&
352
(y1 < yxCur2->yMin ||
353
(y2 < yxCur2->yMax && curStr->xMax < yxCur2->xMin)))) {
357
// slow path: linear search from the head of the list
for (p1 = NULL, p2 = yxStrings; p2; p1 = p2, p2 = p2->yxNext) {
358
if (y1 < p2->yMin || (y2 < p2->yMax && curStr->xMax < p2->xMin))
372
// Searches s for occurrences of ss by calling strstr repeatedly; CloseTags
// uses the result as the position of the last opening tag.
// NOTE(review): the loop body and the return statement are missing from
// this excerpt -- presumably p is advanced to pp each round and the final
// match (or NULL) is returned; confirm against the full file.
static const char *strrstr( const char *s, const char *ss )
374
const char *p = strstr( s, ss );
375
for( const char *pp = p; pp != NULL; pp = strstr( p+1, ss ) ){
381
// Appends the closing tags </a>, </i> and </b> to htext for each flag that
// is set. When more than one tag must be closed, the positions of the last
// "<i>", "<b>" and "<a " in htext decide the order, so that the most
// recently opened tag is closed first.
//   finish_a, finish_italic, finish_bold - in/out flags; the visible code
//   clears finish_italic once </i> has been written early.
// NOTE(review): several guarding conditions and flag resets are missing
// from this excerpt. The relational comparisons may involve NULL pointers,
// whose ordering is unspecified in C++.
static void CloseTags( GooString *htext, GBool &finish_a, GBool &finish_italic, GBool &finish_bold )
383
// a position is only looked up when ordering against another tag matters
const char *last_italic = finish_italic && ( finish_bold   || finish_a    ) ? strrstr( htext->getCString(), "<i>" ) : NULL;
384
const char *last_bold   = finish_bold   && ( finish_italic || finish_a    ) ? strrstr( htext->getCString(), "<b>" ) : NULL;
385
const char *last_a      = finish_a      && ( finish_italic || finish_bold ) ? strrstr( htext->getCString(), "<a " ) : NULL;
386
// the link was opened last: close it first
if( finish_a && ( finish_italic || finish_bold ) && last_a > ( last_italic > last_bold ? last_italic : last_bold ) ){
387
htext->append("</a>", 4);
390
// italic was opened after bold: close italic first
if( finish_italic && finish_bold && last_italic > last_bold ){
391
htext->append("</i>", 4);
392
finish_italic = false;
395
htext->append("</b>", 4);
397
htext->append("</i>", 4);
399
htext->append("</a>");
402
// Merges neighbouring strings of the y-major list into longer strings and
// wraps their HTML text in <b>, <i> and link tags.
// Visible stages:
//   1. (outside complex mode) drop strings that duplicate a nearby string
//      with identical text and almost identical bounds;
//   2. open the tags for the first string;
//   3. walk the list, appending str2 to str1 when the geometric, font and
//      direction tests pass (inserting a space or "<br>" as needed),
//      otherwise closing str1's tags and moving on;
//   4. close the tags of the last string.
// NOTE(review): many original lines are missing from this excerpt (loop
// headers, the start of the merge condition, else branches, closing braces,
// #endif lines), so the exact control flow must be confirmed against the
// full file.
void HtmlPage::coalesce() {
403
HtmlString *str1, *str2;
404
HtmlFont *hfont1, *hfont2;
405
double space, horSpace, vertSpace, vertOverlap;
406
GBool addSpace, addLineBreak;
410
#if 0 //~ for debugging
411
for (str1 = yxStrings; str1; str1 = str1->yxNext) {
412
printf("x=%f..%f  y=%f..%f  size=%2d '",
413
str1->xMin, str1->xMax, str1->yMin, str1->yMax,
414
(int)(str1->yMax - str1->yMin));
415
for (i = 0; i < str1->len; ++i) {
416
fputc(str1->text[i] & 0xff, stdout);
420
printf("\n------------------------------------------------------------\n\n");
426
//----- discard duplicated text (fake boldface, drop shadows)
428
{	/* if not in complex mode get rid of duplicate strings */
433
double size = str1->yMax - str1->yMin;
434
// only strings starting within 20% of the height to the right are checked
double xLimit = str1->xMin + size * 0.2;
436
for (str2 = str1, str3 = str1->yxNext;
437
str3 && str3->xMin < xLimit;
438
str2 = str3, str3 = str2->yxNext)
440
// duplicate: same text and all compared edges within 20% of the height
if (str3->len == str1->len &&
441
!memcmp(str3->text, str1->text, str1->len * sizeof(Unicode)) &&
442
fabs(str3->yMin - str1->yMin) < size * 0.2 &&
443
fabs(str3->yMax - str1->yMax) < size * 0.2 &&
444
fabs(str3->xMax - str1->xMax) < size * 0.2)
447
//printf("found duplicate!\n");
453
// unlink str3 from both lists
str2->xyNext = str3->xyNext;
454
str2->yxNext = str3->yxNext;
462
}	/*- !complexMode */
466
// open the tags for the first string (inserted at the front of its text)
hfont1 = getFont(str1);
467
if( hfont1->isBold() )
468
str1->htext->insert(0,"<b>",3);
469
if( hfont1->isItalic() )
470
str1->htext->insert(0,"<i>",3);
471
if( str1->getLink() != NULL ) {
472
GooString *ls = str1->getLink()->getLinkStart();
473
str1->htext->insert(0, ls);
476
// remember the origin of the merged run
curX = str1->xMin; curY = str1->yMin;
478
while (str1 && (str2 = str1->yxNext)) {
479
hfont2 = getFont(str2);
480
space = str1->yMax - str1->yMin;
481
horSpace = str2->xMin - str1->xMax;
482
// a line break is only considered when both strings start at nearly the same x
addLineBreak = !noMerge && (fabs(str1->xMin - str2->xMin) < 0.4);
483
vertSpace = str2->yMin - str1->yMax;
485
//printf("coalesce %d %d %f? ", str1->dir, str2->dir, d);
487
if (str2->yMin >= str1->yMin && str2->yMin <= str1->yMax)
489
vertOverlap = str1->yMax - str2->yMin;
491
if (str2->yMax >= str1->yMin && str2->yMax <= str1->yMax)
493
vertOverlap = str2->yMax - str1->yMin;
503
(rawOrder && vertOverlap > 0.5 * space)
505
(!rawOrder && str2->yMin < str1->yMax)
507
(horSpace > -0.5 * space && horSpace < space)
509
(vertSpace >= 0 && vertSpace < 0.5 * space && addLineBreak)
511
(!complexMode || (hfont1->isEqualIgnoreBold(*hfont2))) && // in complex mode fonts must be the same, in other modes fonts do not matter
512
str1->dir == str2->dir // text direction the same
516
n = str1->len + str2->len;
517
if ((addSpace = horSpace > 0.1 * space)) {
524
// round the capacity up to a multiple of 16 elements
str1->size = (n + 15) & ~15;
525
str1->text = (Unicode *)grealloc(str1->text,
526
str1->size * sizeof(Unicode));
527
str1->xRight = (double *)grealloc(str1->xRight,
528
str1->size * sizeof(double));
530
str1->text[str1->len] = 0x20;
531
str1->htext->append(xml?" ":" ");
532
str1->xRight[str1->len] = str2->xMin;
536
str1->text[str1->len] = '\n';
537
str1->htext->append("<br>");
538
str1->xRight[str1->len] = str2->xMin;
540
str1->yMin = str2->yMin;
541
str1->yMax = str2->yMax;
542
str1->xMax = str2->xMax;
543
// register a font variant when the measured line height differs
int fontLineSize = hfont1->getLineSize();
544
int curLineSize = (int)(vertSpace + space);
545
if( curLineSize != fontLineSize )
547
// NOTE(review): no delete of newfnt is visible in this excerpt
HtmlFont *newfnt = new HtmlFont(*hfont1);
548
newfnt->setLineSize(curLineSize);
549
str1->fontpos = fonts->AddFont(*newfnt);
551
hfont1 = getFont(str1);
552
// we have to reget hfont2 because its location could have
554
hfont2 = getFont(str2);
557
// copy str2's characters and right edges onto the end of str1
for (i = 0; i < str2->len; ++i) {
558
str1->text[str1->len] = str2->text[i];
559
str1->xRight[str1->len] = str2->xRight[i];
563
/* fix <i>, <b> if str1 and str2 differ and handle switch of links */
564
HtmlLink *hlink1 = str1->getLink();
565
HtmlLink *hlink2 = str2->getLink();
566
bool switch_links = !hlink1 || !hlink2 || !hlink1->isEqualDest(*hlink2);
567
GBool finish_a = switch_links && hlink1 != NULL;
568
GBool finish_italic = hfont1->isItalic() && ( !hfont2->isItalic() || finish_a );
569
GBool finish_bold   = hfont1->isBold()   && ( !hfont2->isBold()   || finish_a || finish_italic );
570
CloseTags( str1->htext, finish_a, finish_italic, finish_bold );
571
if( switch_links && hlink2 != NULL ) {
572
GooString *ls = hlink2->getLinkStart();
573
str1->htext->append(ls);
576
// reopen the styles that str2 needs and that are not still open
if( ( !hfont1->isItalic() || finish_italic ) && hfont2->isItalic() )
577
str1->htext->append("<i>", 3);
578
if( ( !hfont1->isBold() || finish_bold ) && hfont2->isBold() )
579
str1->htext->append("<b>", 3);
582
str1->htext->append(str2->htext);
583
// str1 now contains href for link of str2 (if it is defined)
584
str1->link = str2->link;
586
if (str2->xMax > str1->xMax) {
587
str1->xMax = str2->xMax;
589
if (str2->yMax > str1->yMax) {
590
str1->yMax = str2->yMax;
592
// unlink str2 from the list now that it has been merged
str1->yxNext = str2->yxNext;
594
} else { // keep strings separate
596
GBool finish_a = str1->getLink() != NULL;
597
GBool finish_bold   = hfont1->isBold();
598
GBool finish_italic = hfont1->isItalic();
599
CloseTags( str1->htext, finish_a, finish_italic, finish_bold );
601
str1->xMin = curX; str1->yMin = curY;
603
curX = str1->xMin; curY = str1->yMin;
605
if( hfont1->isBold() )
606
str1->htext->insert(0,"<b>",3);
607
if( hfont1->isItalic() )
608
str1->htext->insert(0,"<i>",3);
609
if( str1->getLink() != NULL ) {
610
GooString *ls = str1->getLink()->getLinkStart();
611
str1->htext->insert(0, ls);
616
str1->xMin = curX; str1->yMin = curY;
618
// close whatever is still open on the last string
GBool finish_bold   = hfont1->isBold();
619
GBool finish_italic = hfont1->isItalic();
620
GBool finish_a = str1->getLink() != NULL;
621
CloseTags( str1->htext, finish_a, finish_italic, finish_bold );
623
#if 0 //~ for debugging
624
for (str1 = yxStrings; str1; str1 = str1->yxNext) {
625
printf("x=%3d..%3d  y=%3d..%3d  size=%2d ",
626
(int)str1->xMin, (int)str1->xMax, (int)str1->yMin, (int)str1->yMax,
627
(int)(str1->yMax - str1->yMin));
628
printf("'%s'\n", str1->htext->getCString());
630
printf("\n------------------------------------------------------------\n\n");
635
// Writes the page to f as a <page> element: first the font specifications
// added since fontsPageMarker, then one <text> element per string with its
// rounded position, size and font index, then the closing </page>.
//   f    - output stream
//   page - page number written into the "number" attribute
// NOTE(review): the statements that release fontCSStyle, str and str1 are
// missing from this excerpt.
void HtmlPage::dumpAsXML(FILE* f,int page){
636
fprintf(f, "<page number=\"%d\" position=\"absolute\"", page);
637
fprintf(f," top=\"0\" left=\"0\" height=\"%d\" width=\"%d\">\n", pageHeight,pageWidth);
639
// only fonts registered from fontsPageMarker onwards are written
for(int i=fontsPageMarker;i < fonts->size();i++) {
640
GooString *fontCSStyle = fonts->CSStyle(i);
641
fprintf(f,"\t%s\n",fontCSStyle->getCString());
645
GooString *str, *str1 = NULL;
646
for(HtmlString *tmp=yxStrings;tmp;tmp=tmp->yxNext){
648
str=new GooString(tmp->htext);
649
fprintf(f,"<text top=\"%d\" left=\"%d\" ",xoutRound(tmp->yMin),xoutRound(tmp->xMin));
650
fprintf(f,"width=\"%d\" height=\"%d\" ",xoutRound(tmp->xMax-tmp->xMin),xoutRound(tmp->yMax-tmp->yMin));
651
fprintf(f,"font=\"%d\">", tmp->fontpos);
652
str1=fonts->getCSStyle(tmp->fontpos, str);
653
fputs(str1->getCString(),f);
656
fputs("</text>\n",f);
659
fputs("</page>\n",f);
663
// Writes the page as absolutely positioned HTML: document head with
// encoding META tag, a relative-positioned page DIV, the per-page font
// styles, a background image, and one absolutely positioned DIV per string.
//   file - stream passed by the caller (its use is not visible here)
//   page - page number used in file names, titles and anchors
// NOTE(review): the conditions selecting between the paired alternatives
// below (two fopen calls, two title lines, two META lines, two CSStyle and
// getCSStyle calls) and all cleanup statements are missing from this
// excerpt -- presumably they depend on singleHtml; confirm against the full
// file.
void HtmlPage::dumpComplex(FILE *file, int page){
668
// remember the first page dumped; used to number background images
if( firstPage == -1 ) firstPage = page;
672
GooString* pgNum=GooString::fromInt(page);
673
tmp = new GooString(DocName);
675
// output name "<DocName>-<page>.html", opened for writing
tmp->append('-')->append(pgNum)->append(".html");
676
pageFile = fopen(tmp->getCString(), "w");
678
// output name "<DocName>-html.html", opened for appending
tmp->append("-html")->append(".html");
679
pageFile = fopen(tmp->getCString(), "a");
683
error(-1, "Couldn't open html file '%s'", tmp->getCString());
689
fprintf(pageFile,"%s\n<HTML>\n<HEAD>\n<TITLE>Page %d</TITLE>\n\n", DOCTYPE, page);
691
fprintf(pageFile,"%s\n<HTML>\n<HEAD>\n<TITLE>%s</TITLE>\n\n", DOCTYPE, tmp->getCString());
695
htmlEncoding = HtmlOutputDev::mapEncodingToHtml
696
(globalParams->getTextEncodingName());
698
fprintf(pageFile, "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\">\n", htmlEncoding);
700
fprintf(pageFile, "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\">\n <br>\n", htmlEncoding);
705
fprintf(pageFile,"<!-- Page %d -->\n", page);
706
fprintf(pageFile,"<a name=\"%d\"></a>\n", page);
709
fprintf(pageFile,"<DIV style=\"position:relative;width:%d;height:%d;\">\n",
710
pageWidth, pageHeight);
712
// from here on tmp holds the document's base name (used for image URLs)
tmp=basename(DocName);
714
fputs("<STYLE type=\"text/css\">\n<!--\n",pageFile);
715
for(int i=fontsPageMarker;i!=fonts->size();i++) {
716
GooString *fontCSStyle;
718
fontCSStyle = fonts->CSStyle(i);
720
fontCSStyle = fonts->CSStyle(i,page);
721
fprintf(pageFile,"\t%s\n",fontCSStyle->getCString());
725
fputs("-->\n</STYLE>\n",pageFile);
729
fputs("</HEAD>\n<BODY bgcolor=\"#A0A0A0\" vlink=\"blue\" link=\"blue\">\n",pageFile);
735
"<IMG width=\"%d\" height=\"%d\" src=\"%s%03d.%s\" alt=\"background image\">\n",
736
pageWidth, pageHeight, tmp->getCString(),
737
(page-firstPage+1), imgExt->getCString());
742
GooString *str, *str1 = NULL;
743
for(HtmlString *tmp1=yxStrings;tmp1;tmp1=tmp1->yxNext){
745
str=new GooString(tmp1->htext);
747
"<DIV style=\"position:absolute;top:%d;left:%d\">",
748
xoutRound(tmp1->yMin),
749
xoutRound(tmp1->xMin));
750
fputs("<nobr>",pageFile);
752
str1=fonts->getCSStyle(tmp1->fontpos, str);
754
str1=fonts->getCSStyle(tmp1->fontpos, str, page);
755
fputs(str1->getCString(),pageFile);
758
fputs("</nobr></DIV>\n",pageFile);
762
fputs("</DIV>\n", pageFile);
766
fputs("</BODY>\n</HTML>\n",pageFile);
772
// Writes the page to f. In complex or single-HTML mode it delegates to
// dumpAsXML or dumpComplex depending on `xml`; the remaining visible
// statements write a page anchor, an <IMG> tag for every queued image name,
// and the HTML text of every string.
// NOTE(review): the else header separating the two modes, the declaration
// of str and the cleanup of fName/str are missing from this excerpt.
void HtmlPage::dump(FILE *f, int pageNum)
774
if (complexMode || singleHtml)
776
if (xml) dumpAsXML(f, pageNum);
777
if (!xml) dumpComplex(f, pageNum);
781
fprintf(f,"<A name=%d></a>",pageNum);
782
// Loop over the list of image names on this page
783
int listlen=HtmlOutputDev::imgList->getLength();
784
for (int i = 0; i < listlen; i++) {
785
// always remove the head, so the list is empty after the loop
GooString *fName= (GooString *)HtmlOutputDev::imgList->del(0);
786
fprintf(f,"<IMG src=\"%s\"><br>\n",fName->getCString());
789
// restart image numbering for the next page
HtmlOutputDev::imgNum=1;
792
for(HtmlString *tmp=yxStrings;tmp;tmp=tmp->yxNext){
794
str=new GooString(tmp->htext);
795
fputs(str->getCString(),f);
806
// Resets the page for reuse: walks the string list, clears the insertion
// cache, and re-creates or re-marks the font accumulator and link list.
// NOTE(review): the loop body, the conditions choosing between allocating a
// new HtmlFontAccu and only updating fontsPageMarker, and the deletes of
// the old objects are missing from this excerpt.
void HtmlPage::clear() {
813
for (p1 = yxStrings; p1; p1 = p2) {
819
yxCur1 = yxCur2 = NULL;
824
fonts=new HtmlFontAccu();
829
// fonts added from this index onwards belong to the next page
fontsPageMarker = fonts->size();
833
links=new HtmlLinks();
838
// Stores a copy of fname as the page's document name.
// NOTE(review): the visible statement does not free a previously set
// DocName.
void HtmlPage::setDocName(char *fname){
839
DocName=new GooString(fname);
842
//------------------------------------------------------------------------
844
//------------------------------------------------------------------------
846
// Creates a meta variable, copying _name and _content into newly allocated
// GooStrings.
HtmlMetaVar::HtmlMetaVar(char *_name, char *_content)
848
name = new GooString(_name);
849
content = new GooString(_content);
852
// Destructor.
// NOTE(review): the body is missing from this excerpt -- presumably it
// releases name and content; confirm against the full file.
HtmlMetaVar::~HtmlMetaVar()
858
// Builds the tag <META name="NAME" content="CONTENT"> in a newly allocated
// GooString.
// NOTE(review): name and content are appended without HTML escaping; the
// return statement is missing from this excerpt.
GooString* HtmlMetaVar::toString()
860
GooString *result = new GooString("<META name=\"");
861
result->append(name);
862
result->append("\" content=\"");
863
result->append(content);
864
result->append("\">");
868
//------------------------------------------------------------------------
870
//------------------------------------------------------------------------
872
// Table of {encoding name, HTML charset name} pairs searched by
// mapEncodingToHtml, which stops at an entry whose first element is NULL.
// NOTE(review): the remaining rows and the terminating entry are missing
// from this excerpt.
static char* HtmlEncodings[][2] = {
873
{"Latin1", "ISO-8859-1"},
878
// Looks up `encoding` in the HtmlEncodings table (exact strcmp match on the
// first column) and returns the second column of the matching row.
// NOTE(review): the fallback return used when nothing matches is missing
// from this excerpt.
char* HtmlOutputDev::mapEncodingToHtml(GooString* encoding)
880
char* enc = encoding->getCString();
881
// the table ends at the first row whose name is NULL
for(int i = 0; HtmlEncodings[i][0] != NULL; i++)
883
if( strcmp(enc, HtmlEncodings[i][0]) == 0 )
885
return HtmlEncodings[i][1];
891
// Writes the frameset document "<Docname>.html": head with title, encoding
// and meta variables, then a two-column FRAMESET whose left frame is
// "<base>_ind.html" and whose right frame is the contents page.
//   firstPage - page number used in the contents frame URL "<base>-N.html"
// NOTE(review): the early return after the fopen failure, the condition
// choosing between the two contents URLs, and the deletes of fName are
// missing from this excerpt.
void HtmlOutputDev::doFrame(int firstPage){
892
GooString* fName=new GooString(Docname);
894
fName->append(".html");
896
if (!(fContentsFrame = fopen(fName->getCString(), "w"))){
897
error(-1, "Couldn't open html file '%s'", fName->getCString());
904
// from here on fName is the base name used inside the frame URLs
fName=basename(Docname);
905
fputs(DOCTYPE_FRAMES, fContentsFrame);
906
fputs("\n<HTML>",fContentsFrame);
907
fputs("\n<HEAD>",fContentsFrame);
908
fprintf(fContentsFrame,"\n<TITLE>%s</TITLE>",docTitle->getCString());
909
htmlEncoding = mapEncodingToHtml(globalParams->getTextEncodingName());
910
fprintf(fContentsFrame, "\n<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\">\n", htmlEncoding);
911
dumpMetaVars(fContentsFrame);
912
fprintf(fContentsFrame, "</HEAD>\n");
913
fputs("<FRAMESET cols=\"100,*\">\n",fContentsFrame);
914
fprintf(fContentsFrame,"<FRAME name=\"links\" src=\"%s_ind.html\">\n",fName->getCString());
915
fputs("<FRAME name=\"contents\" src=",fContentsFrame);
917
fprintf(fContentsFrame,"\"%s-%d.html\"",fName->getCString(), firstPage);
919
fprintf(fContentsFrame,"\"%ss.html\"",fName->getCString());
921
fputs(">\n</FRAMESET>\n</HTML>\n",fContentsFrame);
924
fclose(fContentsFrame);
927
// Sets up the output device: stores the options, creates the HtmlPage and
// the list of meta variables, and opens the output files.
//   fileName - base name for all output files
//   title    - document title written into the HTML head
//   author, keywords, subject, date - optional meta values (skipped if NULL)
//   rawOrder, outline - stored in rawOrder / doOutline
//   firstPage - not stored by the visible code (assignment commented out)
// Output files visible here: "<fileName>_ind.html" (left frame, non-XML
// output with frames), "<fileName>s.html" (non-complex framed contents),
// and "<fileName>.html" or "<fileName>.xml" (single-file output).
// NOTE(review): one parameter line (original line 929, which presumably
// declares `extension`) and the mode conditions, error-path returns and
// deletes are missing from this excerpt.
HtmlOutputDev::HtmlOutputDev(char *fileName, char *title,
928
char *author, char *keywords, char *subject, char *date,
930
GBool rawOrder, int firstPage, GBool outline)
934
fContentsFrame = NULL;
935
docTitle = new GooString(title);
939
this->rawOrder = rawOrder;
940
this->doOutline = outline;
943
//this->firstPage = firstPage;
947
pages = new HtmlPage(rawOrder, extension);
949
glMetaVars = new GooList();
950
glMetaVars->append(new HtmlMetaVar("generator", "pdftohtml 0.36"));
951
if( author ) glMetaVars->append(new HtmlMetaVar("author", author));
952
if( keywords ) glMetaVars->append(new HtmlMetaVar("keywords", keywords));
953
if( date ) glMetaVars->append(new HtmlMetaVar("date", date));
954
if( subject ) glMetaVars->append(new HtmlMetaVar("subject", subject));
959
pages->setDocName(fileName);
960
Docname=new GooString (fileName);
962
// for non-xml output (complex or simple) with frames generate the left frame
963
if(!xml && !noframes)
967
GooString* left=new GooString(fileName);
968
left->append("_ind.html");
972
if (!(fContentsFrame = fopen(left->getCString(), "w")))
974
error(-1, "Couldn't open html file '%s'", left->getCString());
979
fputs(DOCTYPE, fContentsFrame);
980
fputs("<HTML>\n<HEAD>\n<TITLE></TITLE>\n</HEAD>\n<BODY>\n",fContentsFrame);
984
// link to the outline: a separate file in complex mode, an anchor otherwise
GooString *str = basename(Docname);
985
fprintf(fContentsFrame, "<A href=\"%s%s\" target=\"contents\">Outline</a><br>", str->getCString(), complexMode ? "-outline.html" : "s.html#outline");
990
{       /* not in complex mode */
992
GooString* right=new GooString(fileName);
993
right->append("s.html");
995
if (!(page=fopen(right->getCString(),"w"))){
996
error(-1, "Couldn't open html file '%s'", right->getCString());
1001
fputs(DOCTYPE, page);
1002
fputs("<HTML>\n<HEAD>\n<TITLE></TITLE>\n</HEAD>\n<BODY>\n",page);
1007
if (stout) page=stdout;
1009
GooString* right=new GooString(fileName);
1010
if (!xml) right->append(".html");
1011
if (xml) right->append(".xml");
1012
if (!(page=fopen(right->getCString(),"w"))){
1013
error(-1, "Couldn't open html file '%s'", right->getCString());
1020
htmlEncoding = mapEncodingToHtml(globalParams->getTextEncodingName());
1023
// XML output: declaration, DOCTYPE and root element
fprintf(page, "<?xml version=\"1.0\" encoding=\"%s\"?>\n", htmlEncoding);
1024
fputs("<!DOCTYPE pdf2xml SYSTEM \"pdf2xml.dtd\">\n\n", page);
1025
fputs("<pdf2xml>\n",page);
1029
// HTML output: head with title and encoding, then the opening BODY tag
fprintf(page,"%s\n<HTML>\n<HEAD>\n<TITLE>%s</TITLE>\n",
1030
DOCTYPE, docTitle->getCString());
1032
fprintf(page, "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\">\n", htmlEncoding);
1035
fprintf(page,"</HEAD>\n");
1036
fprintf(page,"<BODY bgcolor=\"#A0A0A0\" vlink=\"blue\" link=\"blue\">\n");
1042
// Releases the meta-variable list, finishes and closes the left-frame file
// when one is open, and writes the closing markup of the main output.
// NOTE(review): the deletes of the other members, the condition guarding
// the </pdf2xml> line and the fclose of `page` are missing from this
// excerpt.
HtmlOutputDev::~HtmlOutputDev() {
1048
deleteGooList(glMetaVars, HtmlMetaVar);
1050
if (fContentsFrame){
1051
fputs("</BODY>\n</HTML>\n",fContentsFrame);
1052
fclose(fContentsFrame);
1055
fputs("</pdf2xml>\n",page);
1058
if ( !complexMode || xml || noframes )
1060
fputs("</BODY>\n</HTML>\n",page);
1067
// Prepares for a new page: writes page geometry to "image.log" next to the
// document, records the page number, adds a "Page N" entry to the left
// frame and stores the page size in the HtmlPage.
//   pageNum - number of the page being started
//   state   - graphics state supplying the page box and size
// NOTE(review): the conditions around the image.log section and the two
// alternative link formats, plus cleanup statements, are missing from this
// excerpt.
void HtmlOutputDev::startPage(int pageNum, GfxState *state) {
1072
GooString* fname=Dirname(Docname);
1073
fname->append("image.log");
1074
if((tin=fopen(getFileNameFromPath(fname->getCString(),fname->getLength()),"w"))==NULL){
1075
// NOTE(review): fname is a GooString*, but %s expects a char* -- this
// looks like it should pass fname->getCString(); confirm and fix.
printf("Error : can not open %s",fname);
1079
// if(state->getRotation()!=0)
1080
//  fprintf(tin,"ROTATE=%d rotate %d neg %d neg translate\n",state->getRotation(),state->getX1(),-state->getY1());
1082
fprintf(tin,"ROTATE=%d neg %d neg translate\n",state->getX1(),state->getY1());
1087
this->pageNum = pageNum;
1088
GooString *str=basename(Docname);
1095
fprintf(fContentsFrame,"<A href=\"%s-%d.html\"",str->getCString(),pageNum);
1097
fprintf(fContentsFrame,"<A href=\"%ss.html#%d\"",str->getCString(),pageNum);
1098
fprintf(fContentsFrame," target=\"contents\" >Page %d</a><br>\n",pageNum);
1102
// page size is truncated to whole units
pages->pageWidth=static_cast<int>(state->getPageWidth());
1103
pages->pageHeight=static_cast<int>(state->getPageHeight());
1109
// Finishes the current page: processes every link annotation of the page,
// dumps the page to the output file, records its size as the maximum page
// size, and prints a progress line unless output goes to stdout or errors
// are silenced.
// NOTE(review): the calls between link processing and dump (original lines
// 1114-1118) and after dump are missing from this excerpt.
void HtmlOutputDev::endPage() {
1110
Links *linksList = docPage->getLinks(catalog);
1111
for (int i = 0; i < linksList->getNumLinks(); ++i)
1113
doProcessLink(linksList->getLink(i));
1119
pages->dump(page, pageNum);
1121
// I don't yet know what to do in the case when there are pages of different
1122
// sizes and we want complex output: running ghostscript many times
1123
// seems very inefficient. So for now I'll just use last page's size
1124
maxPageWidth = pages->pageWidth;
1125
maxPageHeight = pages->pageHeight;
1127
//if(!noframes&&!xml) fputs("<br>\n", fContentsFrame);
1128
if(!stout && !globalParams->getErrQuiet()) printf("Page-%d\n",(pageNum));
1131
// Forwards the font-change notification to the current HtmlPage.
void HtmlOutputDev::updateFont(GfxState *state) {
1132
pages->updateFont(state);
1135
// Forwards the start of a text string to the current HtmlPage.
void HtmlOutputDev::beginString(GfxState *state, GooString *s) {
1136
pages->beginString(state, s);
1139
// Called at the end of a text string.
// NOTE(review): the body is missing from this excerpt -- presumably it
// forwards to pages->endString(); confirm against the full file.
void HtmlOutputDev::endString(GfxState *state) {
1143
// Passes one drawn glyph and its Unicode mapping to the current HtmlPage.
// Glyphs whose render mode has both low bits set are tested separately
// unless showHidden is enabled.
// NOTE(review): the body of that test (original lines 1149-1150) is missing
// from this excerpt -- presumably an early return that skips invisible
// text; confirm against the full file.
void HtmlOutputDev::drawChar(GfxState *state, double x, double y,
1144
double dx, double dy,
1145
double originX, double originY,
1146
CharCode code, int /*nBytes*/, Unicode *u, int uLen)
1148
if ( !showHidden && (state->getRender() & 3) == 3) {
1151
pages->addChar(state, x, y, dx, dy, originX, originY, u, uLen);
1154
// Handles an image mask. When images are ignored or complex mode is on, the
// call is passed to the base OutputDev. Otherwise a DCT-encoded stream (if
// dumpJPEG is set) is copied raw to "<Docname><page>_<img>.jpg" and its
// name queued in imgList; other streams fall back to the base class.
// NOTE(review): the early return after the first base-class call, the
// declarations of f1 and c, the branches choosing x0/w0 and y0/h0, the loop
// body that writes the bytes, and all cleanup are missing from this
// excerpt.
void HtmlOutputDev::drawImageMask(GfxState *state, Object *ref, Stream *str,
1155
int width, int height, GBool invert,
1156
GBool interpolate, GBool inlineImg) {
1158
if (ignore||complexMode) {
1159
OutputDev::drawImageMask(state, ref, str, width, height, invert, interpolate, inlineImg);
1166
int x0, y0;			// top left corner of image
1167
int w0, h0, w1, h1;		// size of image
1168
double xt, yt, wt, ht;
1169
GBool rotate, xFlip, yFlip;
1171
// get image position and size
1172
state->transform(0, 0, &xt, &yt);
1173
state->transformDelta(1, 1, &wt, &ht);
1178
x0 = xoutRound(xt + wt);
1179
w0 = xoutRound(-wt);
1185
y0 = xoutRound(yt + ht);
1186
h0 = xoutRound(-ht);
1188
// the unit x vector mapping mostly onto y is treated as a rotated image
state->transformDelta(1, 0, &xt, &yt);
1189
rotate = fabs(xt) < fabs(yt);
1203
if (dumpJPEG  && str->getKind() == strDCT) {
1204
GooString *fName=new GooString(Docname);
1206
GooString *pgNum=GooString::fromInt(pageNum);
1207
GooString *imgnum=GooString::fromInt(imgNum);
1208
// open the image file
1209
fName->append(pgNum)->append("_")->append(imgnum)->append(".jpg");
1214
if (!(f1 = fopen(fName->getCString(), "wb"))) {
1215
error(-1, "Couldn't open image file '%s'", fName->getCString());
1220
// initialize stream
1221
str = ((DCTStream *)str)->getRawStream();
1225
// copy the undecoded stream byte by byte until EOF
while ((c = str->getChar()) != EOF)
1230
if (fName) imgList->append(fName);
1233
OutputDev::drawImageMask(state, ref, str, width, height, invert, interpolate, inlineImg);
1237
// Handles a colour image. When images are ignored or complex mode is on,
// the call is passed to the base OutputDev. Otherwise:
//   - a DCT-encoded stream (if dumpJPEG is set) is copied raw to
//     "<Docname><page>_<img>.jpg";
//   - with ENABLE_LIBPNG, other images are decoded row by row, converted to
//     8-bit RGB and written to "<Docname><page>_<img>.png";
//   - the base-class implementation is the remaining fallback.
// The file name of each written image is queued in imgList.
// NOTE(review): early returns, the declarations of f1, c, p and rgb, the
// branches choosing x0/w0 and y0/h0, error-path cleanup, the #else/#endif
// lines and all deletes are missing from this excerpt.
void HtmlOutputDev::drawImage(GfxState *state, Object *ref, Stream *str,
1238
int width, int height, GfxImageColorMap *colorMap,
1239
GBool interpolate, int *maskColors, GBool inlineImg) {
1241
if (ignore||complexMode) {
1242
OutputDev::drawImage(state, ref, str, width, height, colorMap, interpolate,
1243
maskColors, inlineImg);
1250
int x0, y0;			// top left corner of image
1251
int w0, h0, w1, h1;		// size of image
1252
double xt, yt, wt, ht;
1253
GBool rotate, xFlip, yFlip;
1255
// get image position and size
1256
state->transform(0, 0, &xt, &yt);
1257
state->transformDelta(1, 1, &wt, &ht);
1262
x0 = xoutRound(xt + wt);
1263
w0 = xoutRound(-wt);
1269
y0 = xoutRound(yt + ht);
1270
h0 = xoutRound(-ht);
1272
// the unit x vector mapping mostly onto y is treated as a rotated image
state->transformDelta(1, 0, &xt, &yt);
1273
rotate = fabs(xt) < fabs(yt);
1287
/*if( !globalParams->getErrQuiet() )
1288
printf("image stream of kind %d\n", str->getKind());*/
1290
if (dumpJPEG && str->getKind() == strDCT) {
1291
GooString *fName=new GooString(Docname);
1293
GooString *pgNum= GooString::fromInt(pageNum);
1294
GooString *imgnum= GooString::fromInt(imgNum);
1296
// open the image file
1297
fName->append(pgNum)->append("_")->append(imgnum)->append(".jpg");
1303
if (!(f1 = fopen(fName->getCString(), "wb"))) {
1304
error(-1, "Couldn't open image file '%s'", fName->getCString());
1309
// initialize stream
1310
str = ((DCTStream *)str)->getRawStream();
1314
// copy the undecoded stream byte by byte until EOF
while ((c = str->getChar()) != EOF)
1319
if (fName) imgList->append(fName);
1322
#ifdef ENABLE_LIBPNG
1323
// Dump the image as a PNG file. Much of the PNG code
1324
// comes from an example by Guillaume Cottenceau.
1327
// NOTE(review): the malloc result is used without a visible null check
png_byte *row = (png_byte *) malloc(3 * width);   // 3 bytes/pixel: RGB
1328
png_bytep *row_pointer= &row;
1330
// Create the image filename
1331
GooString *fName=new GooString(Docname);
1333
GooString *pgNum= GooString::fromInt(pageNum);
1334
GooString *imgnum= GooString::fromInt(imgNum);
1335
fName->append(pgNum)->append("_")->append(imgnum)->append(".png");
1339
// Open the image file
1340
if (!(f1 = fopen(fName->getCString(), "wb"))) {
1341
error(-1, "Couldn't open image file '%s'", fName->getCString());
1346
PNGWriter *writer = new PNGWriter();
1347
// TODO can we calculate the resolution of the image?
1348
if (!writer->init(f1, width, height, 72, 72)) {
1354
// Initialize the image stream
1355
ImageStream *imgStr = new ImageStream(str, width,
1356
colorMap->getNumPixelComps(), colorMap->getBits());
1360
for (int y = 0; y < height; y++) {
1362
// Convert into a PNG row
1363
p = imgStr->getLine();
1364
for (int x = 0; x < width; x++) {
1365
colorMap->getRGB(p, &rgb);
1366
// Write the RGB pixels into the row
1367
row[3*x]= colToByte(rgb.r);
1368
row[3*x+1]= colToByte(rgb.g);
1369
row[3*x+2]= colToByte(rgb.b);
1370
// advance to the next pixel's components
p += colorMap->getNumPixelComps();
1373
if (!writer->writeRow(row_pointer)) {
1385
imgList->append(fName);
1390
OutputDev::drawImage(state, ref, str, width, height, colorMap, interpolate,
1391
maskColors, inlineImg);
1398
// Converts one link annotation into an HtmlLink: reads its rectangle,
// converts both corners from user to device coordinates, resolves the
// destination string and builds the HtmlLink from them.
// NOTE(review): the declarations of x1, y1, x2, y2 and the statements that
// register the link with the page and free _dest are missing from this
// excerpt.
void HtmlOutputDev::doProcessLink(Link* link){
1399
double _x1,_y1,_x2,_y2;
1402
link->getRect(&_x1,&_y1,&_x2,&_y2);
1403
cvtUserToDev(_x1,_y1,&x1,&y1);
1405
cvtUserToDev(_x2,_y2,&x2,&y2);
1408
GooString* _dest=getLinkDest(link,catalog);
1409
// y2 and y1 are passed in swapped order relative to x1/x2
HtmlLink t((double) x1,(double) y2,(double) x2,(double) y1,_dest);
1414
// Builds the href string for a link, depending on the kind of its action.
// The visible sections handle, in order: a destination inside this document
// (LinkGoTo), a destination in another file (LinkGoToR), a URI (LinkURI)
// and a launch action (LinkLaunch); for the file-based kinds a trailing
// ".pdf"/".PDF" is replaced by ".html". An empty GooString is returned on
// the visible fallback paths.
// NOTE(review): the case labels, the mode conditions choosing between the
// ".html#", "-N.html" and "s.html#" forms, the null checks on dest, the
// return statements for each case and all deletes are missing from this
// excerpt. The two bare text lines below were inside a block comment whose
// delimiters were lost.
GooString* HtmlOutputDev::getLinkDest(Link *link,Catalog* catalog){
1416
switch(link->getAction()->getKind())
1420
GooString* file=basename(Docname);
1422
LinkGoTo *ha=(LinkGoTo *)link->getAction();
1423
LinkDest *dest=NULL;
1424
// direct destination, or a named one resolved through the catalog
if (ha->getDest()!=NULL)
1425
dest=ha->getDest()->copy();
1426
else if (ha->getNamedDest()!=NULL)
1427
dest=catalog->findDest(ha->getNamedDest());
1430
if (dest->isPageRef()){
1431
Ref pageref=dest->getPageRef();
1432
page=catalog->findPage(pageref.num,pageref.gen);
1435
page=dest->getPageNum();
1440
GooString *str=GooString::fromInt(page);
1442
frames file-4.html files.html#4
1443
noframes file.html#4 file.html#4
1447
file->append(".html#");
1456
file->append(".html");
1460
file->append("s.html#");
1465
if (printCommands) printf(" link to page %d ",page);
1471
return new GooString();
1476
LinkGoToR *ha=(LinkGoToR *) link->getAction();
1477
LinkDest *dest=NULL;
1479
// NOTE(review): this allocation is overwritten below without a visible delete
GooString *file=new GooString();
1480
if (ha->getFileName()){
1482
file=new GooString(ha->getFileName()->getCString());
1484
if (ha->getDest()!=NULL)  dest=ha->getDest()->copy();
1486
if (!(dest->isPageRef()))  page=dest->getPageNum();
1489
if (printCommands) printf(" link to page %d ",page);
1491
// NOTE(review): p points before the buffer when the name is shorter than 4 bytes
p=file->getCString()+file->getLength()-4;
1492
if (!strcmp(p, ".pdf") || !strcmp(p, ".PDF")){
1493
file->del(file->getLength()-4,4);
1494
file->append(".html");
1497
file->append(GooString::fromInt(page));
1500
if (printCommands && file) printf("filename %s\n",file->getCString());
1505
LinkURI *ha=(LinkURI *) link->getAction();
1506
GooString* file=new GooString(ha->getURI()->getCString());
1507
// printf("uri : %s\n",file->getCString());
1512
LinkLaunch *ha=(LinkLaunch *) link->getAction();
1513
GooString* file=new GooString(ha->getFileName()->getCString());
1515
p=file->getCString()+file->getLength()-4;
1516
if (!strcmp(p, ".pdf") || !strcmp(p, ".PDF")){
1517
file->del(file->getLength()-4,4);
1518
file->append(".html");
1520
if (printCommands) printf("filename %s",file->getCString());
1527
return new GooString();
1531
// Writes every meta variable in glMetaVars to `file`, one <META> tag per
// line.
// NOTE(review): the declaration of var and the delete of the string
// returned by toString() are missing from this excerpt.
void HtmlOutputDev::dumpMetaVars(FILE *file)
1535
for(int i = 0; i < glMetaVars->getLength(); i++)
1537
HtmlMetaVar *t = (HtmlMetaVar*)glMetaVars->get(i);
1538
var = t->toString();
1539
fprintf(file, "%s\n", var->getCString());
1544
// Writes the document outline (bookmarks) as HTML. In non-complex, non-XML
// mode a rule is written around the outline; in complex non-XML mode a
// separate "<Docname>-outline.html" file is created and given its own
// HTML head and closing markup. The outline tree is emitted by
// newOutlineLevel.
// NOTE(review): the early returns (e.g. when the outline is not a
// dictionary), the bodies choosing `output`, the fopen error check, the
// fclose and the return value are missing from this excerpt.
GBool HtmlOutputDev::dumpDocOutline(Catalog* catalog)
1546
FILE * output = NULL;
1547
GBool bClose = gFalse;
1552
Object *outlines = catalog->getOutline();
1553
if (!outlines->isDict())
1556
if (!complexMode && !xml)
1560
else if (complexMode && !xml)
1565
fputs("<hr>\n", output);
1569
GooString *str = Docname->copy();
1570
str->append("-outline.html");
1571
output = fopen(str->getCString(), "w");
1576
fputs("<HTML>\n<HEAD>\n<TITLE>Document Outline</TITLE>\n</HEAD>\n<BODY>\n", output);
1580
GBool done = newOutlineLevel(output, outlines, catalog);
1581
if (done && !complexMode)
1582
fputs("<hr>\n", output);
1586
fputs("</BODY>\n</HTML>\n", output);
1592
// Writes one level of the outline tree as an HTML <ul> list and recurses
// into child levels.
//   output  - stream receiving the HTML
//   node    - outline dictionary whose "First" entry starts this level
//   catalog - used to resolve named destinations and page references
//   level   - nesting depth; the recursive call passes level+1
// For each item the visible code reads "Title" and "Dest", builds a link
// name from the document base name and the target page number, writes an
// <li> with an <A href>, recurses, then follows "Next" while the current
// object is a dictionary.
// NOTE(review): the declarations of curr, next, title, dest and page, the
// level test before the heading, the `do {` header, the mode conditions
// choosing between the ".html#", "-N.html" and "s.html#" forms, the null
// checks on linkdest/linkName, the Object frees and the return statement
// are missing from this excerpt. The two bare text lines below were inside
// a block comment whose delimiters were lost. The title is written without
// HTML escaping.
GBool HtmlOutputDev::newOutlineLevel(FILE *output, Object *node, Catalog* catalog, int level)
1595
GBool atLeastOne = gFalse;
1597
if (node->dictLookup("First", &curr)->isDict()) {
1600
fputs("<A name=\"outline\"></a>", output);
1601
fputs("<h1>Document Outline</h1>\n", output);
1603
fputs("<ul>",output);
1605
// get title, give up if not found
1607
if (curr.dictLookup("Title", &title)->isNull()) {
1611
GooString *titleStr = new GooString(title.getString());
1614
// get corresponding link
1615
// Note: some code duplicated from HtmlOutputDev::getLinkDest().
1616
GooString *linkName = NULL;;
1618
if (!curr.dictLookup("Dest", &dest)->isNull()) {
1619
LinkGoTo *link = new LinkGoTo(&dest);
1620
LinkDest *linkdest=NULL;
1621
// direct destination, or a named one resolved through the catalog
if (link->getDest()!=NULL)
1622
linkdest=link->getDest()->copy();
1623
else if (link->getNamedDest()!=NULL)
1624
linkdest=catalog->findDest(link->getNamedDest());
1629
if (linkdest->isPageRef()) {
1630
Ref pageref=linkdest->getPageRef();
1631
page=catalog->findPage(pageref.num,pageref.gen);
1633
page=linkdest->getPageNum();
1638
frames file-4.html files.html#4
1639
noframes file.html#4 file.html#4
1641
linkName=basename(Docname);
1642
GooString *str=GooString::fromInt(page);
1644
linkName->append(".html#");
1645
linkName->append(str);
1648
linkName->append("-");
1649
linkName->append(str);
1650
linkName->append(".html");
1652
linkName->append("s.html#");
1653
linkName->append(str);
1661
fputs("<li>",output);
1663
fprintf(output,"<A href=\"%s\">", linkName->getCString());
1664
fputs(titleStr->getCString(),output);
1666
fputs("</A>",output);
1673
// children of this item form the next nesting level
newOutlineLevel(output, &curr, catalog, level+1);
1674
curr.dictLookup("Next", &next);
1677
} while(curr.isDict());
1678
fputs("</ul>",output);