1
/* $Header: /home/cvsroot/dvipdfmx/src/spc_html.c,v 1.5 2005/08/14 15:50:36 chofchof Exp $
3
This is dvipdfmx, an eXtended version of dvipdfm by Mark A. Wicks.
5
Copyright (C) 2002 by Jin-Hwan Cho and Shunsaku Hirata,
6
the dvipdfmx project team <dvipdfmx@project.ktug.or.kr>
8
Copyright (C) 1998, 1999 by Mark A. Wicks <mwicks@kettering.edu>
10
This program is free software; you can redistribute it and/or modify
11
it under the terms of the GNU General Public License as published by
12
the Free Software Foundation; either version 2 of the License, or
13
(at your option) any later version.
15
This program is distributed in the hope that it will be useful,
16
but WITHOUT ANY WARRANTY; without even the implied warranty of
17
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
GNU General Public License for more details.
20
You should have received a copy of the GNU General Public License
21
along with this program; if not, write to the Free Software
22
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
36
#include "pdfximage.h"
46
/* Compile-time feature switches; the code in this file tests them with #ifdef. */
#define ENABLE_HTML_IMG_SUPPORT 1
47
#define ENABLE_HTML_SVG_TRANSFORM 1
48
#define ENABLE_HTML_SVG_OPACITY 1
51
* Please rewrite this or remove html special support
54
/*
 * Kinds of anchor recorded in the pending_type field of struct spc_html_.
 * The code in this file uses -1 in that field for "no anchor pending".
 */
#define ANCHOR_TYPE_HREF 0
55
#define ANCHOR_TYPE_NAME 1
68
static struct spc_html_ _html_state = {
74
#ifdef ENABLE_HTML_SVG_TRANSFORM
75
/*
 * Forward declaration: the definition appears later in this file and is
 * called while handling the "svg:transform" attribute of the img tag.
 */
static int cvt_a_to_tmatrix (pdf_tmatrix *M, const char *ptr, char **nextptr);
76
#endif /* ENABLE_HTML_SVG_TRANSFORM */
81
char *_p = (char *) (s); \
82
while (*(_p) != 0) { \
83
if (*(_p) >= 'A' && *(_p) <= 'Z') { \
84
*(_p) = (*(_p) - 'A') + 'a'; \
91
/*
 * Parse one attribute of the form  key="value"  or  key='value'  from the
 * text range [*pp, endptr).
 *
 *   pp      in/out: current position; updated to the position reached.
 *   endptr  one past the last readable character.
 *   kp, vp  out: receive the key and value buffers (allocated with NEW).
 *
 * The caller in this file stores the return value in a variable named
 * `error`; the return statements themselves are not visible in this view.
 *
 * NOTE(review): isspace() is called with a plain char here; ISO C leaves the
 * result undefined for negative values other than EOF -- consider casting to
 * unsigned char.
 */
parse_key_val (char **pp, char *endptr, char **kp, char **vp)
96
/* Skip white space in front of the key. */
for (p = *pp ; p < endptr && isspace(*p); p++);
98
/* Loop while the current character is an ASCII letter. */
while (!error && p < endptr &&
99
((*p >= 'a' && *p <= 'z') ||
100
(*p >= 'A' && *p <= 'Z'))
106
((*p >= 'a' && *p <= 'z') ||
107
(*p >= 'A' && *p <= 'Z') ||
108
(*p >= '0' && *p <= '9') ||
109
*p == '-' || *p == ':'
119
/* Copy the n key bytes starting at q into a fresh NUL-terminated buffer. */
k = NEW(n + 1, char);
120
memcpy(k, q, n); k[n] = '\0';
121
/* The key must be followed directly by '=' and an opening quote character. */
if (p + 2 >= endptr || p[0] != '=' || (p[1] != '\"' && p[1] != '\'')) {
122
RELEASE(k); k = NULL;
127
p += 2; /* skip '="' */
128
/* Advance to the closing quote (qchr); n counts the bytes of the value. */
for (q = p, n = 0; p < endptr && *p != qchr; p++, n++);
129
/* Reaching endptr without seeing qchr means the value is unterminated. */
if (p == endptr || *p != qchr)
132
/* Copy the n value bytes into a fresh NUL-terminated buffer. */
v = NEW(n + 1, char);
133
memcpy(v, q, n); v[n] = '\0';
135
/* NOTE(review): `t` is not declared in any visible line -- presumably this
 * call sits in a disabled (#if 0) region; confirm against the full file. */
pdf_add_dict(t->attr,
137
pdf_new_string(v, n));
146
/* Skip white space after the attribute. */
for ( ; p < endptr && isspace(*p); p++);
150
/* Hand the results and the updated position back to the caller. */
*kp = k; *vp = v; *pp = p;
154
/*
 * Limits and tag kinds used by read_html_tag() and the tag dispatcher.
 *
 * HTML_TAG_NAME_MAX is the longest tag name accepted (the name buffer is
 * declared with HTML_TAG_NAME_MAX + 1 bytes).
 *
 * The three tag kinds must be distinct.  EMPTY and OPEN were both defined
 * as 1, so an empty anchor tag ("<a ... />") was dispatched as an opening
 * tag and the dispatcher's "Empty html anchor tag???" branch could never be
 * reached.  OPEN and CLOSE keep their previous values.
 */
#define HTML_TAG_NAME_MAX 127
#define HTML_TAG_TYPE_EMPTY 0
#define HTML_TAG_TYPE_OPEN 1
#define HTML_TAG_TYPE_CLOSE 2
160
/*
 * Read one HTML tag from the text range [*pp, endptr).
 *
 * Accepted shape, as far as the visible lines show:
 *   '<' [ '/' ] tag-name { key="value" } [ '/' ] '>'
 * with optional white space between the parts.
 *
 *   name    out: buffer for the tag name (at most HTML_TAG_NAME_MAX chars
 *           are scanned).
 *   attr    attribute dictionary; presumably filled with the parsed
 *           key/value pairs (the pdf_add_dict line is not visible here).
 *   type    out: HTML_TAG_TYPE_OPEN, _CLOSE or _EMPTY.
 *   pp      in/out: current position.
 *
 * The caller in this file stores the return value in a variable named
 * `error`.
 */
read_html_tag (char *name, pdf_obj *attr, int *type, char **pp, char *endptr)
163
int n = 0, error = 0;
165
/* Skip leading white space; the tag must start with '<'. */
for ( ; p < endptr && isspace(*p); p++);
166
if (p >= endptr || *p != '<')
169
/* Assume an opening tag until a '/' says otherwise. */
*type = HTML_TAG_TYPE_OPEN;
170
for (++p; p < endptr && isspace(*p); p++);
171
/* A '/' right after '<' marks a closing tag. */
if (p < endptr && *p == '/') {
172
*type = HTML_TAG_TYPE_CLOSE;
173
for (++p; p < endptr && isspace(*p); p++);
176
/* Characters that end the tag name. */
#define ISDELIM(c) ((c) == '>' || (c) == '/' || isspace(c))
177
/* Scan the tag name, bounded by HTML_TAG_NAME_MAX. */
for (n = 0; p < endptr && n < HTML_TAG_NAME_MAX && !ISDELIM(*p); n++, p++) {
181
/* Reject an empty name, a name running into endptr, or an over-long name. */
if (n == 0 || p == endptr || !ISDELIM(*p)) {
186
for ( ; p < endptr && isspace(*p); p++);
187
/* Parse attributes until '/' or '>' is reached or an error occurs. */
while (p < endptr && !error && *p != '/' && *p != '>') {
188
char *kp = NULL, *vp = NULL;
189
error = parse_key_val(&p, endptr, &kp, &vp);
194
pdf_new_string(vp, strlen(vp) + 1)); /* include the terminating NUL byte here!!! */
198
for ( ; p < endptr && isspace(*p); p++);
205
/* A '/' before the closing '>' marks an empty tag. */
if (p < endptr && *p == '/') {
206
*type = HTML_TAG_TYPE_EMPTY;
207
for (++p; p < endptr && isspace(*p); p++);
209
/* The tag must be terminated by '>'. */
if (p == endptr || *p != '>') {
222
/*
 * Put the html-special state passed in dp into its "no anchor" condition:
 * no link dictionary and pending_type == -1.  (Other fields may be set on
 * lines not visible in this view.)
 */
spc_handler_html__init (struct spc_env *spe, struct spc_arg *ap, void *dp)
224
struct spc_html_ *sd = dp;
226
sd->link_dict = NULL;
228
sd->pending_type = -1;
234
/*
 * Release what the html-special state in dp still holds (base URL and link
 * dictionary), warn when an anchor was left open, and reset the anchor
 * fields.
 */
spc_handler_html__clean (struct spc_env *spe, struct spc_arg *ap, void *dp)
236
struct spc_html_ *sd = dp;
239
RELEASE(sd->baseurl);
241
/* Same condition as the ANCHOR_STARTED() macro: an anchor is still open. */
if (sd->pending_type >= 0 || sd->link_dict)
242
spc_warn(spe, "Unclosed html anchor found.");
245
pdf_release_obj(sd->link_dict);
247
sd->pending_type = -1;
249
sd->link_dict = NULL;
256
/*
 * Warn when an anchor is still pending (pending_type >= 0) at the time this
 * hook runs; spc_html_at_begin_page() calls it.
 */
spc_handler_html__bophook (struct spc_env *spe, struct spc_arg *ap, void *dp)
258
struct spc_html_ *sd = dp;
260
if (sd->pending_type >= 0) {
261
spc_warn(spe, "...html anchor continues from previous page processed...");
268
/*
 * Warn when an anchor is still pending (pending_type >= 0) at the time this
 * hook runs; spc_html_at_end_page() calls it.
 */
spc_handler_html__eophook (struct spc_env *spe, struct spc_arg *ap, void *dp)
270
struct spc_html_ *sd = dp;
272
if (sd->pending_type >= 0) {
273
spc_warn(spe, "Unclosed html anchor at end-of-page!");
281
/*
 * Combine baseurl and name into one string held in a buffer allocated with
 * NEW.  html_open_link() assigns the result to its `url` variable.  The
 * exact joining rules depend on lines not visible in this view.
 */
fqurl (const char *baseurl, const char *name)
288
/* Reserve room for the base URL plus one separator character. */
len += strlen(baseurl) + 1; /* we may want to add '/' */
290
q = NEW(len + 1, char);
292
/* A non-empty base URL is handled first. */
if (baseurl && baseurl[0]) {
295
/* p points at the last character written to q so far. */
p = q + strlen(q) - 1;
298
/* Non-empty name that does not begin with '/'. */
if (name[0] && name[0] != '/')
307
/*
 * Start a hyperlink anchor: build a Link annotation dictionary in
 * sd->link_dict, begin the annotation with spc_begin_annot(), and record
 * ANCHOR_TYPE_HREF as the pending anchor type.
 *
 *   name  the href value; it is combined with sd->baseurl by fqurl().
 *
 * The caller must make sure no link dictionary is already open (asserted
 * below).
 */
html_open_link (struct spc_env *spe, const char *name, struct spc_html_ *sd)
313
ASSERT( sd->link_dict == NULL ); /* Should be checked somewhere else */
315
sd->link_dict = pdf_new_dict();
316
pdf_add_dict(sd->link_dict,
317
pdf_new_name("Type"), pdf_new_name ("Annot"));
318
pdf_add_dict(sd->link_dict,
319
pdf_new_name("Subtype"), pdf_new_name ("Link"));
321
/* Annotation colour entry "C": the three numbers 0 0 1. */
color = pdf_new_array ();
322
pdf_add_array(color, pdf_new_number(0.0));
323
pdf_add_array(color, pdf_new_number(0.0));
324
pdf_add_array(color, pdf_new_number(1.0));
325
pdf_add_dict(sd->link_dict, pdf_new_name("C"), color);
327
url = fqurl(sd->baseurl, name);
329
/* url++; causes memory leak in RELEASE(url) */
/* First branch: the text after the first character of url becomes the "Dest"
 * entry (the skipped character is presumably '#'; the test is not visible). */
330
pdf_add_dict(sd->link_dict,
331
pdf_new_name("Dest"),
332
pdf_new_string(url+1, strlen(url+1)));
333
} else { /* Assume this is URL */
334
pdf_obj *action = pdf_new_dict();
336
pdf_new_name("Type"),
337
pdf_new_name("Action"));
340
pdf_new_name("URI"));
343
pdf_new_string(url, strlen(url)));
344
pdf_add_dict(sd->link_dict,
346
pdf_link_obj(action));
347
/* The dictionary received its own link via pdf_link_obj(); drop ours. */
pdf_release_obj(action);
351
spc_begin_annot(spe, sd->link_dict);
353
sd->pending_type = ANCHOR_TYPE_HREF;
359
/*
 * Start a named anchor: register `name` in the document's "Dests" name
 * collection with a destination array on the current page, then record
 * ANCHOR_TYPE_NAME as the pending anchor type.
 */
html_open_dest (struct spc_env *spe, const char *name, struct spc_html_ *sd)
362
pdf_obj *array, *page_ref;
365
/* Current point in user space, passed through pdf_dev_transform(). */
cp.x = spe->x_user; cp.y = spe->y_user;
366
pdf_dev_transform(&cp, NULL);
368
page_ref = pdf_doc_this_page_ref();
369
ASSERT( page_ref ); /* Otherwise must be bug */
371
/* Destination array: [ page /XYZ null (y + 24) null ]. */
array = pdf_new_array();
372
pdf_add_array(array, page_ref);
373
pdf_add_array(array, pdf_new_name("XYZ"));
374
pdf_add_array(array, pdf_new_null());
375
pdf_add_array(array, pdf_new_number(cp.y + 24.0));
376
pdf_add_array(array, pdf_new_null());
378
error = pdf_doc_add_names("Dests",
383
spc_warn(spe, "Failed to add named destination: %s", name);
385
sd->pending_type = ANCHOR_TYPE_NAME;
390
/* True when state `s` has an anchor open: a pending type is recorded or a
 * link dictionary is still held. */
#define ANCHOR_STARTED(s) ((s)->pending_type >= 0 || (s)->link_dict)
393
/*
 * Handle an opening anchor tag.  Looks up the "href" and "name" attributes
 * in attr and starts either a link (href) or a named destination (name).
 * Warnings are issued for nested anchors, for tags carrying both attributes,
 * and for tags carrying neither.
 */
spc_html__anchor_open (struct spc_env *spe, pdf_obj *attr, struct spc_html_ *sd)
395
pdf_obj *href, *name;
398
if (ANCHOR_STARTED(sd)) {
399
spc_warn(spe, "Nested html anchors found!");
403
href = pdf_lookup_dict(attr, "href");
404
name = pdf_lookup_dict(attr, "name");
406
spc_warn(spe, "Sorry, you can't have both \"href\" and \"name\" in anchor tag...");
409
error = html_open_link(spe, pdf_string_value(href), sd);
410
} else if (name) { /* name */
411
error = html_open_dest(spe, pdf_string_value(name), sd);
413
spc_warn(spe, "You should have \"href\" or \"name\" in anchor tag!");
421
/*
 * Handle a closing anchor tag.  Acts on the anchor type recorded in
 * sd->pending_type: a link anchor releases the link dictionary, and both
 * anchor kinds reset pending_type to -1.  Warnings cover a close without a
 * matching open.
 */
spc_html__anchor_close (struct spc_env *spe, pdf_obj *attr, struct spc_html_ *sd)
425
switch (sd->pending_type) {
426
case ANCHOR_TYPE_HREF:
429
pdf_release_obj(sd->link_dict);
430
sd->link_dict = NULL;
431
sd->pending_type = -1;
433
spc_warn(spe, "Closing html anchor (link) without starting!");
437
case ANCHOR_TYPE_NAME:
438
sd->pending_type = -1;
441
spc_warn(spe, "No corresponding opening tag for html anchor.");
450
/*
 * Handle the "base" tag: replace sd->baseurl with a newly allocated copy of
 * the tag's "href" attribute.  Warns when "href" is missing and when an
 * existing base URL is replaced.
 */
spc_html__base_empty (struct spc_env *spe, pdf_obj *attr, struct spc_html_ *sd)
455
href = pdf_lookup_dict(attr, "href");
457
spc_warn(spe, "\"href\" not found for \"base\" tag!");
461
vp = (char *) pdf_string_value(href);
463
spc_warn(spe, "\"baseurl\" changed: \"%s\" --> \"%s\"", sd->baseurl, vp);
464
RELEASE(sd->baseurl);
466
/* Store a private copy; vp points into the attribute object. */
sd->baseurl = NEW(strlen(vp) + 1, char);
467
strcpy(sd->baseurl, vp);
473
#ifdef ENABLE_HTML_IMG_SUPPORT
474
/* This isn't completed.
475
* Please think about placement of images.
478
/*
 * Convert a length written as a decimal number followed by an optional unit
 * ("pt", "in", "cm", "mm", "bp", and per the switch below also a px unit)
 * into units of 1/72 inch.  Warns on an unparsable number or unknown unit.
 * The final combination of number and unit factor is on lines not visible
 * in this view.
 */
atopt (const char *a)
480
char *q, *p = (char *) a;
482
const char *_ukeys[] = {
488
"pt", "in", "cm", "mm", "bp",
495
/* Numeric part. */
q = parse_float_decimal(&p, p + strlen(p));
497
WARN("Invalid length value: %s (%c)", a, *p);
504
/* Unit name, matched against _ukeys. */
q = parse_c_ident(&p, p + strlen(p));
506
for (k = 0; _ukeys[k] && strcmp(_ukeys[k], q); k++);
508
/* Factors below convert each unit to 1/72 inch. */
case K_UNIT__PT: u *= 72.0 / 72.27; break;
509
case K_UNIT__IN: u *= 72.0; break;
510
case K_UNIT__CM: u *= 72.0 / 2.54 ; break;
511
case K_UNIT__MM: u *= 72.0 / 25.4 ; break;
512
case K_UNIT__BP: u *= 1.0 ; break;
513
case K_UNIT__PX: u *= 1.0 ; break; /* 72dpi */
515
WARN("Unknown unit of measure: %s", q);
525
#ifdef ENABLE_HTML_SVG_OPACITY
526
/* Replicated from spc_tpic */
528
/*
 * Create a dictionary whose Type entry is ExtGState.  `a` is the alpha value
 * and `f_ais` the "alpha is shape" flag; how they are stored is on lines not
 * visible in this view.  The caller in this file releases the returned
 * object after referencing it.
 */
create_xgstate (double a /* alpha */, int f_ais /* alpha is shape */)
532
dict = pdf_new_dict();
534
pdf_new_name("Type"),
535
pdf_new_name("ExtGState"));
549
/*
 * Look for resource `resname` under `category` in the current page's
 * resource dictionary.  The caller treats a zero result as "not yet
 * registered" and then adds the resource.
 */
check_resourcestatus (const char *category, const char *resname)
551
pdf_obj *dict1, *dict2;
553
dict1 = pdf_doc_current_page_resources();
557
/* Category sub-dictionary, e.g. "ExtGState". */
dict2 = pdf_lookup_dict(dict1, category);
559
pdf_obj_typeof(dict2) == PDF_DICT) {
560
if (pdf_lookup_dict(dict2, resname))
565
#endif /* ENABLE_HTML_SVG_OPACITY */
568
/*
 * Handle the "img" tag (flagged as incomplete by its own warning message).
 *
 * Attributes read from attr:
 *   src            image to load (required)
 *   width, height  lengths converted by atopt()
 *   svg:opacity    alpha in [0, 1]            (ENABLE_HTML_SVG_OPACITY)
 *   svg:transform  list of transformations    (ENABLE_HTML_SVG_TRANSFORM)
 *
 * With either SVG feature enabled the image is drawn by writing page content
 * directly (optional "gs" operator, clip rectangle, "Do" operator).
 */
spc_html__img_empty (struct spc_env *spe, pdf_obj *attr, struct spc_html_ *sd)
573
#ifdef ENABLE_HTML_SVG_OPACITY
574
double alpha = 1.0; /* meaning fully opaque */
575
#endif /* ENABLE_HTML_SVG_OPACITY */
576
#ifdef ENABLE_HTML_SVG_TRANSFORM
579
/* Start from a pure translation to the current point. */
pdf_setmatrix(&M, 1.0, 0.0, 0.0, 1.0, spe->x_user, spe->y_user);
580
#endif /* ENABLE_HTML_SVG_TRANSFORM */
582
spc_warn(spe, "html \"img\" tag found (not completed, plese don't use!).");
584
src = pdf_lookup_dict(attr, "src");
586
spc_warn(spe, "\"src\" attribute not found for \"img\" tag!");
590
transform_info_clear(&ti);
591
/* Optional explicit width. */
obj = pdf_lookup_dict(attr, "width");
593
ti.width = atopt(pdf_string_value(obj));
594
ti.flags |= INFO_HAS_WIDTH;
596
/* Optional explicit height. */
obj = pdf_lookup_dict(attr, "height");
598
ti.height = atopt(pdf_string_value(obj));
599
ti.flags |= INFO_HAS_HEIGHT;
602
#ifdef ENABLE_HTML_SVG_OPACITY
603
obj = pdf_lookup_dict(attr, "svg:opacity");
605
alpha = atof(pdf_string_value(obj));
606
/* Opacity outside [0, 1] is reported as invalid. */
if (alpha < 0.0 || alpha > 1.0) {
607
spc_warn(spe, "Invalid opacity value: %s", pdf_string_value(obj));
611
#endif /* ENABLE_HTML_SVG_OPACITY */
613
#ifdef ENABLE_HTML_SVG_TRANSFORM
614
obj = pdf_lookup_dict(attr, "svg:transform");
616
char *p = (char *) pdf_string_value(obj);
618
for ( ; *p && isspace(*p); p++);
619
/* Parse each transformation in turn and concatenate it onto M. */
while (*p && !error) {
620
pdf_setmatrix(&N, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0);
621
error = cvt_a_to_tmatrix(&N, p, &p);
624
pdf_concatmatrix(&M, &N);
625
for ( ; *p && isspace(*p); p++);
627
for (++p; *p && isspace(*p); p++);
631
#endif /* ENABLE_HTML_SVG_TRANSFORM */
634
spc_warn(spe, "Error in html \"img\" tag attribute.");
638
id = pdf_ximage_findresource(pdf_string_value(src), 0, 0);
640
spc_warn(spe, "Could not find/load image: %s", pdf_string_value(src));
643
#if defined(ENABLE_HTML_SVG_TRANSFORM) || defined(ENABLE_HTML_SVG_OPACITY)
652
#ifdef ENABLE_HTML_SVG_OPACITY
655
/* Opacity as an integer percentage, used in the resource name. */
int a = round(100.0 * alpha);
657
/* Buffer sized for the longest name, "_Tps_a100_"; the "%03d" conversion
 * below prints at least three digits. */
res_name = NEW(strlen("_Tps_a100_") + 1, char);
658
sprintf(res_name, "_Tps_a%03d_", a); /* Not Tps prefix but... */
659
/* Register the ExtGState resource only when the page does not have it yet. */
if (!check_resourcestatus("ExtGState", res_name)) {
660
dict = create_xgstate(round_at(0.01 * a, 0.01), 0);
661
pdf_doc_add_page_resource("ExtGState",
662
res_name, pdf_ref_obj(dict));
663
pdf_release_obj(dict);
665
/* Emit " /<res_name> gs" into the page content. */
pdf_doc_add_page_content(" /", 2);
666
pdf_doc_add_page_content(res_name, strlen(res_name));
667
pdf_doc_add_page_content(" gs", 3);
671
#endif /* ENABLE_HTML_SVG_OPACITY */
675
pdf_ximage_scale_image(id, &M, &r, &ti);
678
/* Clip to the rectangle r filled in by pdf_ximage_scale_image(). */
pdf_dev_rectclip(r.llx, r.lly, r.urx - r.llx, r.ury - r.lly);
680
/* Emit " /<image resource name> Do" into the page content. */
res_name = pdf_ximage_get_resname(id);
681
pdf_doc_add_page_content(" /", 2);
682
pdf_doc_add_page_content(res_name, strlen(res_name));
683
pdf_doc_add_page_content(" Do", 3);
687
pdf_doc_add_page_resource("XObject",
689
pdf_ximage_get_reference(id));
692
pdf_dev_put_image(id, &ti, spe->x_user, spe->y_user);
693
#endif /* ENABLE_HTML_SVG_XXX */
700
/*
 * Variant of the "img" handler that only reports the tag as unsupported
 * (presumably the build without ENABLE_HTML_IMG_SUPPORT; the #else line is
 * not visible in this view).
 */
spc_html__img_empty (struct spc_env *spe, pdf_obj *attr, struct spc_html_ *sd)
702
spc_warn(spe, "IMG tag not yet supported yet...");
705
#endif /* ENABLE_HTML_IMG_SUPPORT */
709
/*
 * Execute one "html:" special.  Reads a single tag from the argument text
 * with read_html_tag() and dispatches on the tag name:
 *   "a"     opening / closing anchor handlers
 *   "base"  base URL handler (a closing tag only triggers a warning)
 *   "img"   image handler    (a closing tag only triggers a warning)
 * The attribute dictionary is created here and released before returning.
 * State is kept in the file-scope _html_state.
 */
spc_handler_html_default (struct spc_env *spe, struct spc_arg *ap)
711
struct spc_html_ *sd = &_html_state;
712
/* Room for the longest tag name plus the terminating NUL. */
char name[HTML_TAG_NAME_MAX + 1];
714
int error = 0, type = HTML_TAG_TYPE_OPEN;
716
/* Nothing left to parse. */
if (ap->curptr >= ap->endptr)
719
attr = pdf_new_dict();
720
error = read_html_tag(name, attr, &type, &ap->curptr, ap->endptr);
722
pdf_release_obj(attr);
725
if (!strcmp(name, "a")) {
727
case HTML_TAG_TYPE_OPEN:
728
error = spc_html__anchor_open (spe, attr, sd);
730
case HTML_TAG_TYPE_CLOSE:
731
error = spc_html__anchor_close(spe, attr, sd);
734
spc_warn(spe, "Empty html anchor tag???");
738
} else if (!strcmp(name, "base")) {
739
if (type == HTML_TAG_TYPE_CLOSE) {
740
spc_warn(spe, "Close tag for \"base\"???");
742
} else { /* treat "open" same as "empty" */
743
error = spc_html__base_empty(spe, attr, sd);
745
} else if (!strcmp(name, "img")) {
746
if (type == HTML_TAG_TYPE_CLOSE) {
747
spc_warn(spe, "Close tag for \"img\"???");
749
} else { /* treat "open" same as "empty" */
750
error = spc_html__img_empty(spe, attr, sd);
753
pdf_release_obj(attr);
755
/* Skip white space that follows the tag. */
for ( ; ap->curptr < ap->endptr && isspace(ap->curptr[0]); ap->curptr++);
761
#ifdef ENABLE_HTML_SVG_TRANSFORM
762
/* translate wsp* '(' wsp* number (comma-wsp number)? wsp* ')' */
764
/*
 * Parse one transformation of the form  keyword '(' number ... ')'  starting
 * at ptr and store the corresponding matrix in *M.
 *
 * Keywords are listed in _tkeys ("matrix", "translate", "scale", "rotate"
 * and two skew keywords); up to six numbers are read.  Angles are given in
 * degrees.  The caller passes a matrix preset to identity, stores the return
 * value in `error`, and receives the continuation position through nextptr
 * (the assignment to *nextptr is not visible in this view).
 */
cvt_a_to_tmatrix (pdf_tmatrix *M, const char *ptr, char **nextptr)
766
char *q, *p = (char *) ptr;
769
static const char *_tkeys[] = {
770
#define K_TRNS__MATRIX 0
771
"matrix", /* a b c d e f */
772
#define K_TRNS__TRANSLATE 1
773
"translate", /* tx [ty] : dflt. ty = 0 */
774
#define K_TRNS__SCALE 2
775
"scale", /* sx [sy] : dflt. sy = sx */
776
#define K_TRNS__ROTATE 3
777
"rotate", /* ang [cx cy] : dflt. cx, cy = 0 */
778
#define K_TRNS__SKEWX 4
779
#define K_TRNS__SKEWY 5
786
for ( ; *p && isspace(*p); p++);
788
q = parse_c_ident(&p, p + strlen(p));
791
/* parsed transformation key */
792
/* k becomes the index of the keyword in _tkeys. */
for (k = 0; _tkeys[k] && strcmp(q, _tkeys[k]); k++);
796
for ( ; *p && isspace(*p); p++);
797
/* An opening parenthesis with something after it is required. */
if (*p != '(' || *(p + 1) == 0)
799
for (++p; *p && isspace(*p); p++);
800
/* Read at most six numbers, stopping at ')'. */
for (n = 0; n < 6 && *p && *p != ')'; n++) {
801
q = parse_float_decimal(&p, p + strlen(p));
808
for ( ; *p && isspace(*p); p++);
810
for (++p; *p && isspace(*p); p++);
822
/* NOTE(review): the second parsed value goes to M->c and the third to M->b --
 * presumably deliberate for this program's matrix convention; confirm. */
M->a = v[0]; M->c = v[1];
823
M->b = v[2]; M->d = v[3];
824
M->e = v[4]; M->f = v[5];
826
case K_TRNS__TRANSLATE:
827
if (n != 1 && n != 2)
831
M->e = v[0]; M->f = (n == 2) ? v[1] : 0.0;
834
if (n != 1 && n != 2)
836
M->a = v[0]; M->d = (n == 2) ? v[1] : v[0];
841
if (n != 1 && n != 3)
843
/* Rotation angle v[0] is converted from degrees to radians. */
M->a = cos(v[0] * M_PI / 180.0);
844
M->c = sin(v[0] * M_PI / 180.0);
845
M->b = -M->c; M->d = M->a;
846
M->e = (n == 3) ? v[1] : 0.0;
847
M->f = (n == 3) ? v[2] : 0.0;
854
M->b = tan(v[0] * M_PI / 180.0);
860
M->c = tan(v[0] * M_PI / 180.0);
869
#endif /* ENABLE_HTML_SVG_TRANSFORM */
872
/* Run spc_handler_html__init() on the file-scope _html_state (no spe/ap). */
spc_html_at_begin_document (void)
874
struct spc_html_ *sd = &_html_state;
875
return spc_handler_html__init(NULL, NULL, sd);
879
/* Run spc_handler_html__bophook() on the file-scope _html_state (no spe/ap). */
spc_html_at_begin_page (void)
881
struct spc_html_ *sd = &_html_state;
882
return spc_handler_html__bophook(NULL, NULL, sd);
886
/* Run spc_handler_html__eophook() on the file-scope _html_state (no spe/ap). */
spc_html_at_end_page (void)
888
struct spc_html_ *sd = &_html_state;
889
return spc_handler_html__eophook(NULL, NULL, sd);
893
/* Run spc_handler_html__clean() on the file-scope _html_state (no spe/ap). */
spc_html_at_end_document (void)
895
struct spc_html_ *sd = &_html_state;
896
return spc_handler_html__clean(NULL, NULL, sd);
901
/*
 * Test whether the special text in buffer (size bytes) begins, after any
 * leading white space, with the prefix "html:".  The return statements are
 * not visible in this view.
 */
spc_html_check_special (const char *buffer, long size)
908
for ( ; p < endptr && isspace(*p); p++);
909
/* Bytes remaining after the skipped white space. */
size = (long) (endptr - p);
910
if (size >= strlen("html:") &&
911
!memcmp(p, "html:", strlen("html:"))) {
920
/*
 * Prepare handler sph for an "html:" special found in ap: verify the
 * "html:" prefix, set the handler key and exec function, and advance
 * ap->curptr past the prefix and any white space that follows it.
 */
spc_html_setup_handler (struct spc_handler *sph,
921
struct spc_env *spe, struct spc_arg *ap)
923
ASSERT(sph && spe && ap);
925
for ( ; ap->curptr < ap->endptr && isspace(ap->curptr[0]); ap->curptr++);
926
/* The remaining text must be long enough for, and start with, "html:". */
if (ap->curptr + strlen("html:") > ap->endptr ||
927
memcmp(ap->curptr, "html:", strlen("html:"))) {
931
ap->command = (char *) "";
933
sph->key = (char *) "html:";
934
sph->exec = &spc_handler_html_default;
936
ap->curptr += strlen("html:");
937
for ( ; ap->curptr < ap->endptr && isspace(ap->curptr[0]); ap->curptr++);