2
/* $Id: parser.c,v 1.327 2004/01/08 21:26:00 pasky Exp $ */
8
#define _GNU_SOURCE /* strcasestr() */
19
#include "bfu/listmenu.h"
21
#include "config/options.h"
22
#include "config/kbdbind.h"
23
#include "document/html/frames.h"
24
#include "document/html/renderer.h"
25
#include "document/html/tables.h"
26
#include "globhist/globhist.h"
27
#include "intl/charsets.h"
28
#include "osdep/ascii.h"
29
#include "protocol/http/header.h"
30
#include "protocol/uri.h"
31
#include "sched/session.h"
32
#include "sched/task.h"
33
#include "terminal/draw.h"
34
#include "terminal/terminal.h"
35
#include "util/color.h"
36
#include "util/conv.h"
37
#include "util/error.h"
38
#include "util/fastfind.h"
39
#include "util/memdebug.h"
40
#include "util/memlist.h"
41
#include "util/memory.h"
42
#include "util/string.h"
43
#include "viewer/text/form.h"
44
#include "viewer/text/link.h"
45
#include "viewer/text/view.h"
48
#include "document/html/parser.h"
50
/* TODO: This needs rewrite. Yes, no kidding. */
53
unsigned char *action;
54
unsigned char *target;
59
#define NULL_STRUCT_FORM { NULL, NULL, 0, 0 }
61
INIT_LIST_HEAD(html_stack);
63
#define end_of_tag(c) ((c) == '>' || (c) == '<')
66
/* Character-class test used by the attribute scanners in this file.
 * Yields non-zero when c is below 127 and is either above '>' or is above
 * ' ' while being none of '=', '<' and '>' (the latter two via end_of_tag()). */
atchr(register unsigned char c)
68
return (c < 127 && (c > '>' || (c > ' ' && c != '=' && !end_of_tag(c))));
71
/* This function eats one html element. */
72
/* - e is pointer to the beginning of the element (*e must be '<')
73
* - eof is pointer to the end of scanned area
74
* - parsed element name is stored in name, its length is namelen
75
* - first attribute is stored in attr
76
* - end points to first character behind the html element */
77
/* It returns -1 when it failed (returned values in pointers are invalid) and
80
parse_element(register unsigned char *e, unsigned char *eof,
81
unsigned char **name, int *namelen,
82
unsigned char **attr, unsigned char **end)
84
#define next_char() if (++e == eof) return -1;
87
if (e >= eof || *e != '<') return -1;
92
if (*e == '/') next_char();
93
if (!isA(*e)) return -1;
95
while (isA(*e)) next_char();
97
if (!WHITECHAR(*e) && !end_of_tag(*e) && *e != '/' && *e != ':')
100
if (name && namelen) *namelen = e - *name;
102
while ((WHITECHAR(*e) || *e == '/' || *e == ':')) next_char();
104
/* Skip bad attribute */
105
while (!atchr(*e) && !end_of_tag(*e) && !WHITECHAR(*e)) next_char();
110
while (WHITECHAR(*e)) next_char();
112
/* Skip bad attribute */
113
while (!atchr(*e) && !end_of_tag(*e) && !WHITECHAR(*e)) next_char();
115
if (end_of_tag(*e)) goto end;
117
while (atchr(*e)) next_char();
118
while (WHITECHAR(*e)) next_char();
121
if (end_of_tag(*e)) goto end;
126
while (WHITECHAR(*e)) next_char();
129
unsigned char quote = *e;
133
while (*e != quote) next_char();
135
if (*e == quote) goto quoted_value;
137
while (!WHITECHAR(*e) && !end_of_tag(*e)) next_char();
140
while (WHITECHAR(*e)) next_char();
142
if (!end_of_tag(*e)) goto next_attr;
145
if (end) *end = e + (*e == '>');
150
#define realloc_chrs(x, l) \
151
mem_align_alloc(x, l, (l) + 1, sizeof(unsigned char), 0xFF)
153
#define add_chr(s, l, c) \
155
if (!realloc_chrs(&(s), l)) return NULL; \
160
/* Eat newlines when loading attribute value? */
161
static int get_attr_val_eat_nl = 0;
164
/* Parses html element attributes. */
165
/* - e is attr pointer previously obtained from parse_element,
166
* DON'T PASS HERE ANY OTHER VALUE!!!
167
* - name is searched attribute */
168
/* Returns allocated string containing the attribute, or NULL on failure. */
170
get_attr_val(register unsigned char *e, unsigned char *name)
173
unsigned char *attr = NULL;
178
while (WHITECHAR(*e)) e++;
179
if (end_of_tag(*e) || !atchr(*e)) goto parse_error;
182
while (atchr(*n) && atchr(*e) && upcase(*e) == upcase(*n)) e++, n++;
183
found = !*n && !atchr(*e);
185
while (atchr(*e)) e++;
186
while (WHITECHAR(*e)) e++;
188
if (found) goto found_endattr;
192
while (WHITECHAR(*e)) e++;
196
while (!WHITECHAR(*e) && !end_of_tag(*e)) {
197
if (!*e) goto parse_error;
198
add_chr(attr, attrlen, *e);
202
unsigned char quote = *e;
205
while (*(++e) != quote) {
206
if (*e == ASCII_CR) continue;
207
if (!*e) goto parse_error;
208
if (*e != ASCII_TAB && *e != ASCII_LF)
209
add_chr(attr, attrlen, *e);
210
else if (!get_attr_val_eat_nl)
211
add_chr(attr, attrlen, ' ');
215
add_chr(attr, attrlen, *e);
216
goto parse_quoted_value;
222
add_chr(attr, attrlen, '\0');
225
if (memchr(attr, '&', attrlen)) {
226
unsigned char *saved_attr = attr;
228
attr = convert_string(NULL, saved_attr, attrlen, CSM_QUERY);
229
mem_free(saved_attr);
232
set_mem_comment(trim_chars(attr, ' ', NULL), name, strlen(name));
237
while (!WHITECHAR(*e) && !end_of_tag(*e)) {
238
if (!*e) goto parse_error;
242
unsigned char quote = *e;
245
while (*(++e) != quote)
246
if (!*e) goto parse_error;
248
} while (*e == quote);
255
if (attr) mem_free(attr);
262
static inline unsigned char *
263
get_url_val(unsigned char *e, unsigned char *name)
267
get_attr_val_eat_nl = 1;
268
val = get_attr_val(e, name);
269
get_attr_val_eat_nl = 0;
274
has_attr(unsigned char *e, unsigned char *name)
276
unsigned char *a = get_attr_val(e, name);
311
roman(unsigned char *p, unsigned n)
325
while (roman_tbl[i].n <= n) {
327
strcat(p, roman_tbl[i].s);
330
assertm(!(n && !roman_tbl[i].n),
331
"BUG in roman number convertor");
332
if_assert_failed break;
337
get_color(unsigned char *a, unsigned char *c, color_t *rgb)
342
if (global_doc_opts->color_mode == COLOR_MODE_MONO)
345
if (doc_colors_defaults(global_doc_opts))
348
at = get_attr_val(a, c);
351
r = decode_color(at, rgb);
358
get_bgcolor(unsigned char *a, color_t *rgb)
360
if (global_doc_opts->color_mode == COLOR_MODE_MONO)
363
if (!doc_colors_and_bg(global_doc_opts))
366
return get_color(a, "bgcolor", rgb);
369
static unsigned char *
370
get_target(unsigned char *a)
372
unsigned char *v = get_attr_val(a, "target");
375
if (!strcasecmp(v, "_self")) {
377
v = stracpy(global_doc_opts->framename);
388
struct html_element *element;
390
foreach (element, html_stack) {
391
DBG(":%p:%d:%.*s", element->name, element->namelen,
392
element->namelen, element->name);
394
WDBG("Did you enjoy it?");
398
static struct html_element *
399
search_html_stack(char *name)
401
struct html_element *element;
404
assert(name && *name);
405
namelen = strlen(name);
407
#if 0 /* Debug code. Please keep. */
411
foreach (element, html_stack) {
412
if (element == &html_top)
413
continue; /* skip the top element */
414
if (strlcasecmp(element->name, element->namelen, name, namelen))
423
kill_html_stack_item(struct html_element *e)
426
if_assert_failed return;
427
assertm((void *)e != &html_stack, "trying to free bad html element");
428
if_assert_failed return;
429
assertm(e->type != ELEMENT_IMMORTAL, "trying to kill unkillable element");
430
if_assert_failed return;
432
if (e->attr.link) mem_free(e->attr.link);
433
if (e->attr.target) mem_free(e->attr.target);
434
if (e->attr.image) mem_free(e->attr.image);
435
if (e->attr.title) mem_free(e->attr.title);
436
if (e->attr.href_base) mem_free(e->attr.href_base);
437
if (e->attr.target_base) mem_free(e->attr.target_base);
438
if (e->attr.select) mem_free(e->attr.select);
442
if (list_empty(html_stack) || !html_stack.next) {
443
DBG("killing last element");
449
kill_elem(unsigned char *e)
451
if (!strlcasecmp(html_top.name, html_top.namelen, e, -1))
452
kill_html_stack_item(&html_top);
463
struct html_element *e;
465
printf("HTML stack debug: \n");
466
foreachback (e, html_stack) {
470
for (i = 0; i < e->namelen; i++) printf("%c", e->name[i]);
480
html_stack_dup(enum html_element_type type)
482
struct html_element *e;
483
struct html_element *ep = html_stack.next;
485
assertm(ep && (void *)ep != &html_stack, "html stack empty");
486
if_assert_failed return;
488
e = mem_alloc(sizeof(struct html_element));
490
memcpy(e, ep, sizeof(struct html_element));
491
if (ep->attr.link) e->attr.link = stracpy(ep->attr.link);
492
if (ep->attr.target) e->attr.target = stracpy(ep->attr.target);
493
if (ep->attr.image) e->attr.image = stracpy(ep->attr.image);
494
if (ep->attr.title) e->attr.title = stracpy(ep->attr.title);
495
if (ep->attr.href_base) e->attr.href_base = stracpy(ep->attr.href_base);
496
if (ep->attr.target_base) e->attr.target_base = stracpy(ep->attr.target_base);
497
if (ep->attr.select) e->attr.select = stracpy(ep->attr.select);
500
if (e->attr.link) set_mem_comment(e->attr.link, e->name, e->namelen);
501
if (e->attr.target) set_mem_comment(e->attr.target, e->name, e->namelen);
502
if (e->attr.image) set_mem_comment(e->attr.image, e->name, e->namelen);
503
if (e->attr.title) set_mem_comment(e->attr.title, e->name, e->namelen);
504
if (e->attr.href_base) set_mem_comment(e->attr.href_base, e->name, e->namelen);
505
if (e->attr.target_base) set_mem_comment(e->attr.target_base, e->name, e->namelen);
506
if (e->attr.select) set_mem_comment(e->attr.select, e->name, e->namelen);
509
e->name = e->options = NULL;
512
add_to_list(html_stack, e);
516
void (*put_chars_f)(void *, unsigned char *, int);
517
void (*line_break_f)(void *);
518
void *(*special_f)(void *, enum html_special_type, ...);
520
static unsigned char *eoff;
521
static unsigned char *eofff;
522
static unsigned char *startf;
524
static int line_breax;
531
static int has_link_lines;
534
ln_break(int n, void (*line_break)(void *), void *f)
536
if (!n || html_top.invisible) return;
537
while (n > line_breax) line_breax++, line_break(f);
543
put_chrs(unsigned char *start, int len,
544
void (*put_chars)(void *, unsigned char *, int), void *f)
546
if (par_format.align == AL_NONE) putsp = 0;
547
if (!len || html_top.invisible) return;
548
if (putsp == 1) put_chars(f, " ", 1), position++, putsp = -1;
550
if (start[0] == ' ') start++, len--;
555
if (par_format.align == AL_NONE) putsp = 0;
558
if (start[len - 1] == ' ') putsp = -1;
559
if (par_format.align == AL_NONE) putsp = 0;
561
put_chars(f, start, len);
564
if (was_li > 0) was_li--;
568
kill_until(int ls, ...)
571
struct html_element *e = &html_top;
575
while ((void *)e != &html_stack) {
581
unsigned char *s = va_arg(arg, unsigned char *);
587
int slen = strlen(s);
589
if (!strlcasecmp(e->name, e->namelen, s, slen)) {
591
if (e->type != ELEMENT_KILLABLE) break;
594
} else if (sk == 1) {
605
if (e->type != ELEMENT_KILLABLE
606
|| (!strlcasecmp(e->name, e->namelen, "TABLE", 5)))
609
if (e->namelen == 2 && upcase(e->name[0]) == 'T') {
610
unsigned char c = upcase(e->name[1]);
612
if (c == 'D' || c == 'H' || c == 'R') break;
622
while ((void *)e != &html_stack) {
623
if (ls && e == html_stack.next) break;
624
if (e->linebreak > l) l = e->linebreak;
626
kill_html_stack_item(e->next);
628
ln_break(l, line_break_f, ff);
632
get_num(unsigned char *a, unsigned char *n)
634
unsigned char *al = get_attr_val(a, n);
641
s = strtoul(al, (char **)&end, 10);
642
if (errno || !*al || *end || s < 0) s = -1;
652
parse_width(unsigned char *w, int trunc)
660
while (WHITECHAR(*w)) w++;
661
for (l = 0; w[l] && w[l] != ','; l++);
663
while (l && WHITECHAR(w[l - 1])) l--;
666
if (w[l - 1] == '%') l--, p = 1;
668
while (l && WHITECHAR(w[l - 1])) l--;
671
width = par_format.width - par_format.leftmargin - par_format.rightmargin;
674
s = strtoul((char *)w, (char **)&end, 10);
675
if (errno) return -1;
682
} else s = (s + (HTML_CHAR_WIDTH - 1) / 2) / HTML_CHAR_WIDTH;
684
if (trunc && s > width) s = width;
692
get_width(unsigned char *a, unsigned char *n, int trunc)
695
unsigned char *w = get_attr_val(a, n);
698
r = parse_width(w, trunc);
705
set_fragment_identifier(unsigned char *attr_name, unsigned char *attr)
707
unsigned char *id_attr = get_attr_val(attr_name, attr);
710
special_f(ff, SP_TAG, id_attr);
716
add_fragment_identifier(void *part, unsigned char *attr)
718
special_f(part, SP_TAG, attr);
721
static struct form form = NULL_STRUCT_FORM;
723
static unsigned char *last_form_tag;
724
static unsigned char *last_form_attr;
725
static unsigned char *last_input_tag;
727
static unsigned char *object_src;
730
put_link_line(unsigned char *prefix, unsigned char *linkname,
731
unsigned char *link, unsigned char *target)
734
html_stack_dup(ELEMENT_KILLABLE);
735
ln_break(1, line_break_f, ff);
736
if (format.link) mem_free(format.link), format.link = NULL;
737
if (format.target) mem_free(format.target), format.target = NULL;
738
if (format.title) mem_free(format.title), format.title = NULL;
740
put_chrs(prefix, strlen(prefix), put_chars_f, ff);
741
format.link = join_urls(format.href_base, link);
742
format.target = stracpy(target);
743
format.fg = format.clink;
744
put_chrs(linkname, strlen(linkname), put_chars_f, ff);
745
ln_break(1, line_break_f, ff);
746
kill_html_stack_item(&html_top);
750
html_span(unsigned char *a)
755
/* Tag handler: turns on the AT_BOLD bit in format.attr. The attribute
 * pointer a is not consulted by the visible statement. */
html_bold(unsigned char *a)
757
format.attr |= AT_BOLD;
761
/* Tag handler: turns on the AT_ITALIC bit in format.attr. The attribute
 * pointer a is not consulted by the visible statement. */
html_italic(unsigned char *a)
763
format.attr |= AT_ITALIC;
767
/* Tag handler: turns on the AT_UNDERLINE bit in format.attr. The attribute
 * pointer a is not consulted by the visible statement. */
html_underline(unsigned char *a)
769
format.attr |= AT_UNDERLINE;
773
/* Tag handler: turns on the AT_FIXED bit in format.attr. The attribute
 * pointer a is not consulted by the visible statement. */
html_fixed(unsigned char *a)
775
format.attr |= AT_FIXED;
779
/* Tag handler: turns on the AT_SUBSCRIPT bit in format.attr. The attribute
 * pointer a is not consulted by the visible statement. */
html_subscript(unsigned char *a)
781
format.attr |= AT_SUBSCRIPT;
785
/* Tag handler: turns on the AT_SUPERSCRIPT bit in format.attr. The attribute
 * pointer a is not consulted by the visible statement. */
html_superscript(unsigned char *a)
787
format.attr |= AT_SUPERSCRIPT;
790
/* Extract the extra information that is available for elements which can
791
* receive focus. Call this from each element which supports tabindex or
793
/* Note that in ELinks, we support those attributes (I mean, we call this
794
* function) while processing any focusable element (otherwise it'd have zero
795
* tabindex, thus messing up navigation between links), thus we support these
796
* attributes even near tags where we're not supposed to (like IFRAME, FRAME or
797
* LINK). I think this doesn't make any harm ;). --pasky */
799
html_focusable(unsigned char *a)
801
unsigned char *accesskey;
804
format.accesskey = 0;
805
format.tabindex = 0x80000000;
809
accesskey = get_attr_val(a, "accesskey");
811
accesskey[0] = upcase(accesskey[0]);
812
format.accesskey = read_key(accesskey);
816
tabindex = get_num(a, "tabindex");
818
format.tabindex = (tabindex & 0x7fff) << 16;
823
html_a(unsigned char *a)
827
href = get_url_val(a, "href");
829
unsigned char *target;
831
if (format.link) mem_free(format.link);
832
format.link = join_urls(format.href_base, trim_chars(href, ' ', 0));
836
target = get_target(a);
838
if (format.target) mem_free(format.target);
839
format.target = target;
841
if (format.target) mem_free(format.target);
842
format.target = stracpy(format.target_base);
844
#ifdef CONFIG_GLOBHIST
845
if (get_global_history_item(format.link))
846
format.fg = format.vlink;
849
format.fg = format.clink;
851
if (format.title) mem_free(format.title);
852
format.title = get_attr_val(a, "title");
857
kill_html_stack_item(&html_top);
860
set_fragment_identifier(a, "name");
864
html_font(unsigned char *a)
866
unsigned char *al = get_attr_val(a, "size");
871
unsigned char *nn = al;
874
if (*al == '+') p = 1, nn++;
875
else if (*al == '-') p = -1, nn++;
878
s = strtoul(nn, (char **)&end, 10);
879
if (!errno && *nn && !*end) {
881
if (!p) format.fontsize = s;
882
else format.fontsize += p * s;
883
if (format.fontsize < 1) format.fontsize = 1;
884
else if (format.fontsize > 7) format.fontsize = 7;
888
get_color(a, "color", &format.fg);
892
html_img(unsigned char *a)
894
int ismap, usemap = 0;
895
int add_brackets = 0;
896
unsigned char *al = get_attr_val(a, "usemap");
902
html_stack_dup(ELEMENT_KILLABLE);
903
if (format.link) mem_free(format.link);
904
if (format.form) format.form = NULL;
905
u = join_urls(format.href_base, al);
911
format.link = straconcat("MAP@", u, NULL);
912
format.attr |= AT_BOLD;
916
ismap = format.link && has_attr(a, "ismap") && !usemap;
918
al = get_attr_val(a, "alt");
919
if (!al) al = get_attr_val(a, "title");
922
if (al) mem_free(al);
923
if (!global_doc_opts->images && !format.link) return;
928
al = stracpy("USEMAP");
930
al = stracpy("ISMAP");
932
unsigned char *src = NULL;
936
src = null_or_stracpy(object_src);
937
if (!src) src = get_url_val(a, "src");
939
/* We can display image as [foo.gif]. */
941
max_len = get_opt_int("document.browse.images.file_tags");
944
/* This should be maybe whole terminal width? */
945
max_real_len = par_format.width * max_len / 100;
947
/* It didn't work well and I'm too lazy to code that;
948
* absolute values will have to be enough for now ;).
950
max_real_len = max_len;
953
if ((!max_len || max_real_len > 0) && src) {
954
int len = strcspn(src, "?");
955
unsigned char *start;
957
for (start = src + len; start > src; start--)
958
if (dir_sep(*start)) {
963
if (start > src) len = strcspn(start, "?");
965
if (max_len && len > max_real_len) {
966
int max_part_len = max_real_len / 2;
968
al = mem_alloc(max_part_len * 2 + 2);
971
/* TODO: Faster way ?? sprintf() is quite expensive. */
972
sprintf(al, "%.*s*%.*s",
974
max_part_len, start + len
978
al = mem_alloc(len + 1);
981
/* TODO: Faster way ?? */
982
sprintf(al, "%.*s", len, start);
988
if (src) mem_free(src);
992
if (format.image) mem_free(format.image), format.image = NULL;
993
if (format.title) mem_free(format.title), format.title = NULL;
996
int img_link_tag = get_opt_int("document.browse.images.image_link_tagging");
999
if (img_link_tag && (img_link_tag == 2 || add_brackets)) {
1000
unsigned char *img_link_prefix = get_opt_str("document.browse.images.image_link_prefix");
1001
unsigned char *img_link_suffix = get_opt_str("document.browse.images.image_link_suffix");
1002
unsigned char *tmp = straconcat(img_link_prefix, al, img_link_suffix, NULL);
1010
if (!get_opt_bool("document.browse.images.show_any_as_links")) {
1015
if ((s = null_or_stracpy(object_src))
1016
|| (s = get_url_val(a, "src"))
1017
|| (s = get_url_val(a, "dynsrc"))) {
1018
format.image = join_urls(format.href_base, s);
1022
format.title = get_attr_val(a, "title");
1027
html_stack_dup(ELEMENT_KILLABLE);
1028
h = stracpy(format.link);
1030
add_to_strn(&h, "?0,0");
1031
mem_free(format.link);
1036
/* This is not 100% appropriate for <img>, but well, accepting
1037
* accesskey and tabindex near <img> is just our little
1038
* extension to the standard. After all, it makes sense. */
1041
put_chrs(al, strlen(al), put_chars_f, ff);
1042
if (ismap) kill_html_stack_item(&html_top);
1043
/* Anything below must take care of properly handling the
1044
* show_any_as_links variable being off! */
1046
if (format.image) mem_free(format.image), format.image = NULL;
1047
if (format.title) mem_free(format.title), format.title = NULL;
1048
if (al) mem_free(al);
1049
if (usemap) kill_html_stack_item(&html_top);
1050
/*put_chrs(" ", 1, put_chars_f, ff);*/
1054
html_body(unsigned char *a)
1056
get_color(a, "text", &format.fg);
1057
get_color(a, "link", &format.clink);
1058
get_color(a, "vlink", &format.vlink);
1060
get_bgcolor(a, &par_format.bgcolor);
1061
if (get_bgcolor(a, &format.bg) >= 0) {
1062
/* Modify the root HTML element - format_html_part() will take
1063
* this from there. */
1064
struct html_element *e = html_stack.prev;
1066
e->parattr.bgcolor = par_format.bgcolor;
1067
e->attr.bg = format.bg;
1071
&& par_format.bgcolor
1072
&& !search_html_stack("BODY")) {
1073
special_f(ff, SP_COLOR_LINK_LINES);
1078
/* Tag handler: flags the top stack element (html_top) as invisible and sets
 * its type to ELEMENT_DONT_KILL. Note that put_chrs() and ln_break() return
 * early while html_top.invisible is set. */
html_skip(unsigned char *a)
1080
html_top.invisible = 1;
1081
html_top.type = ELEMENT_DONT_KILL;
1085
/* Tag handler for the title element: performs the same two assignments as
 * html_skip() - html_top becomes invisible and its type is set to
 * ELEMENT_DONT_KILL. */
html_title(unsigned char *a)
1087
html_top.invisible = 1;
1088
html_top.type = ELEMENT_DONT_KILL;
1092
html_center(unsigned char *a)
1094
par_format.align = AL_CENTER;
1096
par_format.leftmargin = par_format.rightmargin = 0;
1100
html_linebrk(unsigned char *a)
1102
unsigned char *al = get_attr_val(a, "align");
1105
if (!strcasecmp(al, "left")) par_format.align = AL_LEFT;
1106
else if (!strcasecmp(al, "right")) par_format.align = AL_RIGHT;
1107
else if (!strcasecmp(al, "center")) {
1108
par_format.align = AL_CENTER;
1110
par_format.leftmargin = par_format.rightmargin = 0;
1111
} else if (!strcasecmp(al, "justify")) par_format.align = AL_BLOCK;
1117
html_br(unsigned char *a)
1121
ln_break(2, line_break_f, ff);
1127
html_form(unsigned char *a)
1133
html_p(unsigned char *a)
1135
int_lower_bound(&par_format.leftmargin, margin);
1136
int_lower_bound(&par_format.rightmargin, margin);
1137
/*par_format.align = AL_LEFT;*/
1142
/* Tag handler: widens the paragraph's left margin by one and forces
 * left alignment (AL_LEFT). */
html_address(unsigned char *a)
1144
par_format.leftmargin++;
1145
par_format.align = AL_LEFT;
1149
/* Tag handler: widens the paragraph's left margin by two and forces
 * left alignment (AL_LEFT). */
html_blockquote(unsigned char *a)
1151
par_format.leftmargin += 2;
1152
par_format.align = AL_LEFT;
1156
html_h(int h, unsigned char *a,
1157
enum format_align default_align)
1159
if (!par_format.align) par_format.align = default_align;
1165
switch (par_format.align) {
1167
par_format.leftmargin = h * 2;
1168
par_format.rightmargin = 0;
1171
par_format.leftmargin = 0;
1172
par_format.rightmargin = h * 2;
1175
par_format.leftmargin = par_format.rightmargin = 0;
1178
par_format.leftmargin = par_format.rightmargin = h * 2;
1181
/* Silence compiler warnings */
1187
/* Level-1 heading handler: sets AT_BOLD in format.attr, then hands off to
 * html_h() with level 1 and AL_CENTER as the default alignment. */
html_h1(unsigned char *a)
1189
format.attr |= AT_BOLD;
1190
html_h(1, a, AL_CENTER);
1194
/* Level-2 heading handler: hands off to html_h() with level 2 and AL_LEFT
 * as the default alignment. */
html_h2(unsigned char *a)
1196
html_h(2, a, AL_LEFT);
1200
/* Level-3 heading handler: hands off to html_h() with level 3 and AL_LEFT
 * as the default alignment. */
html_h3(unsigned char *a)
1202
html_h(3, a, AL_LEFT);
1206
/* Level-4 heading handler: hands off to html_h() with level 4 and AL_LEFT
 * as the default alignment. */
html_h4(unsigned char *a)
1208
html_h(4, a, AL_LEFT);
1212
/* Level-5 heading handler: hands off to html_h() with level 5 and AL_LEFT
 * as the default alignment. */
html_h5(unsigned char *a)
1214
html_h(5, a, AL_LEFT);
1218
/* Level-6 heading handler: hands off to html_h() with level 6 and AL_LEFT
 * as the default alignment. */
html_h6(unsigned char *a)
1220
html_h(6, a, AL_LEFT);
1224
/* Tag handler for preformatted text: switches alignment to AL_NONE, reduces
 * the left margin to 1 if it was greater than 1 and to 0 otherwise, and
 * zeroes the right margin. */
html_pre(unsigned char *a)
1226
par_format.align = AL_NONE;
1227
/* The comparison result (0 or 1) becomes the new margin. */
par_format.leftmargin = (par_format.leftmargin > 1);
1228
par_format.rightmargin = 0;
1232
html_xmp(unsigned char *a)
1239
html_hr(unsigned char *a)
1241
int i/* = par_format.width - 10*/;
1242
unsigned char r = (unsigned char)BORDER_DHLINE;
1243
int q = get_num(a, "size");
1245
if (q >= 0 && q < 2) r = (unsigned char)BORDER_SHLINE;
1246
html_stack_dup(ELEMENT_KILLABLE);
1247
par_format.align = AL_CENTER;
1248
if (format.link) mem_free(format.link), format.link = NULL;
1251
if (par_format.align == AL_BLOCK) par_format.align = AL_CENTER;
1252
par_format.leftmargin = par_format.rightmargin = margin;
1254
i = get_width(a, "width", 1);
1255
if (i == -1) i = par_format.width - (margin - 2) * 2;
1256
format.attr = AT_GRAPHICS;
1257
special_f(ff, SP_NOWRAP, 1);
1258
while (i-- > 0) put_chrs(&r, 1, put_chars_f, ff);
1259
special_f(ff, SP_NOWRAP, 0);
1260
ln_break(2, line_break_f, ff);
1261
kill_html_stack_item(&html_top);
1265
html_table(unsigned char *a)
1267
par_format.leftmargin = par_format.rightmargin = margin;
1268
par_format.align = AL_LEFT;
1274
html_tr(unsigned char *a)
1280
html_th(unsigned char *a)
1282
/*html_linebrk(a);*/
1283
kill_until(1, "TD", "TH", "", "TR", "TABLE", NULL);
1284
format.attr |= AT_BOLD;
1285
put_chrs(" ", 1, put_chars_f, ff);
1289
html_td(unsigned char *a)
1291
/*html_linebrk(a);*/
1292
kill_until(1, "TD", "TH", "", "TR", "TABLE", NULL);
1293
format.attr &= ~AT_BOLD;
1294
put_chrs(" ", 1, put_chars_f, ff);
1298
html_base(unsigned char *a)
1300
unsigned char *al = get_url_val(a, "href");
1303
if (format.href_base) mem_free(format.href_base);
1304
format.href_base = join_urls(((struct html_element *)html_stack.prev)->attr.href_base, al);
1310
if (format.target_base) mem_free(format.target_base);
1311
format.target_base = al;
1316
html_ul(unsigned char *a)
1321
par_format.list_level++;
1322
par_format.list_number = 0;
1323
par_format.flags = P_STAR;
1325
al = get_attr_val(a, "type");
1327
if (!strcasecmp(al, "disc") || !strcasecmp(al, "circle"))
1328
par_format.flags = P_O;
1329
else if (!strcasecmp(al, "square"))
1330
par_format.flags = P_PLUS;
1333
par_format.leftmargin += 2 + (par_format.list_level > 1);
1335
int_upper_bound(&par_format.leftmargin, par_format.width / 2);
1337
par_format.align = AL_LEFT;
1338
html_top.type = ELEMENT_DONT_KILL;
1342
html_ol(unsigned char *a)
1347
par_format.list_level++;
1348
st = get_num(a, "start");
1349
if (st == -1) st = 1;
1350
par_format.list_number = st;
1351
par_format.flags = P_NUMBER;
1353
al = get_attr_val(a, "type");
1355
if (*al && !al[1]) {
1356
if (*al == '1') par_format.flags = P_NUMBER;
1357
else if (*al == 'a') par_format.flags = P_alpha;
1358
else if (*al == 'A') par_format.flags = P_ALPHA;
1359
else if (*al == 'r') par_format.flags = P_roman;
1360
else if (*al == 'R') par_format.flags = P_ROMAN;
1361
else if (*al == 'i') par_format.flags = P_roman;
1362
else if (*al == 'I') par_format.flags = P_ROMAN;
1367
par_format.leftmargin += (par_format.list_level > 1);
1369
int_upper_bound(&par_format.leftmargin, par_format.width / 2);
1371
par_format.align = AL_LEFT;
1372
html_top.type = ELEMENT_DONT_KILL;
1376
html_li(unsigned char *a)
1378
/* When handling the code <li><li> @was_li will be 1 and it means we
1379
* have to insert a line break since no list item content has done it
1383
ln_break(1, line_break_f, ff);
1386
/*kill_until(0, "", "UL", "OL", NULL);*/
1387
if (!par_format.list_number) {
1388
unsigned char x[7] = "* ";
1389
int t = par_format.flags & P_LISTMASK;
1391
if (t == P_O) x[0] = 'o';
1392
if (t == P_PLUS) x[0] = '+';
1393
put_chrs(x, 7, put_chars_f, ff);
1394
par_format.leftmargin += 2;
1395
par_format.align = AL_LEFT;
1397
unsigned char c = 0;
1398
unsigned char n[32];
1399
int t = par_format.flags & P_LISTMASK;
1400
int s = get_num(a, "value");
1402
if (s != -1) par_format.list_number = s;
1403
if ((t != P_roman && t != P_ROMAN && par_format.list_number < 10)
1404
|| t == P_alpha || t == P_ALPHA)
1405
put_chrs(" ", 6, put_chars_f, ff), c = 1;
1407
if (t == P_ALPHA || t == P_alpha) {
1408
n[0] = par_format.list_number
1409
? (par_format.list_number - 1) % 26
1410
+ (t == P_ALPHA ? 'A' : 'a')
1413
} else if (t == P_ROMAN || t == P_roman) {
1414
roman(n, par_format.list_number);
1416
register unsigned char *x;
1418
for (x = n; *x; x++) *x = upcase(*x);
1421
ulongcat(n, NULL, par_format.list_number, (sizeof(n) - 1), 0);
1423
put_chrs(n, strlen(n), put_chars_f, ff);
1424
put_chrs(". ", 7, put_chars_f, ff);
1425
par_format.leftmargin += strlen(n) + c + 2;
1426
par_format.align = AL_LEFT;
1427
par_format.list_number = 0;
1428
html_top.next->parattr.list_number++;
1437
/* Tag handler for a definition list.
 * - a is the attribute pointer; only the "compact" attribute is looked up.
 * Updates par_format (flags, margins, list counters, alignment), marks
 * html_top as ELEMENT_DONT_KILL and, unless the list is compact, emits a
 * line break and records linebreak = 2 on html_top. */
html_dl(unsigned char *a)
1439
/* Start from "not compact", then re-enable it only if the attribute is there. */
par_format.flags &= ~P_COMPACT;
1440
if (has_attr(a, "compact")) par_format.flags |= P_COMPACT;
1441
/* Extra indentation is applied only when list_level is already non-zero. */
if (par_format.list_level) par_format.leftmargin += 5;
1442
par_format.list_level++;
1443
par_format.list_number = 0;
1444
par_format.align = AL_LEFT;
1445
/* Remember the current left margin; html_dt() restores leftmargin from it. */
par_format.dd_margin = par_format.leftmargin;
1446
html_top.type = ELEMENT_DONT_KILL;
1447
if (!(par_format.flags & P_COMPACT)) {
1448
ln_break(2, line_break_f, ff);
1449
html_top.linebreak = 2;
1454
/* Tag handler for a definition term.
 * - a is the attribute pointer; only the "compact" attribute is looked up.
 * Calls kill_until(0, "", "DL", NULL), forces left alignment, resets the
 * left margin to par_format.dd_margin and, when neither the P_COMPACT flag
 * nor a "compact" attribute is present, emits ln_break(2, ...). */
html_dt(unsigned char *a)
1456
kill_until(0, "", "DL", NULL);
1457
par_format.align = AL_LEFT;
1458
par_format.leftmargin = par_format.dd_margin;
1459
if (!(par_format.flags & P_COMPACT) && !has_attr(a, "compact"))
1460
ln_break(2, line_break_f, ff);
1464
html_dd(unsigned char *a)
1466
kill_until(0, "", "DL", NULL);
1468
par_format.leftmargin = par_format.dd_margin + (table_level ? 3 : 8);
1470
int_upper_bound(&par_format.leftmargin, par_format.width / 2);
1471
par_format.align = AL_LEFT;
1475
get_html_form(unsigned char *a, struct form *form)
1479
form->method = FM_GET;
1481
al = get_attr_val(a, "method");
1483
if (!strcasecmp(al, "post")) {
1484
char *ax = get_attr_val(a, "enctype");
1486
form->method = FM_POST;
1488
if (!strcasecmp(ax, "multipart/form-data"))
1489
form->method = FM_POST_MP;
1496
al = get_attr_val(a, "action");
1498
form->action = join_urls(format.href_base, trim_chars(al, ' ', 0));
1501
form->action = stracpy(format.href_base);
1503
int len = get_no_post_url_length(form->action);
1505
form->action[len] = '\0';
1507
/* We have to do following for GET method, because we would end
1508
* up with two '?' otherwise. */
1509
if (form->method == FM_GET) {
1510
unsigned char *ch = strchr(form->action, '?');
1520
form->target = stracpy(format.target_base);
1523
form->num = a - startf;
1527
find_form_for_input(unsigned char *i)
1529
unsigned char *s, *ss, *name, *attr;
1530
unsigned char *lf = NULL;
1531
unsigned char *la = NULL;
1534
if (form.action) mem_free(form.action);
1535
if (form.target) mem_free(form.target);
1536
memset(&form, 0, sizeof(form));
1538
if (!special_f(ff, SP_USED, NULL)) return;
1540
if (last_input_tag && i <= last_input_tag && i > last_form_tag) {
1541
get_html_form(last_form_attr, &form);
1544
if (last_input_tag && i > last_input_tag)
1550
while (s < i && *s != '<') {
1555
if (s >= i) goto end_parse;
1556
if (s + 2 <= eofff && (s[1] == '!' || s[1] == '?')) {
1557
s = skip_comment(s, i);
1561
if (parse_element(s, i, &name, &namelen, &attr, &s)) goto sp;
1562
if (strlcasecmp(name, namelen, "FORM", 4)) goto se;
1571
last_form_attr = la;
1573
get_html_form(la, &form);
1575
memset(&form, 0, sizeof(struct form));
1580
html_button(unsigned char *a)
1583
struct form_control *fc;
1585
find_form_for_input(a);
1588
fc = mem_calloc(1, sizeof(struct form_control));
1591
al = get_attr_val(a, "type");
1593
fc->type = FC_SUBMIT;
1597
if (!strcasecmp(al, "submit")) fc->type = FC_SUBMIT;
1598
else if (!strcasecmp(al, "reset")) fc->type = FC_RESET;
1599
else if (!strcasecmp(al, "button")) {
1601
put_chrs(" [ ", 8, put_chars_f, ff);
1603
al = get_attr_val(a, "value");
1605
put_chrs(al, strlen(al), put_chars_f, ff);
1607
} else put_chrs("BUTTON", 6, put_chars_f, ff);
1609
put_chrs(" ] ", 8, put_chars_f, ff);
1620
fc->form_num = last_form_tag - startf;
1621
fc->ctrl_num = a - last_form_tag;
1622
fc->position = a - startf;
1623
fc->method = form.method;
1624
fc->action = null_or_stracpy(form.action);
1625
fc->name = get_attr_val(a, "name");
1627
fc->default_value = get_attr_val(a, "value");
1628
if (!fc->default_value && fc->type == FC_SUBMIT) fc->default_value = stracpy("Submit");
1629
if (!fc->default_value && fc->type == FC_RESET) fc->default_value = stracpy("Reset");
1630
if (!fc->default_value) fc->default_value = stracpy("");
1632
fc->ro = has_attr(a, "disabled") ? 2 : has_attr(a, "readonly") ? 1 : 0;
1633
if (fc->type == FC_IMAGE) fc->alt = get_attr_val(a, "alt");
1634
special_f(ff, SP_CONTROL, fc);
1636
format.attr |= AT_BOLD;
1638
put_chrs("[ ", 7, put_chars_f, ff);
1639
if (fc->default_value) put_chrs(fc->default_value, strlen(fc->default_value), put_chars_f, ff);
1640
put_chrs(" ]", 7, put_chars_f, ff);
1641
put_chrs(" ", 1, put_chars_f, ff);
1646
html_input(unsigned char *a)
1650
struct form_control *fc;
1652
find_form_for_input(a);
1655
fc = mem_calloc(1, sizeof(struct form_control));
1658
al = get_attr_val(a, "type");
1663
if (!strcasecmp(al, "text")) fc->type = FC_TEXT;
1664
else if (!strcasecmp(al, "password")) fc->type = FC_PASSWORD;
1665
else if (!strcasecmp(al, "checkbox")) fc->type = FC_CHECKBOX;
1666
else if (!strcasecmp(al, "radio")) fc->type = FC_RADIO;
1667
else if (!strcasecmp(al, "submit")) fc->type = FC_SUBMIT;
1668
else if (!strcasecmp(al, "reset")) fc->type = FC_RESET;
1669
else if (!strcasecmp(al, "file")) fc->type = FC_FILE;
1670
else if (!strcasecmp(al, "hidden")) fc->type = FC_HIDDEN;
1671
else if (!strcasecmp(al, "image")) fc->type = FC_IMAGE;
1672
else if (!strcasecmp(al, "button")) {
1674
put_chrs(" [ ", 8, put_chars_f, ff);
1676
al = get_attr_val(a, "value");
1678
put_chrs(al, strlen(al), put_chars_f, ff);
1680
} else put_chrs("BUTTON", 6, put_chars_f, ff);
1682
put_chrs(" ] ", 8, put_chars_f, ff);
1685
} else fc->type = FC_TEXT;
1689
fc->form_num = last_form_tag - startf;
1690
fc->ctrl_num = a - last_form_tag;
1691
fc->position = a - startf;
1692
fc->method = form.method;
1693
fc->action = null_or_stracpy(form.action);
1694
fc->target = null_or_stracpy(form.target);
1695
fc->name = get_attr_val(a, "name");
1697
if (fc->type != FC_FILE) fc->default_value = get_attr_val(a, "value");
1698
if (!fc->default_value && fc->type == FC_CHECKBOX) fc->default_value = stracpy("on");
1699
if (!fc->default_value && fc->type == FC_SUBMIT) fc->default_value = stracpy("Submit");
1700
if (!fc->default_value && fc->type == FC_RESET) fc->default_value = stracpy("Reset");
1701
if (!fc->default_value) fc->default_value = stracpy("");
1703
fc->size = get_num(a, "size");
1704
if (fc->size == -1) fc->size = global_doc_opts->default_form_input_size;
1706
if (fc->size > global_doc_opts->width) fc->size = global_doc_opts->width;
1707
fc->maxlength = get_num(a, "maxlength");
1708
if (fc->maxlength == -1) fc->maxlength = MAXINT;
1709
if (fc->type == FC_CHECKBOX || fc->type == FC_RADIO) fc->default_state = has_attr(a, "checked");
1710
fc->ro = has_attr(a, "disabled") ? 2 : has_attr(a, "readonly") ? 1 : 0;
1711
if (fc->type == FC_IMAGE) fc->alt = get_attr_val(a, "alt");
1712
if (fc->type == FC_HIDDEN) goto hid;
1714
put_chrs(" ", 1, put_chars_f, ff);
1715
html_stack_dup(ELEMENT_KILLABLE);
1717
if (format.title) mem_free(format.title);
1718
format.title = get_attr_val(a, "title");
1723
format.attr |= AT_BOLD;
1724
for (i = 0; i < fc->size; i++) put_chrs("_", 1, put_chars_f, ff);
1727
format.attr |= AT_BOLD;
1728
put_chrs("[ ]", 8, put_chars_f, ff);
1731
format.attr |= AT_BOLD;
1732
put_chrs("( )", 8, put_chars_f, ff);
1735
if (format.image) mem_free(format.image), format.image = NULL;
1736
if ((al = get_url_val(a, "src"))
1737
|| (al = get_url_val(a, "dynsrc"))) {
1738
format.image = join_urls(format.href_base, al);
1741
format.attr |= AT_BOLD;
1742
put_chrs("[ ", 7, put_chars_f, ff);
1743
if (fc->alt) put_chrs(fc->alt, strlen(fc->alt), put_chars_f, ff);
1744
else if (fc->name) put_chrs(fc->name, strlen(fc->name), put_chars_f, ff);
1745
else put_chrs("Submit", 6, put_chars_f, ff);
1746
put_chrs(" ]", 7, put_chars_f, ff);
1750
format.attr |= AT_BOLD;
1751
put_chrs("[ ", 7, put_chars_f, ff);
1752
if (fc->default_value)
1753
put_chrs(fc->default_value, strlen(fc->default_value), put_chars_f, ff);
1754
put_chrs(" ]", 7, put_chars_f, ff);
1759
INTERNAL("bad control type");
1761
kill_html_stack_item(&html_top);
1762
put_chrs(" ", 1, put_chars_f, ff);
1765
special_f(ff, SP_CONTROL, fc);
1769
/* Tag handler for <select>. The visible statements fetch the element's
 * "name" attribute, set html_top.type to ELEMENT_DONT_KILL and store
 * twice the has_attr() result for "disabled" in format.select_disabled.
 * NOTE(review): some lines of this function are missing from this
 * listing; confirm the rest against the full source. */
html_select(unsigned char *a)
1771
/* Note I haven't seen this code in use, do_html_select() seems to take
1772
* care of business. --FF */
1774
unsigned char *al = get_attr_val(a, "name");
1778
html_top.type = ELEMENT_DONT_KILL;
1780
format.select_disabled = 2 * has_attr(a, "disabled");
1784
/* Tag handler for <option>: registers a form control of type FC_CHECKBOX
 * named after format.select and draws it as a bold "[ ]" surrounded by
 * single spaces. Returns early when format.select is not set.
 *
 * @a points at the attributes of the <option> tag. The control's value is
 * read from the "value" attribute; the scanning code below also collects
 * the text that follows the tag into a string and assigns it to val.
 * NOTE(review): several lines of this function are missing from this
 * listing, so how these two sources of val are chosen between should be
 * confirmed against the full source. */
html_option(unsigned char *a)
1786
struct form_control *fc;
1789
find_form_for_input(a);
1790
if (!format.select) return;
1792
fc = mem_calloc(1, sizeof(struct form_control));
1795
val = get_attr_val(a, "value");
1798
unsigned char *p, *r;
1799
unsigned char *name;
1802
/* Walk backwards from the attributes to the '<' that opens this tag. */
for (p = a - 1; *p != '<'; p--);
1804
if (!init_string(&str)) goto x;
1805
if (parse_element(p, eoff, NULL, NULL, NULL, &p)) {
1806
INTERNAL("parse element failed");
1812
while (p < eoff && WHITECHAR(*p)) p++;
1813
while (p < eoff && !WHITECHAR(*p) && *p != '<') {
1816
add_char_to_string(&str, *p), p++;
1820
val = str.source; /* Has to be before the possible 'goto x' */
1822
while (r < eoff && WHITECHAR(*r)) r++;
1823
if (r >= eoff) goto x;
1824
/* NOTE(review): r < eoff holds here, so "r - 2 <= eoff" is always true
 * and r[1] may be read at eoff. The matching check in do_html_select()
 * is written "html + 2 <= eof"; this looks like a typo for "r + 2" --
 * confirm before changing. */
if (r - 2 <= eoff && (r[1] == '!' || r[1] == '?')) {
1825
p = skip_comment(r, eoff);
1828
if (parse_element(r, eoff, &name, &namelen, NULL, &p)) goto pppp;
1829
if (!(!strlcasecmp(name, namelen, "OPTION", 6) ||
1830
!strlcasecmp(name, namelen, "/OPTION", 7) ||
1831
!strlcasecmp(name, namelen, "SELECT", 6) ||
1832
!strlcasecmp(name, namelen, "/SELECT", 7) ||
1833
!strlcasecmp(name, namelen, "OPTGROUP", 8) ||
1834
!strlcasecmp(name, namelen, "/OPTGROUP", 9)
1840
/* Fill in the control: position data is stored as offsets relative to
 * startf and last_form_tag, form data is copied from the current form. */
fc->form_num = last_form_tag - startf;
1841
fc->ctrl_num = a - last_form_tag;
1842
fc->position = a - startf;
1843
fc->method = form.method;
1844
fc->action = null_or_stracpy(form.action);
1845
fc->type = FC_CHECKBOX;
1846
fc->name = null_or_stracpy(format.select);
1847
fc->default_value = val;
1848
fc->default_state = has_attr(a, "selected");
1849
fc->ro = format.select_disabled;
1850
if (has_attr(a, "disabled")) fc->ro = 2;
1851
put_chrs(" ", 1, put_chars_f, ff);
1852
html_stack_dup(ELEMENT_KILLABLE);
1854
format.attr |= AT_BOLD;
1855
put_chrs("[ ]", 3, put_chars_f, ff);
1856
kill_html_stack_item(&html_top);
1857
put_chrs(" ", 1, put_chars_f, ff);
1858
special_f(ff, SP_CONTROL, fc);
1861
static struct list_menu lnk_menu;
1864
/* Build a form control of type FC_SELECT for a <select> element by
 * scanning the markup from @html up to @eof.
 *
 * @attr is the attribute string of the <select> tag and @f is the context
 * passed on to special_f() and put_chrs(). Returns 1 immediately when the
 * element has a "multiple" attribute; other return values and the use of
 * @end are not visible in this listing.
 *
 * While scanning, text between tags is converted with convert_string()
 * and appended to the pending label, comments are skipped with
 * skip_comment(), OPTION tags store their "value" in val[] and may add a
 * menu item from their "label" attribute, and OPTGROUP tags add a group
 * entry. With no options collected (order == 0) the function jumps to
 * abort. Otherwise the control is filled in, the menu is detached from
 * lnk_menu, and the control is drawn as "[" + underscores + "]".
 * NOTE(review): many lines are missing from this listing; confirm the
 * control flow against the full source. */
do_html_select(unsigned char *attr, unsigned char *html,
1865
unsigned char *eof, unsigned char **end, void *f)
1867
struct conv_table *ct = special_f(f, SP_TABLE, NULL);
1868
struct form_control *fc;
1869
struct string lbl = NULL_STRING;
1870
unsigned char **val, **labels;
1871
unsigned char *t_name, *t_attr, *en;
1874
int order, preselect, group;
1877
if (has_attr(attr, "multiple")) return 1;
1878
find_form_for_input(attr);
1879
html_focusable(attr);
1881
order = 0, group = 0, preselect = -1;
1882
init_menu(&lnk_menu);
1889
while (html < eof && *html != '<') html++;
1896
if (lbl.source) done_string(&lbl);
1898
for (j = 0; j < order; j++)
1903
destroy_menu(&lnk_menu);
1909
unsigned char *q, *s = en;
1912
/* Trim whitespace from both ends of the text run before converting it. */
while (l && WHITECHAR(s[0])) s++, l--;
1913
while (l && WHITECHAR(s[l-1])) l--;
1914
q = convert_string(ct, s, l, CSM_DEFAULT);
1915
if (q) add_to_string(&lbl, q), mem_free(q);
1918
if (html + 2 <= eof && (html[1] == '!' || html[1] == '?')) {
1919
html = skip_comment(html, eof);
1923
if (parse_element(html, eof, &t_name, &t_namelen, &t_attr, &en)) {
1928
if (!strlcasecmp(t_name, t_namelen, "/SELECT", 7)) {
1929
add_select_item(&lnk_menu, &lbl, val, order, nnmi);
1933
if (!strlcasecmp(t_name, t_namelen, "/OPTION", 7)) {
1934
add_select_item(&lnk_menu, &lbl, val, order, nnmi);
1938
if (!strlcasecmp(t_name, t_namelen, "OPTION", 6)) {
1939
unsigned char *v, *vx;
1941
add_select_item(&lnk_menu, &lbl, val, order, nnmi);
1943
if (has_attr(t_attr, "disabled")) goto see;
1944
/* Only the first option marked "selected" becomes the preselection. */
if (preselect == -1 && has_attr(t_attr, "selected")) preselect = order;
1945
v = get_attr_val(t_attr, "value");
1947
if (!mem_align_alloc(&val, order, order + 1, sizeof(unsigned char *), 0xFF))
1951
vx = get_attr_val(t_attr, "label");
1952
if (vx) new_menu_item(&lnk_menu, vx, order - 1, 0);
1960
if (!strlcasecmp(t_name, t_namelen, "OPTGROUP", 8)
1961
|| !strlcasecmp(t_name, t_namelen, "/OPTGROUP", 9)) {
1962
add_select_item(&lnk_menu, &lbl, val, order, nnmi);
1964
if (group) new_menu_item(&lnk_menu, NULL, -1, 0), group = 0;
1967
if (!strlcasecmp(t_name, t_namelen, "OPTGROUP", 8)) {
1968
unsigned char *label = get_attr_val(t_attr, "label");
1971
label = stracpy("");
1972
if (!label) goto see;
1974
new_menu_item(&lnk_menu, label, -1, 0);
1982
if (!order) goto abort;
1984
fc = mem_calloc(1, sizeof(struct form_control));
1985
if (!fc) goto abort;
1987
labels = mem_calloc(order, sizeof(unsigned char *));
1993
fc->form_num = last_form_tag - startf;
1994
fc->ctrl_num = attr - last_form_tag;
1995
fc->position = attr - startf;
1996
fc->method = form.method;
1997
fc->action = null_or_stracpy(form.action);
1998
fc->name = get_attr_val(attr, "name");
1999
fc->type = FC_SELECT;
2000
/* With no preselected option the first one (index 0) is the default. */
fc->default_state = preselect < 0 ? 0 : preselect;
2001
fc->default_value = order ? stracpy(val[fc->default_state]) : stracpy("");
2002
fc->ro = has_attr(attr, "disabled") ? 2 : has_attr(attr, "readonly") ? 1 : 0;
2003
fc->nvalues = order;
2005
fc->menu = detach_menu(&lnk_menu);
2006
fc->labels = labels;
2008
menu_labels(fc->menu, "", labels);
2009
put_chrs("[", 1, put_chars_f, f);
2010
html_stack_dup(ELEMENT_KILLABLE);
2012
format.attr |= AT_BOLD;
2015
/* max_width is updated from the length of every non-NULL label and then
 * used as the number of "_" placeholder characters drawn. */
for (i = 0; i < order; i++) {
2016
if (!labels[i]) continue;
2017
int_lower_bound(&max_width, strlen(labels[i]));
2020
for (i = 0; i < max_width; i++)
2021
put_chrs("_", 1, put_chars_f, f);
2023
kill_html_stack_item(&html_top);
2024
put_chrs("]", 1, put_chars_f, f);
2025
/* NOTE(review): this call passes ff while the rest of the function uses
 * the f parameter -- confirm both name the same context. */
special_f(ff, SP_CONTROL, fc);
2031
/* Table entry handler for <textarea>. Its only visible statement raises an
 * INTERNAL error: parse_html() compares ei->func against this function and
 * calls do_html_textarea() for that case. */
html_textarea(unsigned char *a)
2033
INTERNAL("This should be never called");
2037
/* Build a form control of type FC_TEXTAREA from a <textarea> element.
 *
 * @attr is the attribute string of the tag, @html points just behind it,
 * @eof bounds the scan and @f is the context passed to put_chrs(),
 * ln_break() and special_f(). @end is handed to parse_element() when the
 * closing tag is looked for.
 *
 * Leading CR/LF characters are skipped, the text up to the "/TEXTAREA"
 * tag is copied into fc->default_value, "cols" and "rows" are read (with
 * defaults and clamped to global_doc_opts->width / ->height), "wrap" is
 * matched against hard/physical, soft/virtual and none/off, and
 * "maxlength" defaults to MAXINT. The control is drawn as rows lines of
 * cols bold "_" characters.
 *
 * NOTE(review): the clean-up loop over fc->default_value shifts the tail
 * with memcpy(p, p + 1, strlen(p)); source and destination overlap, which
 * memcpy() does not define -- memmove() looks intended.
 * NOTE(review): many lines are missing from this listing (including the
 * end of the FIXME comment); confirm details against the full source. */
do_html_textarea(unsigned char *attr, unsigned char *html, unsigned char *eof,
2038
unsigned char **end, void *f)
2040
struct form_control *fc;
2041
unsigned char *p, *t_name, *wrap_attr;
2046
find_form_for_input(attr);
2047
html_focusable(attr);
2048
while (html < eof && (*html == '\n' || *html == '\r')) html++;
2050
while (p < eof && *p != '<') {
2058
if (parse_element(p, eof, &t_name, &t_namelen, NULL, end)) goto pp;
2059
if (strlcasecmp(t_name, t_namelen, "/TEXTAREA", 9)) goto pp;
2061
fc = mem_calloc(1, sizeof(struct form_control));
2064
fc->form_num = last_form_tag - startf;
2065
fc->ctrl_num = attr - last_form_tag;
2066
fc->position = attr - startf;
2067
fc->method = form.method;
2068
fc->action = null_or_stracpy(form.action);
2069
fc->name = get_attr_val(attr, "name");
2070
fc->type = FC_TEXTAREA;;
2071
fc->ro = has_attr(attr, "disabled") ? 2 : has_attr(attr, "readonly") ? 1 : 0;
2072
fc->default_value = memacpy(html, p - html);
2073
for (p = fc->default_value; p && p[0]; p++) {
2074
/* FIXME: We don't cope well with entities here. Bugzilla uses
2075
* inside of textarea and we fail miserably upon that
2078
if (p[1] == '\n' || (p > fc->default_value && p[-1] == '\n')) {
2079
memcpy(p, p + 1, strlen(p)), p--;
2086
cols = get_num(attr, "cols");
2087
if (cols <= 0) cols = global_doc_opts->default_form_input_size;
2088
cols++; /* Add 1 column, other browsers may have different
2089
behavior here (mozilla adds 2) --Zas */
2090
if (cols > global_doc_opts->width) cols = global_doc_opts->width;
2093
rows = get_num(attr, "rows");
2094
if (rows <= 0) rows = 1;
2095
if (rows > global_doc_opts->height) rows = global_doc_opts->height;
2097
global_doc_opts->needs_height = 1;
2099
wrap_attr = get_attr_val(attr, "wrap");
2101
if (!strcasecmp(wrap_attr, "hard")
2102
|| !strcasecmp(wrap_attr, "physical")) {
2104
} else if (!strcasecmp(wrap_attr, "soft")
2105
|| !strcasecmp(wrap_attr, "virtual")) {
2107
} else if (!strcasecmp(wrap_attr, "none")
2108
|| !strcasecmp(wrap_attr, "off")) {
2111
mem_free(wrap_attr);
2116
fc->maxlength = get_num(attr, "maxlength");
2117
if (fc->maxlength == -1) fc->maxlength = MAXINT;
2119
/* A multi-row area starts on its own line; a single row stays inline
 * and is separated by a space instead. */
if (rows > 1) ln_break(1, line_break_f, f);
2120
else put_chrs(" ", 1, put_chars_f, f);
2122
html_stack_dup(ELEMENT_KILLABLE);
2124
format.attr |= AT_BOLD;
2126
for (i = 0; i < rows; i++) {
2129
for (j = 0; j < cols; j++)
2130
put_chrs("_", 1, put_chars_f, f);
2132
ln_break(1, line_break_f, f);
2135
kill_html_stack_item(&html_top);
2136
if (rows > 1) ln_break(1, line_break_f, f);
2137
else put_chrs(" ", 1, put_chars_f, f);
2138
special_f(f, SP_CONTROL, fc);
2142
/* Tag handler for <iframe>: the frame is shown as a link line.
 *
 * The URL is a copy of object_src when that is set, otherwise the "src"
 * attribute. The name is taken from "name", then "id", then falls back to
 * an empty string. Two put_link_line() forms are visible, one labelled
 * "IFrame: " with the name and one with the fixed text "IFrame"; the
 * condition choosing between them is not visible in this listing. */
html_iframe(unsigned char *a)
2144
unsigned char *name, *url = NULL;
2146
url = null_or_stracpy(object_src);
2147
if (!url) url = get_url_val(a, "src");
2150
name = get_attr_val(a, "name");
2151
if (!name) name = get_attr_val(a, "id");
2152
if (!name) name = stracpy("");
2161
put_link_line("IFrame: ", name, url, global_doc_opts->framename);
2163
put_link_line("", "IFrame", url, global_doc_opts->framename);
2171
/* Tag handler for <object>. Reads the "data" URL and the "type" attribute
 * and dispatches on the type prefix: "text/" is treated like <iframe> and
 * "image/" like <img>. Without a "type" attribute the URL is freed and
 * the function returns. NOTE(review): the bodies of both branches are
 * missing from this listing. */
html_object(unsigned char *a)
2173
unsigned char *type, *url;
2175
/* This is just some dirty wrapper. We emulate various things through
2176
* this, which is anyway in the spirit of <object> element, unifying
2177
* <img> and <iframe> etc. */
2179
url = get_url_val(a, "data");
2182
type = get_attr_val(a, "type");
2183
if (!type) { mem_free(url); return; }
2185
if (!strncasecmp(type, "text/", 5)) {
2186
/* We will just emulate <iframe>. */
2192
} else if (!strncasecmp(type, "image/", 6)) {
2193
/* <img> emulation. */
2194
/* TODO: Use the enclosed text as 'alt' attribute. */
2205
/* Tag handler for <noframes>. Returns without doing anything when frames
 * are disabled in global_doc_opts, or when a "frameset" element is found
 * on the html stack whose frameset pointer is not set. What happens
 * otherwise is not visible in this listing. */
html_noframes(unsigned char *a)
2207
struct html_element *element;
2209
if (!global_doc_opts->frames) return;
2211
element = search_html_stack("frameset");
2212
if (element && !element->frameset) return;
2218
/* Tag handler for <frame>.
 *
 * The "src" attribute is joined with format.href_base to form the frame
 * URL. The frame name comes from the "name" attribute; the visible code
 * substitutes a copy of the URL when the name has no value. When frames
 * are disabled or there is no enclosing frameset (html_top.frameset is
 * not set) the frame is shown as a "Frame: " link line; otherwise, if
 * special_f(ff, SP_USED, NULL) is true, it is registered with SP_FRAME. */
html_frame(unsigned char *a)
2220
unsigned char *name, *src, *url;
2222
src = get_url_val(a, "src");
2226
url = join_urls(format.href_base, src);
2231
name = get_attr_val(a, "name");
2233
name = stracpy(url);
2234
} else if (!name[0]) {
2235
/* When name doesn't have a value */
2237
name = stracpy(url);
2241
if (!global_doc_opts->frames || !html_top.frameset) {
2243
put_link_line("Frame: ", name, url, "");
2246
if (special_f(ff, SP_USED, NULL)) {
2247
special_f(ff, SP_FRAME, html_top.frameset, name, url);
2256
/* Parse the comma-separated size list in @a (a frameset "cols" or "rows"
 * value) into an array of int sizes, then adjust the sizes.
 *
 * Visible parsing rules for each item: a number followed by '%' is scaled
 * as q * ww / 100; a number not followed by '*' is divided by @www with
 * rounding; a '*' item whose number is zero becomes -1. Each result is
 * appended to the array with mem_realloc(). The later loops force entries
 * to at least 1, rescale them, and hand out the remainder q one unit at a
 * time; negative entries receive a share of qq proportional to their
 * magnitude.
 * NOTE(review): presumably the array and its length are returned through
 * @op and @olp, but those assignments and most of the surrounding control
 * flow are missing from this listing -- confirm against the full source. */
parse_frame_widths(unsigned char *a, int ww, int www, int **op, int *olp)
2267
while (WHITECHAR(*a)) a++;
2269
n = strtoul(a, (char **)&a, 10);
2276
if (*a == '%') q = q * ww / 100;
2277
else if (*a != '*') q = (q + (www - 1) / 2) / www;
2278
else if (!q) q = -1;
2281
/* On allocation failure the old array is kept and the item is dropped. */
oo = mem_realloc(o, (ol + 1) * sizeof(int));
2282
if (oo) (o = oo)[ol++] = q;
2287
aa = strchr(a, ',');
2295
for (i = 0; i < ol; i++) if (o[i] > 0) q += o[i] - 1;
2300
for (i = 0; i < ol; i++) if (o[i] < 1) o[i] = 1;
2303
for (i = 0; i < ol; i++) d += o[i];
2305
for (i = 0; i < ol; i++) {
2306
q -= o[i] - o[i] * (d - qq) / d;
2307
/* SIGH! gcc 2.7.2.* has an optimizer bug! */
2308
do_not_optimize_here_gcc_2_7(&d);
2309
o[i] = o[i] * (d - qq) / d;
2313
for (i = 0; i < ol; i++) {
2314
if (q < 0) o[i]++, q++, nn = 1;
2315
if (q > 0 && o[i] > 1) o[i]--, q--, nn = 1;
2323
for (i = 0; i < ol; i++) if (o[i] < 0) neg = 1;
2324
if (!neg) goto distribute;
2326
/* oo keeps a copy of the original (possibly negative) entries while o
 * itself is clamped to at least 1. */
oo = mem_alloc(ol * sizeof(int));
2331
memcpy(oo, o, ol * sizeof(int));
2332
for (i = 0; i < ol; i++) if (o[i] < 1) o[i] = 1;
2335
for (i = 0; i < ol; i++) if (oo[i] < 0) d += -oo[i];
2337
for (i = 0; i < ol; i++) if (oo[i] < 0) {
2338
o[i] += (-oo[i] * qq / d);
2339
q -= (-oo[i] * qq / d);
2341
assertm(q >= 0, "parse_frame_widths: q < 0");
2342
if_assert_failed q = 0;
2343
for (i = 0; i < ol; i++) if (oo[i] < 0) {
2346
assertm(q <= 0, "parse_frame_widths: q > 0");
2347
if_assert_failed q = 0;
2351
/* A zero-sized entry takes one unit from the currently largest entry. */
for (i = 0; i < ol; i++) if (!o[i]) {
2356
for (j = 0; j < ol; j++) if (o[j] > m) m = o[j], mj = j;
2357
if (m) o[i] = 1, o[mj]--;
2362
/* Tag handler for <frameset>.
 *
 * The "cols" and "rows" attributes are read, with "100%" visible as a
 * substitute value. The available width and height come from
 * global_doc_opts for a top-level frameset, or from the parent frameset's
 * frame_desc entry at offset x + y * width for a nested one. Both lists
 * are parsed with parse_frame_widths(), and when fp.x and fp.y are both
 * non-zero the new frameset is created through special_f(SP_FRAMESET) and
 * stored in html_top.frameset. fp.width and fp.height are freed afterwards.
 * NOTE(review): the statement guarded by the BODY/frames/SP_USED check is
 * not visible here; presumably the frameset is ignored in that case. */
html_frameset(unsigned char *a)
2364
struct frameset_param fp;
2365
unsigned char *cols, *rows;
2368
/* XXX: This is still not 100% correct. We should also ignore the
2369
* frameset when we encountered anything 3v1l (read as: non-whitespace
2370
* text/element/anything) in the document outside of <head>. Well, this
2371
* is still better than nothing and it should heal up the security
2372
* concerns at least because sane sites should enclose the documents in
2373
* <body> elements ;-). See also bug 171. --pasky */
2374
if (search_html_stack("BODY")
2375
|| !global_doc_opts->frames || !special_f(ff, SP_USED, NULL))
2378
cols = get_attr_val(a, "cols");
2380
cols = stracpy("100%");
2384
rows = get_attr_val(a, "rows");
2386
rows = stracpy("100%");
2393
if (!html_top.frameset) {
2394
width = global_doc_opts->width;
2395
height = global_doc_opts->height;
2396
global_doc_opts->needs_height = 1;
2398
struct frameset_desc *frameset_desc = html_top.frameset;
2401
/* No rows left in the parent frameset: nothing to place this one in. */
if (frameset_desc->y >= frameset_desc->height)
2402
goto free_and_return;
2403
offset = frameset_desc->x
2404
+ frameset_desc->y * frameset_desc->width;
2405
width = frameset_desc->frame_desc[offset].width;
2406
height = frameset_desc->frame_desc[offset].height;
2409
fp.width = fp.height = NULL;
2411
parse_frame_widths(cols, width, HTML_FRAME_CHAR_WIDTH,
2413
parse_frame_widths(rows, height, HTML_FRAME_CHAR_HEIGHT,
2416
fp.parent = html_top.frameset;
2417
if (fp.x && fp.y) html_top.frameset = special_f(ff, SP_FRAMESET, &fp);
2418
if (fp.width) mem_free(fp.width);
2419
if (fp.height) mem_free(fp.height);
2429
Designates substitute versions for the document in which the link
2430
occurs. When used together with the lang attribute, it implies a
2431
translated version of the document. When used together with the
2432
media attribute, it implies a version designed for a different
2436
Refers to an external style sheet. See the section on external style
2437
sheets for details. This is used together with the link type
2438
"Alternate" for user-selectable alternate style sheets.
2441
Refers to the first document in a collection of documents. This link
2442
type tells search engines which document is considered by the author
2443
to be the starting point of the collection.
2446
Refers to the next document in a linear sequence of documents. User
2447
agents may choose to preload the "next" document, to reduce the
2448
perceived load time.
2451
Refers to the previous document in an ordered series of documents.
2452
Some user agents also support the synonym "Previous".
2455
Refers to a document serving as a table of contents.
2456
Some user agents also support the synonym ToC (from "Table of Contents").
2459
Refers to a document providing an index for the current document.
2462
Refers to a document providing a glossary of terms that pertain to the
2466
Refers to a copyright statement for the current document.
2469
Refers to a document serving as a chapter in a collection of documents.
2472
Refers to a document serving as a section in a collection of documents.
2475
Refers to a document serving as a subsection in a collection of
2479
Refers to a document serving as an appendix in a collection of
2483
Refers to a document offering help (more information, links to other
2484
sources information, etc.)
2487
Refers to a bookmark. A bookmark is a link to a key entry point
2488
within an extended document. The title attribute may be used, for
2489
example, to label the bookmark. Note that several bookmarks may be
2490
defined in each document.
2492
Some more were added, like top. --Zas */
2516
LT_ALTERNATE_STYLESHEET,
2520
enum hlink_direction {
2527
enum hlink_type type;
2528
enum hlink_direction direction;
2529
unsigned char *content_type;
2530
unsigned char *media;
2531
unsigned char *href;
2532
unsigned char *hreflang;
2533
unsigned char *title;
2534
unsigned char *lang;
2535
unsigned char *name;
2536
/* Not used or implemented yet.
2537
unsigned char *charset;
2538
unsigned char *target;
2540
unsigned char *class;
2545
/* One row of the lt_names[] table: a link type paired with its default
 * display name. NOTE(review): only the type member is visible in this
 * listing; the name member is referenced as ->str by
 * get_lt_default_name(). */
struct lt_default_name {
2546
enum hlink_type type;
2551
/* Default display names for the known link types. The row with a NULL
 * string (LT_UNKNOWN) ends the table; get_lt_default_name() stops its
 * scan there. */
static struct lt_default_name lt_names[] = {
2552
{ LT_START, "start" },
2553
{ LT_PARENT, "parent" },
2554
{ LT_NEXT, "next" },
2555
{ LT_PREV, "previous" },
2556
{ LT_CONTENTS, "contents" },
2557
{ LT_INDEX, "index" },
2558
{ LT_GLOSSARY, "glossary" },
2559
{ LT_CHAPTER, "chapter" },
2560
{ LT_SECTION, "section" },
2561
{ LT_SUBSECTION, "subsection" },
2562
{ LT_APPENDIX, "appendix" },
2563
{ LT_HELP, "help" },
2564
{ LT_SEARCH, "search" },
2565
{ LT_BOOKMARK, "bookmark" },
2566
{ LT_ALTERNATE_LANG, "alt. language" },
2567
{ LT_ALTERNATE_MEDIA, "alt. media" },
2568
{ LT_ALTERNATE_STYLESHEET, "alt. stylesheet" },
2569
{ LT_STYLESHEET, "stylesheet" },
2570
{ LT_ALTERNATE, "alternate" },
2571
{ LT_COPYRIGHT, "copyright" },
2572
{ LT_AUTHOR, "author" },
2573
{ LT_ICON, "icon" },
2574
{ LT_UNKNOWN, NULL }
2577
/* Search for default name for this link according to its type.
 * Walks lt_names[] until the entry with a NULL string and returns the
 * string of the first entry whose type equals link->type. The value
 * returned when nothing matches is not visible in this listing. */
static unsigned char *
2579
get_lt_default_name(struct hlink *link)
2581
struct lt_default_name *entry = lt_names;
2585
while (entry && entry->str) {
2586
if (entry->type == link->type) return entry->str;
2594
/* Release the strings held by @link (content_type, media, href, hreflang,
 * title, lang, name), each only when set, and then zero the whole
 * structure so it can be reused or discarded safely. */
html_link_clear(struct hlink *link)
2598
if (link->content_type) mem_free(link->content_type);
2599
if (link->media) mem_free(link->media);
2600
if (link->href) mem_free(link->href);
2601
if (link->hreflang) mem_free(link->hreflang);
2602
if (link->title) mem_free(link->title);
2603
if (link->lang) mem_free(link->lang);
2604
if (link->name) mem_free(link->name);
2606
memset(link, 0, sizeof(struct hlink));
2609
/* Fill @link from the attributes @a of a <link> element.
 *
 * @link is zeroed first. Returns 0 when there is no "href" attribute and
 * 1 when neither a "rel" nor a "rev" name was found; the return value
 * after classification is not visible in this listing. The direction is
 * LD_REL for "rel" and LD_REV for "rev". The name is then compared
 * case-insensitively against the known relation names (with synonyms such
 * as top/home, up, prev, toc, made/owner); names or content types that
 * merely contain "icon", "alternate", "stylesheet" or "css" are matched
 * as substrings with strcasestr(). */
/* Parse a link and return results in @link.
2610
* It tries to identify known types. */
2612
html_link_parse(unsigned char *a, struct hlink *link)
2615
memset(link, 0, sizeof(struct hlink));
2617
link->href = get_url_val(a, "href");
2618
if (!link->href) return 0;
2620
link->lang = get_attr_val(a, "lang");
2621
link->hreflang = get_attr_val(a, "hreflang");
2622
link->title = get_attr_val(a, "title");
2623
link->content_type = get_attr_val(a, "type");
2624
link->media = get_attr_val(a, "media");
2626
link->name = get_attr_val(a, "rel");
2627
if (link->name) link->direction = LD_REL;
2629
link->name = get_attr_val(a, "rev");
2630
if (link->name) link->direction = LD_REV;
2633
if (!link->name) return 1;
2635
/* TODO: fastfind */
2636
if (!strcasecmp(link->name, "start") ||
2637
!strcasecmp(link->name, "top") ||
2638
!strcasecmp(link->name, "home"))
2639
link->type = LT_START;
2640
else if (!strcasecmp(link->name, "parent") ||
2641
!strcasecmp(link->name, "up"))
2642
link->type = LT_PARENT;
2643
else if (!strcasecmp(link->name, "next"))
2644
link->type = LT_NEXT;
2645
else if (!strcasecmp(link->name, "prev") ||
2646
!strcasecmp(link->name, "previous"))
2647
link->type = LT_PREV;
2648
else if (!strcasecmp(link->name, "contents") ||
2649
!strcasecmp(link->name, "toc"))
2650
link->type = LT_CONTENTS;
2651
else if (!strcasecmp(link->name, "index"))
2652
link->type = LT_INDEX;
2653
else if (!strcasecmp(link->name, "glossary"))
2654
link->type = LT_GLOSSARY;
2655
else if (!strcasecmp(link->name, "chapter"))
2656
link->type = LT_CHAPTER;
2657
else if (!strcasecmp(link->name, "section"))
2658
link->type = LT_SECTION;
2659
else if (!strcasecmp(link->name, "subsection") ||
2660
!strcasecmp(link->name, "child") ||
2661
!strcasecmp(link->name, "sibling"))
2662
link->type = LT_SUBSECTION;
2663
else if (!strcasecmp(link->name, "appendix"))
2664
link->type = LT_APPENDIX;
2665
else if (!strcasecmp(link->name, "help"))
2666
link->type = LT_HELP;
2667
else if (!strcasecmp(link->name, "search"))
2668
link->type = LT_SEARCH;
2669
else if (!strcasecmp(link->name, "bookmark"))
2670
link->type = LT_BOOKMARK;
2671
else if (!strcasecmp(link->name, "copyright"))
2672
link->type = LT_COPYRIGHT;
2673
else if (!strcasecmp(link->name, "author") ||
2674
!strcasecmp(link->name, "made") ||
2675
!strcasecmp(link->name, "owner"))
2676
link->type = LT_AUTHOR;
2677
else if (strcasestr(link->name, "icon") ||
2678
(link->content_type && strcasestr(link->content_type, "icon")))
2679
link->type = LT_ICON;
2680
else if (strcasestr(link->name, "alternate")) {
2681
link->type = LT_ALTERNATE;
2683
link->type = LT_ALTERNATE_LANG;
2684
else if (strcasestr(link->name, "stylesheet") ||
2685
(link->content_type && strcasestr(link->content_type, "css")))
2686
link->type = LT_ALTERNATE_STYLESHEET;
2687
else if (link->media)
2688
link->type = LT_ALTERNATE_MEDIA;
2689
} else if (!strcasecmp(link->name, "stylesheet") ||
2690
(link->content_type && strcasestr(link->content_type, "css")))
2691
link->type = LT_STYLESHEET;
2697
/* Tag handler for <link>: shows the link as a "Link: " or "Reverse link: "
 * line, depending on link.direction.
 *
 * Nothing is shown when global_doc_opts->meta_link_display is 0 or when
 * html_link_parse() returns 0. Below display level 5, icon, author,
 * stylesheet and alternate-stylesheet links are skipped. Known link types
 * (and links without a name) get their default name from
 * get_lt_default_name(). The label starts with the title or the name and,
 * depending on the display level, gains a parenthesised, comma-separated
 * list: the name when it differs from the title, hreflang from level 3,
 * the content type from level 4, plus lang and media under the conditions
 * below. Level 1 jumps straight to only_title. The parsed link is released
 * with html_link_clear() at the end.
 * NOTE(review): some lines (including the labels and the else branches)
 * are missing from this listing. */
html_link(unsigned char *a)
2699
int link_display = global_doc_opts->meta_link_display;
2700
unsigned char *name = NULL;
2702
static unsigned char link_rel_string[] = "Link: ";
2703
static unsigned char link_rev_string[] = "Reverse link: ";
2705
if (!link_display) return;
2706
if (!html_link_parse(a, &link)) return;
2708
/* Ignore few annoying links.. */
2709
if (link_display < 5 &&
2710
(link.type == LT_ICON ||
2711
link.type == LT_AUTHOR ||
2712
link.type == LT_STYLESHEET ||
2713
link.type == LT_ALTERNATE_STYLESHEET)) goto free_and_return;
2716
if (!link.name || link.type != LT_UNKNOWN)
2717
/* Give preference to our default names for known types. */
2718
name = get_lt_default_name(&link);
2722
if (name && link.href) {
2724
int name_neq_title = 0;
2727
if (!init_string(&text)) goto free_and_return;
2732
add_to_string(&text, link.title);
2733
name_neq_title = strcmp(link.title, name);
2735
add_to_string(&text, name);
2737
if (link_display == 1) goto only_title;
2739
/* In each section below, the first appended item opens the parenthesis
 * and later ones are separated by ", "; the flag is called first. */
if (name_neq_title) {
2740
if (!first) add_to_string(&text, ", ");
2741
else add_to_string(&text, " (");
2742
add_to_string(&text, name);
2746
if (link_display >= 3 && link.hreflang) {
2747
if (!first) add_to_string(&text, ", ");
2748
else add_to_string(&text, " (");
2749
add_to_string(&text, link.hreflang);
2753
if (link_display >= 4 && link.content_type) {
2754
if (!first) add_to_string(&text, ", ");
2755
else add_to_string(&text, " (");
2756
add_to_string(&text, link.content_type);
2760
/* lang is added for alternate-language links unless hreflang was
 * already shown (level >= 3) and is the same string. */
if (link.lang && link.type == LT_ALTERNATE_LANG &&
2761
(link_display < 3 || (link.hreflang &&
2762
strcasecmp(link.hreflang, link.lang)))) {
2763
if (!first) add_to_string(&text, ", ");
2764
else add_to_string(&text, " (");
2765
add_to_string(&text, link.lang);
2770
if (!first) add_to_string(&text, ", ");
2771
else add_to_string(&text, " (");
2772
add_to_string(&text, link.media);
2776
if (!first) add_char_to_string(&text, ')');
2780
put_link_line((link.direction == LD_REL) ? link_rel_string : link_rev_string,
2781
text.source, link.href, format.target_base);
2783
put_link_line((link.direction == LD_REL) ? link_rel_string : link_rev_string,
2784
name, link.href, format.target_base);
2786
if (text.source) done_string(&text);
2790
html_link_clear(&link);
2793
/* Description of one HTML element known to the parser: its tag name and
 * the handler called with the tag's attribute string.
 * NOTE(review): the remaining members are not visible in this listing;
 * parse_html() reads ->linebreak and ->nopair, which presumably are the
 * two integers given in each elements[] initializer -- confirm. */
struct element_info {
2794
unsigned char *name;
2795
void (*func)(unsigned char *);
2800
/* Number of entries in elements[] (65 tags are listed there). It is the
 * element count passed to bsearch() in parse_html() when USE_FASTFIND is
 * not defined, so it must match the table exactly: with a smaller value
 * the last entries of the table can never be found. */
#define NUMBER_OF_TAGS 65
2802
/* Table of the HTML elements the parser handles: tag name, handler
 * function and two small integers per entry (see struct element_info).
 *
 * The entries are kept in alphabetical order by name; parse_html() can
 * look tags up with bsearch() using compar(), which relies on that order.
 * The table holds 65 active entries (HEAD is commented out), and
 * NUMBER_OF_TAGS has to equal that count for the bsearch() lookup to
 * reach every entry. */
static struct element_info elements[] = {
2803
{"A", html_a, 0, 2},
2804
{"ABBR", html_italic, 0, 0},
2805
{"ADDRESS", html_address, 2, 0},
2806
{"B", html_bold, 0, 0},
2807
{"BASE", html_base, 0, 1},
2808
{"BASEFONT", html_font, 0, 1},
2809
{"BLOCKQUOTE", html_blockquote,2, 0},
2810
{"BODY", html_body, 0, 0},
2811
{"BR", html_br, 1, 1},
2812
{"BUTTON", html_button, 0, 0},
2813
{"CAPTION", html_center, 1, 0},
2814
{"CENTER", html_center, 1, 0},
2815
{"CODE", html_fixed, 0, 0},
2816
{"DD", html_dd, 1, 1},
2817
{"DFN", html_bold, 0, 0},
2818
{"DIR", html_ul, 2, 0},
2819
{"DIV", html_linebrk, 1, 0},
2820
{"DL", html_dl, 2, 0},
2821
{"DT", html_dt, 1, 1},
2822
{"EM", html_italic, 0, 0},
2823
{"FIXED", html_fixed, 0, 0},
2824
{"FONT", html_font, 0, 0},
2825
{"FORM", html_form, 1, 0},
2826
{"FRAME", html_frame, 1, 1},
2827
{"FRAMESET", html_frameset, 1, 0},
2828
{"H1", html_h1, 2, 2},
2829
{"H2", html_h2, 2, 2},
2830
{"H3", html_h3, 2, 2},
2831
{"H4", html_h4, 2, 2},
2832
{"H5", html_h5, 2, 2},
2833
{"H6", html_h6, 2, 2},
2834
/* {"HEAD", html_skip, 0, 0}, */
2835
{"HR", html_hr, 2, 1},
2836
{"I", html_italic, 0, 0},
2837
{"IFRAME", html_iframe, 1, 1},
2838
{"IMG", html_img, 0, 1},
2839
{"INPUT", html_input, 0, 1},
2840
{"LI", html_li, 1, 3},
2841
{"LINK", html_link, 1, 1},
2842
{"LISTING", html_pre, 2, 0},
2843
{"MENU", html_ul, 2, 0},
2844
{"NOFRAMES", html_noframes, 0, 0},
2845
{"OBJECT", html_object, 1, 1},
2846
{"OL", html_ol, 2, 0},
2847
{"OPTION", html_option, 1, 1},
2848
{"P", html_p, 2, 2},
2849
{"PRE", html_pre, 2, 0},
2850
{"Q", html_italic, 0, 0},
2851
{"S", html_underline, 0, 0},
2852
{"SCRIPT", html_skip, 0, 0},
2853
{"SELECT", html_select, 0, 0},
2854
{"SPAN", html_span, 0, 0},
2855
{"STRIKE", html_underline, 0, 0},
2856
{"STRONG", html_bold, 0, 0},
2857
{"STYLE", html_skip, 0, 0},
2858
{"SUB", html_subscript, 0, 0},
2859
{"SUP", html_superscript,0,0},
2860
{"TABLE", html_table, 2, 0},
2861
{"TD", html_td, 0, 0},
2862
{"TEXTAREA", html_textarea, 0, 1},
2863
{"TH", html_th, 0, 0},
2864
{"TITLE", html_title, 0, 0},
2865
{"TR", html_tr, 1, 0},
2866
{"U", html_underline, 0, 0},
2867
{"UL", html_ul, 2, 0},
2868
{"XMP", html_xmp, 2, 0},
2873
/* Skip a "<!...>" or "<?...>" construct that starts at @html and return a
 * pointer just behind its closing '>', or @eof when the end of the buffer
 * is reached first.
 *
 * When the construct starts with "<!--" (comm is set) the closing '>' is
 * only accepted after "--", any further dashes and optional whitespace;
 * otherwise the first '>' ends it.
 * NOTE(review): a few lines of the loop are missing from this listing. */
skip_comment(unsigned char *html, unsigned char *eof)
2875
int comm = html + 4 <= eof && html[2] == '-' && html[3] == '-';
2877
/* Step over the opening "<!--" or the two-character "<!" / "<?". */
html += comm ? 4 : 2;
2878
while (html < eof) {
2879
if (!comm && html[0] == '>') return html + 1;
2880
if (comm && html + 2 <= eof && html[0] == '-' && html[1] == '-') {
2882
while (html < eof && *html == '-') html++;
2883
while (html < eof && WHITECHAR(*html)) html++;
2884
if (html >= eof) return eof;
2885
if (*html == '>') return html + 1;
2894
/* Handle the "Refresh" header found in @head.
 *
 * The header value is fetched with parse_http_header() and its "URL"
 * parameter with parse_http_header_param(). The delay is parsed with
 * strtoul(); on error or when it exceeds 7200 seconds it is reset to 0.
 * The target is joined with format.href_base, shown as a "Refresh: " link
 * line and passed to special_f() with SP_REFRESH.
 * NOTE(review): the NULL checks and most clean-up lines are missing from
 * this listing; errno is tested after strtoul(), so it is presumably
 * cleared beforehand -- confirm. */
process_head(unsigned char *head)
2896
unsigned char *refresh, *url;
2898
refresh = parse_http_header(head, "Refresh", NULL);
2900
url = parse_http_header_param(refresh, "URL");
2902
unsigned char *saved_url = url;
2903
/* Extraction of refresh time. */
2904
unsigned long seconds;
2907
seconds = strtoul(refresh, NULL, 10);
2908
if (errno || seconds > 7200) seconds = 0;
2910
html_focusable(NULL);
2911
url = join_urls(format.href_base, saved_url);
2912
put_link_line("Refresh: ", saved_url, url, global_doc_opts->framename);
2913
special_f(ff, SP_REFRESH, seconds, url);
2915
mem_free(saved_url);
2921
#ifndef USE_FASTFIND
2924
/* Comparison callback for bsearch() over elements[]: orders two
 * struct element_info entries by a case-insensitive comparison of their
 * names. */
compar(const void *a, const void *b)
2926
return strcasecmp(((struct element_info *) a)->name,
2927
((struct element_info *) b)->name);
2932
static struct fastfind_info *ff_info_tags;
2933
static struct element_info *internal_pointer;
2935
/* Reset internal list pointer: iteration restarts at the first entry of
 * elements[]. Passed to fastfind_index() as the reset callback. */
2937
tags_list_reset(void)
2939
internal_pointer = elements;
2942
/* Iteration callback passed to fastfind_index(): yields the element that
 * internal_pointer currently refers to, using its name as the key and the
 * element itself as the data. The result lives in a static structure, so
 * it is overwritten by the next call. */
/* Returns a pointer to a struct that contains
2943
* current key and data pointers and increment
2945
* It returns NULL when key is NULL. */
2946
struct fastfind_key_value *
2947
tags_list_next(void)
2949
static struct fastfind_key_value kv;
2951
if (!internal_pointer->name) return NULL;
2953
kv.key = internal_pointer->name;
2954
kv.data = internal_pointer;
2961
#endif /* USE_FASTFIND */
2964
/* Build the fastfind index over the element table, using
 * tags_list_reset() and tags_list_next() as iteration callbacks, then
 * compress it. The index is kept in ff_info_tags. */
init_tags_lookup(void)
2967
ff_info_tags = fastfind_index(&tags_list_reset, &tags_list_next, 0);
2968
fastfind_index_compress(ff_info_tags);
2973
/* Release the fastfind index created by init_tags_lookup(). */
free_tags_lookup(void)
2976
fastfind_done(ff_info_tags);
2981
/* Parse the HTML source in [html, eof) and drive the output callbacks:
 * text runs go out through put_chrs(..., put_chars_f, f), line breaks
 * through ln_break(..., line_break_f, f), and each recognised tag is
 * dispatched to its handler (ei->func).
 * - html, eof: start and end of the source to scan
 * - f: opaque pointer handed unchanged to the callbacks
 * - head: if non-NULL, passed to process_head() before parsing starts
 * NOTE(review): this listing shows bare line-number markers between
 * statements and does not show every line of the function; the comments
 * added below describe only the statements that are visible. */
parse_html(unsigned char *html, unsigned char *eof,
2982
void *f, unsigned char *head)
2984
/*unsigned char *start = html;*/
2988
/* Start with line_breax at 2 when inside a table, otherwise 1. */
line_breax = table_level ? 2 : 1;
2994
if (head) process_head(head);
3000
while (html < eof) {
3001
struct element_info *ei;
3002
unsigned char *name, *attr, *end, *prev_html;
3007
/* Whitespace while the alignment is not AL_NONE: runs of whitespace are
 * skipped over (presumably collapsed into the single space written by
 * the put_chrs(" ", 1, ...) calls below). */
if (WHITECHAR(*html) && par_format.align != AL_NONE) {
3008
unsigned char *h = html;
3012
while (html < eof && WHITECHAR(*html)) html++;
3017
while (h < eof && WHITECHAR(*h)) h++;
3018
/* After the whitespace, is the next thing a closing tag? */
if (h + 1 < eof && h[0] == '<' && h[1] == '/') {
3019
if (!parse_element(h, eof, &name, &namelen, &attr, &end)) {
3020
put_chrs(lt, html - lt, put_chars_f, f);
3027
if (!(position + (html - lt - 1))) goto skip_w; /* ??? */
3028
if (*(html - 1) == ' ') {
3029
/* BIG performance win; not sure if it doesn't cause any bug */
3030
if (html < eof && !WHITECHAR(*html)) continue;
3031
put_chrs(lt, html - lt, put_chars_f, f);
3033
put_chrs(lt, html - 1 - lt, put_chars_f, f);
3034
put_chrs(" ", 1, put_chars_f, f);
3038
while (html < eof && WHITECHAR(*html)) html++;
3043
put_chrs(" ", 1, put_chars_f, f);
3047
/* Alignment AL_NONE: tabs and line ends are handled one by one. */
if (par_format.align == AL_NONE) {
3049
if (*html == ASCII_TAB) {
3050
put_chrs(lt, html - lt, put_chars_f, f);
3051
/* Pad up to the next column that is a multiple of 8, based on position.
 * NOTE(review): the literal shown here holds one space while the length
 * argument can be as large as 8; confirm the literal really has 8 spaces
 * (it may have been whitespace-collapsed in this listing). */
put_chrs(" ", 8 - (position % 8), put_chars_f, f);
3054
} else if (*html == ASCII_CR || *html == ASCII_LF) {
3055
put_chrs(lt, html - lt, put_chars_f, f);
3058
/* CR directly followed by LF. */
if (*html == ASCII_CR && html < eof - 1
3059
&& html[1] == ASCII_LF)
3061
ln_break(1, line_break_f, f);
3063
if (*html == ASCII_CR || *html == ASCII_LF) {
3071
while (*html < ' ') {
3072
/*if (putsp == 1) goto put_sp;
3074
if (html - lt) put_chrs(lt, html - lt, put_chars_f, f);
3077
if (*html >= ' ' || WHITECHAR(*html) || html >= eof) {
3078
unsigned char *dots = mem_alloc(dotcounter);
3081
memset(dots, '.', dotcounter);
3082
put_chrs(dots, dotcounter, put_chars_f, f);
3089
if (html + 2 <= eof && html[0] == '<' && (html[1] == '!' || html[1] == '?') && !was_xmp) {
3090
/*if (putsp == 1) goto put_sp;
3092
put_chrs(lt, html - lt, put_chars_f, f);
3093
html = skip_comment(html, eof);
3097
if (*html != '<' || parse_element(html, eof, &name, &namelen, &attr, &end)) {
3098
/*if (putsp == 1) goto put_sp;
3105
inv = *name == '/'; name += inv; namelen -= inv;
3106
if (!inv && putsp == 1 && !html_top.invisible) goto put_sp;
3107
put_chrs(lt, html - lt, put_chars_f, f);
3108
if (par_format.align != AL_NONE && !inv && !putsp) {
3109
unsigned char *ee = end;
3112
while (!parse_element(ee, eof, &nm, NULL, NULL, &ee))
3115
if (ee < eof && WHITECHAR(*ee)) {
3117
put_chrs(" ", 1, put_chars_f, f);
3126
for (ei = elements; ei->name; ei++) {
3127
if (strlcasecmp(ei->name, -1, name, namelen))
3131
#ifndef USE_FASTFIND
3133
struct element_info elem;
3136
tmp = name[namelen];
3137
name[namelen] = '\0';
3140
ei = bsearch(&elem, elements, NUMBER_OF_TAGS, sizeof(struct element_info), compar);
3141
name[namelen] = tmp;
3144
ei = (struct element_info *) fastfind_search(name, namelen, ff_info_tags);
3146
while (ei) { /* This exists just to be able to conveniently break; out. */
3151
/* Output "<" as plain text and resume scanning right after it. */
put_chrs("<", 1, put_chars_f, f);
3152
html = prev_html + 1;
3155
/* Emit the line break associated with this element. */
ln_break(ei->linebreak, line_break_f, f);
3156
/* An "id" attribute is passed on through special_f() as SP_TAG. */
if ((a = get_attr_val(attr, "id"))) {
3157
special_f(f, SP_TAG, a);
3160
if (!html_top.invisible) {
3161
/* Remember whether the alignment was AL_NONE and keep a copy of
 * par_format; the copy is put back after the handler has run (see the
 * "if (ali)" statement below). */
int ali = (par_format.align == AL_NONE);
3162
struct par_attrib pa = par_format;
3164
/* Tables, selects and textareas have their own parsing routines, which
 * receive &html so that they can move the scan position themselves. */
if (ei->func == html_table && global_doc_opts->tables && table_level < HTML_MAX_TABLE_LEVEL) {
3165
format_table(attr, html, eof, &html, f);
3166
ln_break(2, line_break_f, f);
3169
if (ei->func == html_select) {
3170
if (!do_html_select(attr, html, eof, &html, f))
3173
if (ei->func == html_textarea) {
3174
do_html_textarea(attr, html, eof, &html, f);
3177
/* nopair 2 or 3: look through html_stack for an earlier entry with the
 * same name as this tag. */
if (ei->nopair == 2 || ei->nopair == 3) {
3178
struct html_element *e;
3180
if (ei->nopair == 2) {
3181
foreach (e, html_stack) {
3182
if (e->type != ELEMENT_KILLABLE) break;
3183
if (e->linebreak || !ei->linebreak) break;
3185
} else foreach (e, html_stack) {
3186
if (e->linebreak && !ei->linebreak) break;
3187
if (e->type != ELEMENT_KILLABLE) break;
3188
if (!strlcasecmp(e->name, e->namelen, name, namelen)) break;
3190
/* Same name found: kill every entry in front of it in the list, then the
 * entry itself unless it is ELEMENT_IMMORTAL. */
if (!strlcasecmp(e->name, e->namelen, name, namelen)) {
3191
while (e->prev != (void *)&html_stack) kill_html_stack_item(e->prev);
3193
if (e->type != ELEMENT_IMMORTAL)
3194
kill_html_stack_item(e);
3197
/* Unless nopair is 1, push a new stack entry describing this tag. */
if (ei->nopair != 1) {
3198
html_stack_dup(ELEMENT_KILLABLE);
3199
html_top.name = name;
3200
html_top.namelen = namelen;
3201
html_top.options = attr;
3202
html_top.linebreak = ei->linebreak;
3204
/* Run the handler for this tag, if it has one. */
if (ei->func) ei->func(attr);
3205
/* Any tag other than the one handled by html_br clears was_br. */
if (ei->func != html_br) was_br = 0;
3206
/* The alignment was AL_NONE before the handler ran: restore par_format. */
if (ali) par_format = pa;
3209
struct html_element *e, *elt;
3214
if (ei->func == html_xmp)
3221
if (ei->nopair == 1 || ei->nopair == 3) break;
3223
/* Search html_stack for the entry whose name matches this tag. */
foreach (e, html_stack) {
3224
if (e->linebreak && !ei->linebreak) xxx = 1;
3225
if (strlcasecmp(e->name, e->namelen, name, namelen)) {
3226
if (e->type != ELEMENT_KILLABLE)
3232
kill_html_stack_item(e);
3235
/* Take the largest linebreak value found from e through the front of
 * the list and break lines by that amount. */
for (elt = e; elt != (void *)&html_stack; elt = elt->prev)
3236
if (elt->linebreak > lnb) lnb = elt->linebreak;
3237
ln_break(lnb, line_break_f, f);
3238
/* Kill the entries in front of e, then e itself. */
while (e->prev != (void *)&html_stack) kill_html_stack_item(e->prev);
3239
kill_html_stack_item(e);
3249
/* Flush the pending text run [lt, html) and finish with a line break. */
put_chrs(lt, html - lt, put_chars_f, f);
3250
ln_break(1, line_break_f, f);
3258
/* Step through the source at *pos looking for the MAP element whose "name"
 * attribute equals tag (compared with strcasecmp(), so case is ignored).
 * Constructs starting with "<!" or "<?" are skipped with skip_comment().
 * Returns 0 when the end of the source is reached and 1 when the element
 * just parsed is not MAP; get_image_map() calls this in a loop for as long
 * as it returns non-zero. The remaining return paths are not visible in
 * this listing. */
look_for_map(unsigned char **pos, unsigned char *eof, unsigned char *tag)
3260
unsigned char *al, *attr, *name;
3263
/* Advance to the next "<". */
while (*pos < eof && **pos != '<') {
3267
if (*pos >= eof) return 0;
3269
if (*pos + 2 <= eof && ((*pos)[1] == '!' || (*pos)[1] == '?')) {
3270
*pos = skip_comment(*pos, eof);
3274
/* On success parse_element() also moves *pos past the element. */
if (parse_element(*pos, eof, &name, &namelen, &attr, pos)) {
3279
if (strlcasecmp(name, namelen, "MAP", 3)) return 1;
3282
al = get_attr_val(attr, "name");
3285
if (strcasecmp(al, tag)) {
3297
/* Gather the text from *pos up to the next "<" into a new string and hand
 * it back through *label (the buffer is str.source). Constructs starting
 * with "<!" or "<?" are skipped with skip_comment(); a parse_element()
 * failure returns 1. look_for_link() calls this repeatedly while it
 * returns non-zero. */
look_for_tag(unsigned char **pos, unsigned char *eof,
3298
unsigned char *name, int namelen, unsigned char **label)
3300
unsigned char *pos2;
3303
if (!init_string(&str)) {
3304
/* Is this the right way to bail out? --jonas */
3310
/* Find the end of the text run. */
while (pos2 < eof && *pos2 != '<') {
3320
/* Copy the bytes between *pos and pos2 and expose them as the label. */
add_bytes_to_string(&str, *pos, pos2 - *pos);
3321
*label = str.source;
3325
if (*pos + 2 <= eof && ((*pos)[1] == '!' || (*pos)[1] == '?')) {
3326
*pos = skip_comment(*pos, eof);
3330
if (parse_element(*pos, eof, NULL, NULL, NULL, &pos2)) return 1;
3332
/* NOTE(review): name and namelen are the parameters of this function;
 * the parse_element() call above was given NULL for the element name, so
 * this test does not look at the tag that was just parsed. Confirm that
 * this is intended. */
if (!(!strlcasecmp(name, namelen, "A", 1) ||
3333
!strlcasecmp(name, namelen, "/A", 2) ||
3334
!strlcasecmp(name, namelen, "MAP", 3) ||
3335
!strlcasecmp(name, namelen, "/MAP", 4) ||
3336
!strlcasecmp(name, namelen, "AREA", 4) ||
3337
!strlcasecmp(name, namelen, "/AREA", 5))) {
3346
/* Process the next element at *pos while building the link menu of an
 * image map:
 * - A: the label is the text collected by look_for_tag()
 * - AREA: the label is the "alt" attribute converted with convert_string()
 *   using the conversion table ct
 * - /MAP: *menu is registered in *ml; according to the comment at that
 *   spot this is the only successful return
 * For a link the target is taken from get_target(attr), then from
 * target_base, then set to an empty string; the URL is the "href"
 * attribute joined with href_base. The new entry is appended to *menu with
 * map_selected as its function and a struct link_def as its data.
 * Returns 0 when the end of the source is reached; the other return paths
 * are not visible in this listing. */
look_for_link(unsigned char **pos, unsigned char *eof,
3347
unsigned char *tag, struct menu_item **menu,
3348
struct memory_list **ml, unsigned char *href_base,
3349
unsigned char *target_base, struct conv_table *ct)
3351
unsigned char *attr, *label, *href, *name, *target;
3352
struct link_def *ld;
3353
struct menu_item *nm;
3357
/* Advance to the next "<". */
while (*pos < eof && **pos != '<') {
3361
if (*pos >= eof) return 0;
3363
if (*pos + 2 <= eof && ((*pos)[1] == '!' || (*pos)[1] == '?')) {
3364
*pos = skip_comment(*pos, eof);
3368
if (parse_element(*pos, eof, &name, &namelen, &attr, pos)) {
3373
if (!strlcasecmp(name, namelen, "A", 1)) {
3374
while (look_for_tag(pos, eof, name, namelen, &label));
3376
if (*pos >= eof) return 0;
3378
} else if (!strlcasecmp(name, namelen, "AREA", 4)) {
3379
unsigned char *alt = get_attr_val(attr, "alt");
3382
label = convert_string(ct, alt, strlen(alt), CSM_DEFAULT);
3388
} else if (!strlcasecmp(name, namelen, "/MAP", 4)) {
3389
/* This is the only successful return from here! */
3390
add_to_ml(ml, *menu, NULL);
3397
/* Pick the target: the one given on the tag, else a copy of target_base,
 * else an empty string. */
target = get_target(attr);
3398
if (!target) target = null_or_stracpy(target_base);
3399
if (!target) target = stracpy("");
3401
if (label) mem_free(label);
3405
ld = mem_alloc(sizeof(struct link_def));
3407
if (label) mem_free(label);
3412
href = get_url_val(attr, "href");
3414
if (label) mem_free(label);
3421
ld->link = join_urls(href_base, href);
3424
if (label) mem_free(label);
3431
ld->target = target;
3432
/* Compare against the entries already in the menu (same link and same
 * target). */
for (nmenu = 0; !mi_is_end_of_menu((*menu)[nmenu]); nmenu++) {
3433
struct link_def *ll = (*menu)[nmenu].data;
3435
if (!strcmp(ll->link, ld->link) &&
3436
!strcmp(ll->target, ld->target)) {
3438
mem_free(ld->target);
3440
if (label) mem_free(label);
3455
/* Use a copy of the link URL as the label (the condition guarding this is
 * not visible here; presumably it applies when no label was found). */
label = stracpy(ld->link);
3464
/* Grow the menu: room for the new item plus one more entry, both zeroed
 * (the second one presumably serves as the end-of-menu marker). */
nm = mem_realloc(*menu, (nmenu + 2) * sizeof(struct menu_item));
3467
memset(&nm[nmenu], 0, 2 * sizeof(struct menu_item));
3468
nm[nmenu].text = label;
3469
nm[nmenu].func = (menu_func) map_selected;
3470
nm[nmenu].data = ld;
3471
nm[nmenu].flags = NO_INTL;
3474
/* Register the new allocations in the memory list. */
add_to_ml(ml, ld, ld->link, ld->target, label, NULL);
3480
/* Scan the source from s to eof for META elements and, when title is
 * non-NULL, for a TITLE element, collecting the results:
 * - head: a newline is appended first; a META "charset" attribute is added
 *   as "Charset: " followed by its value, and a META "http-equiv"
 *   attribute is added followed by ": " and the "content" attribute
 * - title: initialised here; filled with the text that follows a TITLE tag
 *   (only while title is still empty) and then passed through clr_spaces()
 * Scanning returns at a /HEAD or BODY tag and at the end of the source.
 * Constructs starting with "<!" or "<?" are skipped with skip_comment(). */
scan_http_equiv(unsigned char *s, unsigned char *eof, struct string *head,
3481
struct string *title)
3483
unsigned char *name, *attr, *he, *c;
3486
if (title && !init_string(title)) return;
3488
add_char_to_string(head, '\n');
3491
/* Advance to the next "<". */
while (s < eof && *s != '<') {
3495
if (s >= eof) return;
3496
if (s + 2 <= eof && (s[1] == '!' || s[1] == '?')) {
3497
s = skip_comment(s, eof);
3500
if (parse_element(s, eof, &name, &namelen, &attr, &s)) goto sp;
3503
if (!strlcasecmp(name, namelen, "HEAD", 4)) goto se;
3504
if (!strlcasecmp(name, namelen, "/HEAD", 5)) return;
3505
if (!strlcasecmp(name, namelen, "BODY", 4)) return;
3506
/* Only a TITLE seen while title is still empty is collected. */
if (title && !title->length && !strlcasecmp(name, namelen, "TITLE", 5)) {
3511
while (s < eof && *s != '<') {
3516
add_bytes_to_string(title, s1, s - s1);
3517
if (s >= eof) goto se;
3518
if (s + 2 <= eof && (s[1] == '!' || s[1] == '?')) {
3519
s = skip_comment(s, eof);
3522
if (parse_element(s, eof, &name, &namelen, &attr, &s)) goto xsp;
3523
clr_spaces(title->source);
3526
/* Everything below deals with META only. */
if (strlcasecmp(name, namelen, "META", 4)) goto se;
3528
he = get_attr_val(attr, "charset");
3530
add_to_string(head, "Charset: ");
3531
add_to_string(head, he);
3535
he = get_attr_val(attr, "http-equiv");
3538
add_to_string(head, he);
3540
c = get_attr_val(attr, "content");
3542
add_to_string(head, ": ");
3543
add_to_string(head, c);
3548
add_to_string(head, "\r\n");
3553
/* Build the link menu for the image map named tag found in [pos, eof).
 * A temporary header string is made from head (when non-NULL) plus what
 * scan_http_equiv() extracts from the source; it is given to
 * get_convert_table() together with to, def and hdef to obtain the
 * conversion table used for labels. *menu starts as one zeroed menu item,
 * the map is located with look_for_map() and links are added with
 * look_for_link() until it returns 0.
 * Returns -1 when init_string() or mem_calloc() fails; the other return
 * values are not visible in this listing. */
get_image_map(unsigned char *head, unsigned char *pos, unsigned char *eof,
3554
unsigned char *tag, struct menu_item **menu,
3555
struct memory_list **ml, unsigned char *href_base,
3556
unsigned char *target_base, int to, int def, int hdef)
3558
struct conv_table *ct;
3561
if (!init_string(&hd)) return -1;
3563
if (head) add_to_string(&hd, head);
3564
/* No title is wanted here, hence the NULL. */
scan_http_equiv(pos, eof, &hd, NULL);
3565
ct = get_convert_table(hd.source, to, def, NULL, NULL, hdef);
3568
*menu = mem_calloc(1, sizeof(struct menu_item));
3569
if (!*menu) return -1;
3571
/* Both loops have empty bodies: the callee advances pos on each call. */
while (look_for_map(&pos, eof, tag));
3580
while (look_for_link(&pos, eof, tag, menu, ml,
3581
href_base, target_base, ct));
3592
/* Push a new element of the given type onto the HTML stack with
 * html_stack_dup() and set the paragraph alignment to align. For
 * ELEMENT_IMMORTAL the margins (both set to margin), the width and the
 * list state of par_format are reset as well, and the name length of the
 * new top element is cleared.
 * element is set to the new top of the stack; presumably that pointer is
 * what gets returned (the return statement is not visible here). */
struct html_element *
3593
init_html_parser_state(enum html_element_type type, int align, int margin, int width)
3595
struct html_element *element;
3597
html_stack_dup(type);
3598
element = &html_top;
3600
par_format.align = align;
3602
if (type == ELEMENT_IMMORTAL) {
3603
par_format.leftmargin = margin;
3604
par_format.rightmargin = margin;
3605
par_format.width = width;
3606
par_format.list_level = 0;
3607
par_format.list_number = 0;
3608
par_format.dd_margin = 0;
3609
html_top.namelen = 0;
3616
/* Unwind the HTML stack down to element: stack items are killed from the
 * top until element is the top item, then element is marked
 * ELEMENT_KILLABLE (presumably so that kill_html_stack_item() accepts it)
 * and killed too. The loop stops early if the stack sanity assertion
 * fails. */
done_html_parser_state(struct html_element *element)
3620
while (&html_top != element) {
3621
kill_html_stack_item(&html_top);
3623
/* I've preserved this bit to show an example of the Old Code
3624
* of the Mikulas days (I _HOPE_ it's by Mikulas, at least ;-).
3625
* I think this assert() can never fail, for one. --pasky */
3626
assertm(&html_top && (void *)&html_top != (void *)&html_stack,
3627
"html stack trashed");
3628
if_assert_failed break;
3632
html_top.type = ELEMENT_KILLABLE;
3633
kill_html_stack_item(&html_top);
3638
/* Set up the parser state for one document:
 * - url, options: must both be non-NULL (asserted); url becomes the base
 *   for links, options supplies colors, margin, width and frame name
 * - start, end: source range handed to scan_http_equiv()
 * - head, title: strings filled in by scan_http_equiv()
 * - put_chars, line_break, special: output callbacks, stored in
 *   put_chars_f, line_break_f and special_f
 * A zeroed element is added to html_stack as the first entry and the
 * format, par_format and html_top fields are given their initial values.
 * The stack is expected to be empty on entry; if it is not, it is
 * re-initialised. */
init_html_parser(unsigned char *url, struct document_options *options,
3639
unsigned char *start, unsigned char *end,
3640
struct string *head, struct string *title,
3641
void (*put_chars)(void *, unsigned char *, int),
3642
void (*line_break)(void *),
3643
void *(*special)(void *, enum html_special_type, ...))
3645
struct html_element *e;
3647
assert(url && options);
3648
if_assert_failed return;
3649
assertm(list_empty(html_stack), "something on html stack");
3650
if_assert_failed init_list(html_stack);
3654
/* Install the output callbacks. */
put_chars_f = put_chars;
3655
line_break_f = line_break;
3656
special_f = special;
3657
scan_http_equiv(start, end, head, title);
3659
/* Create the first stack entry. */
e = mem_calloc(1, sizeof(struct html_element));
3662
add_to_list(html_stack, e);
3665
/* Initial character formatting; colors come from the document options. */
format.fontsize = 3;
3666
format.link = format.target = format.image = NULL;
3667
format.select = NULL;
3669
format.title = NULL;
3671
format.fg = options->default_fg;
3672
format.bg = options->default_bg;
3673
format.clink = options->default_link;
3674
format.vlink = options->default_vlink;
3676
/* Both bases are fresh copies. */
format.href_base = stracpy(url);
3677
format.target_base = null_or_stracpy(options->framename);
3679
/* Initial paragraph formatting. */
par_format.align = AL_LEFT;
3680
par_format.leftmargin = options->margin;
3681
par_format.rightmargin = options->margin;
3683
par_format.width = options->width;
3684
par_format.list_level = par_format.list_number = 0;
3685
par_format.dd_margin = options->margin;
3686
par_format.flags = P_NONE;
3688
par_format.bgcolor = options->default_bg;
3690
/* The first entry has no name or attributes and is marked
 * ELEMENT_DONT_KILL. */
html_top.invisible = 0;
3691
html_top.name = NULL;
3692
html_top.namelen = 0;
3693
html_top.options = NULL;
3694
html_top.linebreak = 1;
3695
html_top.type = ELEMENT_DONT_KILL;
3699
/* Forget any form or input tag remembered earlier. */
last_form_tag = NULL;
3700
last_form_attr = NULL;
3701
last_input_tag = NULL;
3705
/* Tear down the parser state: free form.action and form.target (resetting
 * them to NULL), kill the first item of html_stack, and check that the
 * stack is empty afterwards; if the check fails the list is
 * re-initialised. */
done_html_parser(void)
3707
if (form.action) mem_free(form.action), form.action = NULL;
3708
if (form.target) mem_free(form.target), form.target = NULL;
3710
kill_html_stack_item(html_stack.next);
3712
assertm(list_empty(html_stack),
3713
"html stack not empty after operation");
3714
if_assert_failed init_list(html_stack);