2
** Functions associated with LYCharSets.c and the Lynx version of HTML.c - FM
3
** ==========================================================================
8
#define Lynx_HTML_Handler
23
#include <LYGlobalDefs.h>
24
#include <LYCharUtils.h>
25
#include <LYCharSets.h>
29
#include <HTNestedList.h>
31
#include <LYStrings.h>
34
#include <LYBookmark.h>
42
* Used for nested lists. - FM
44
PUBLIC int OL_CONTINUE = -29999; /* flag for whether CONTINUE is set */
45
PUBLIC int OL_VOID = -29998; /* flag for whether a count is set */
49
** This function converts any ampersands in allocated
50
** strings to "&amp;". If isTITLE is TRUE, it also
51
** converts any angle-brackets to "&lt;" or "&gt;". - FM
53
PUBLIC void LYEntify ARGS2(
58
char *q = NULL, *cp = NULL;
59
int amps = 0, lts = 0, gts = 0;
62
{ S_text, S_esc, S_dollar, S_paren,
63
S_nonascii_text, S_dollar_paren } state = S_text;
71
* Count the ampersands. - FM
73
while ((*p != '\0') && (q = strchr(p, '&')) != NULL) {
79
* Count the left-angle-brackets, if needed. - FM
81
if (isTITLE == TRUE) {
83
while ((*p != '\0') && (q = strchr(p, '<')) != NULL) {
90
* Count the right-angle-brackets, if needed. - FM
92
if (isTITLE == TRUE) {
94
while ((*p != '\0') && (q = strchr(p, '>')) != NULL) {
101
* Check whether we need to convert anything. - FM
103
if (amps == 0 && lts == 0 && gts == 0)
107
* Allocate space and convert. - FM
109
q = typecallocn(char,
110
(strlen(*str) + (4 * amps) + (3 * lts) + (3 * gts) + 1));
111
if ((cp = q) == NULL)
112
outofmem(__FILE__, "LYEntify");
113
for (p = *str; *p; p++) {
115
if (HTCJK != NOCJK) {
130
} else if (*p == '(') {
141
if (*p == '@' || *p == 'B' || *p == 'A') {
142
state = S_nonascii_text;
145
} else if (*p == '(') {
146
state = S_dollar_paren;
157
state = S_nonascii_text;
167
if (*p == 'B' || *p == 'J' || *p =='T') {
171
} else if (*p == 'I') {
172
state = S_nonascii_text;
178
case S_nonascii_text:
187
if (*(p+1) != '\0' &&
188
(IS_EUC(UCH(*p), UCH(*(p+1))) ||
189
IS_SJIS(UCH(*p), UCH(*(p+1)), in_sjis) ||
190
IS_BIG5(UCH(*p), UCH(*(p+1))))) {
203
} else if (isTITLE && *p == '<') {
208
} else if (isTITLE && *p == '>') {
223
** This function trims characters <= that of a space (32),
224
** including HT_NON_BREAK_SPACE (1) and HT_EN_SPACE (2),
225
** but not ESC, from the heads of strings. - FM
227
PUBLIC void LYTrimHead ARGS1(
235
while (*s && WHITE(*s) && UCH(*s) != UCH(CH_ESC)) /* S/390 -- gil -- 1669 */
247
** This function trims characters <= that of a space (32),
248
** including HT_NON_BREAK_SPACE (1), HT_EN_SPACE (2), and
249
** ESC from the tails of strings. - FM
251
PUBLIC void LYTrimTail ARGS1(
270
** This function should receive a pointer to the start
271
** of a comment. It returns a pointer to the end ('>')
272
** character of comment, or its best guess if the comment
275
PUBLIC char *LYFindEndOfComment ARGS1(
279
enum comment_state { start1, start2, end1, end2 } state;
283
* We got NULL, so return NULL. - FM
287
if (strncmp(str, "<!--", 4))
289
* We don't have the start of a comment, so
290
* return the beginning of the string. - FM
297
* It's an invalid comment, so
298
* return this end character. - FM
302
if ((cp1 = strchr(cp, '>')) == NULL)
304
* We don't have an end character, so
305
* return the beginning of the string. - FM
311
* Ugh, it's a "decorative" series of dashes,
312
* so return the next end character. - FM
317
* OK, we're ready to start parsing. - FM
320
while (*cp != '\0') {
327
* Invalid comment, so return the first
328
* '>' from the start of the string. - FM
343
* Invalid comment, so return the first
344
* '>' from the start of the string. - FM
352
* Valid comment, so return the end character. - FM
357
} else if (!(WHITE(*cp) && UCH(*cp) != UCH(CH_ESC))) { /* S/390 -- gil -- 1686 */
359
* Invalid comment, so return the first
360
* '>' from the start of the string. - FM
373
* Invalid comment, so return the first
374
* '>' from the start of the string. - FM
380
** If an HREF, itself or if resolved against a base,
381
** represents a file URL, and the host is defaulted,
382
** force in "//localhost". We need this until
383
** all the other Lynx code which performs security
384
** checks based on the "localhost" string is changed
385
** to assume "//localhost" when a host field is not
386
** present in file URLs - FM
388
PUBLIC void LYFillLocalFileURL ARGS2(
397
if (!strcmp(*href, "//") || !strncmp(*href, "///", 3)) {
398
if (base != NULL && isFILE_URL(base)) {
399
StrAllocCopy(temp, STR_FILE_URL);
400
StrAllocCat(temp, *href);
401
StrAllocCopy(*href, temp);
404
if (isFILE_URL(*href)) {
405
if (*(*href+5) == '\0') {
406
StrAllocCat(*href, "//localhost");
407
} else if (!strcmp(*href, "file://")) {
408
StrAllocCat(*href, "localhost");
409
} else if (!strncmp(*href, "file:///", 8)) {
410
StrAllocCopy(temp, (*href+7));
411
LYLocalFileToURL (href, temp);
412
} else if (!strncmp(*href, "file:/", 6) && !LYIsHtmlSep(*(*href+6))) {
413
StrAllocCopy(temp, (*href+5));
414
LYLocalFileToURL (href, temp);
418
#if defined(USE_DOS_DRIVES)
419
if (LYIsDosDrive(*href)) {
421
* If it's a local DOS path beginning with drive letter,
422
* add file://localhost/ prefix and go ahead.
424
StrAllocCopy(temp, *href);
425
LYLocalFileToURL (href, temp);
428
/* use below: strlen("file://localhost/") = 17 */
429
if (!strncmp(*href, "file://localhost/", 17)
430
&& (strlen(*href) == 19)
431
&& LYIsDosDrive(*href+17)) {
433
* Terminate DOS drive letter with a slash to surf root successfully.
434
* Here seems a proper place to do so.
438
#endif /* USE_DOS_DRIVES */
441
* No path in a file://localhost URL means a
442
* directory listing for the current default. - FM
444
if (!strcmp(*href, "file://localhost")) {
447
temp2 = HTVMS_wwwName(LYGetEnv("PATH"));
449
char curdir[LY_MAXPATH];
450
temp2 = wwwName(Current_Dir(curdir));
452
if (!LYIsHtmlSep(*temp2))
455
* Check for pathological cases - current dir has chars which
456
* MUST BE URL-escaped - kw
458
if (strchr(temp2, '%') != NULL || strchr(temp2, '#') != NULL) {
460
temp = HTEscape(temp2, URL_PATH);
461
StrAllocCat(*href, temp);
463
StrAllocCat(*href, temp2);
469
* On VMS, a file://localhost/ URL means
470
* a listing for the login directory. - FM
472
if (!strcmp(*href, "file://localhost/"))
473
StrAllocCat(*href, (HTVMS_wwwName(Home_Dir())+1));
481
** This function writes a line with a META tag to an open file,
482
** which will specify a charset parameter to use when the file is
483
** read back in. It is meant for temporary HTML files used by the
484
** various special pages which may show titles of documents. When those
485
** files are created, the title strings normally have been translated and
486
** expanded to the display character set, so we have to make sure they
487
** don't get translated again.
488
** If the user has changed the display character set during the lifetime
489
** of the Lynx session (or, more exactly, during the time the title
490
** strings to be written were generated), they may now have different
491
** character encodings and there is currently no way to get it all right.
492
** To change this, we would have to add a variable for each string which
493
** keeps track of its character encoding.
494
** But at least we can try to ensure that reading the file after future
495
** display character set changes will give reasonable output.
497
** The META tag is not written if the display character set (passed as
498
** disp_chndl) already corresponds to the charset assumption that
499
** would be made when the file is read. - KW
501
** Currently this function is used for temporary files like "Lynx Info Page"
502
** and for one permanent - bookmarks (so it may be a problem if you change
503
** the display charset later: new bookmark entries may be mistranslated).
506
PUBLIC void LYAddMETAcharsetToFD ARGS2(
510
if (disp_chndl == -1)
512
* -1 means use current_char_set.
514
disp_chndl = current_char_set;
516
if (fd == NULL || disp_chndl < 0)
522
if (UCLYhndl_HTFile_for_unspec == disp_chndl)
524
* Not need to do, so we don't.
528
if (LYCharSet_UC[disp_chndl].enc == UCT_ENC_7BIT)
530
* There shouldn't be any 8-bit characters in this case.
535
* In other cases we don't know because UCLYhndl_for_unspec may
536
* change during the lifetime of the file (by toggling raw mode
537
* or changing the display character set), so proceed.
539
fprintf(fd, "<META %s content=\"text/html;charset=%s\">\n",
540
"http-equiv=\"content-type\"",
541
LYCharSet_UC[disp_chndl].MIMEname);
545
** This function returns OL TYPE="A" strings in
546
** the range of " A." (1) to "ZZZ." (18278). - FM
548
PUBLIC char *LYUppercaseA_OL_String ARGS1(
551
static char OLstring[8];
554
strcpy(OLstring, " A.");
558
sprintf(OLstring, " %c.", (seqnum + 64));
562
sprintf(OLstring, "%c%c.", ((seqnum-1)/26 + 64),
563
(seqnum - ((seqnum-1)/26)*26 + 64));
566
if (seqnum < 18279) {
567
sprintf(OLstring, "%c%c%c.", ((seqnum-27)/676 + 64),
568
(((seqnum - ((seqnum-27)/676)*676)-1)/26 + 64),
569
(seqnum - ((seqnum-1)/26)*26 + 64));
572
strcpy(OLstring, "ZZZ.");
577
** This function returns OL TYPE="a" strings in
578
** the range of " a." (1) to "zzz." (18278). - FM
580
PUBLIC char *LYLowercaseA_OL_String ARGS1(
583
static char OLstring[8];
586
strcpy(OLstring, " a.");
590
sprintf(OLstring, " %c.", (seqnum + 96));
594
sprintf(OLstring, "%c%c.", ((seqnum-1)/26 + 96),
595
(seqnum - ((seqnum-1)/26)*26 + 96));
598
if (seqnum < 18279) {
599
sprintf(OLstring, "%c%c%c.", ((seqnum-27)/676 + 96),
600
(((seqnum - ((seqnum-27)/676)*676)-1)/26 + 96),
601
(seqnum - ((seqnum-1)/26)*26 + 96));
604
strcpy(OLstring, "zzz.");
609
** This function returns OL TYPE="I" strings in the
610
** range of " I." (1) to "MMM." (3000).- FM
611
** Maximum length: 16 -TD
613
PUBLIC char *LYUppercaseI_OL_String ARGS1(
616
static char OLstring[20];
619
if (Arabic >= 3000) {
620
strcpy(OLstring, "MMM.");
626
strcpy(OLstring, " I.");
629
strcpy(OLstring, " V.");
632
strcpy(OLstring, " X.");
635
strcpy(OLstring, " L.");
638
strcpy(OLstring, " C.");
641
strcpy(OLstring, " D.");
644
strcpy(OLstring, " M.");
651
while (Arabic >= 1000) {
652
strcat(OLstring, "M");
657
strcat(OLstring, "CM");
662
strcat(OLstring, "D");
664
while (Arabic >= 500) {
665
strcat(OLstring, "C");
671
strcat(OLstring, "CD");
675
while (Arabic >= 100) {
676
strcat(OLstring, "C");
681
strcat(OLstring, "XC");
686
strcat(OLstring, "L");
688
while (Arabic >= 50) {
689
strcat(OLstring, "X");
695
strcat(OLstring, "XL");
699
while (Arabic > 10) {
700
strcat(OLstring, "X");
706
strcat(OLstring, "I.");
709
strcat(OLstring, "II.");
712
strcat(OLstring, "III.");
715
strcat(OLstring, "IV.");
718
strcat(OLstring, "V.");
721
strcat(OLstring, "VI.");
724
strcat(OLstring, "VII.");
727
strcat(OLstring, "VIII.");
730
strcat(OLstring, "IX.");
733
strcat(OLstring, "X.");
736
strcat(OLstring, ".");
744
** This function returns OL TYPE="i" strings in
745
** range of " i." (1) to "mmm." (3000).- FM
746
** Maximum length: 16 -TD
748
PUBLIC char *LYLowercaseI_OL_String ARGS1(
751
static char OLstring[20];
754
if (Arabic >= 3000) {
755
strcpy(OLstring, "mmm.");
761
strcpy(OLstring, " i.");
764
strcpy(OLstring, " v.");
767
strcpy(OLstring, " x.");
770
strcpy(OLstring, " l.");
773
strcpy(OLstring, " c.");
776
strcpy(OLstring, " d.");
779
strcpy(OLstring, " m.");
786
while (Arabic >= 1000) {
787
strcat(OLstring, "m");
792
strcat(OLstring, "cm");
797
strcat(OLstring, "d");
799
while (Arabic >= 500) {
800
strcat(OLstring, "c");
806
strcat(OLstring, "cd");
810
while (Arabic >= 100) {
811
strcat(OLstring, "c");
816
strcat(OLstring, "xc");
821
strcat(OLstring, "l");
823
while (Arabic >= 50) {
824
strcat(OLstring, "x");
830
strcat(OLstring, "xl");
834
while (Arabic > 10) {
835
strcat(OLstring, "x");
841
strcat(OLstring, "i.");
844
strcat(OLstring, "ii.");
847
strcat(OLstring, "iii.");
850
strcat(OLstring, "iv.");
853
strcat(OLstring, "v.");
856
strcat(OLstring, "vi.");
859
strcat(OLstring, "vii.");
862
strcat(OLstring, "viii.");
865
strcat(OLstring, "ix.");
868
strcat(OLstring, "x.");
871
strcat(OLstring, ".");
879
** This function initializes the Ordered List counter. - FM
881
PUBLIC void LYZero_OL_Counter ARGS1(
889
for (i = 0; i < 12; i++) {
890
me->OL_Counter[i] = OL_VOID;
891
me->OL_Type[i] = '1';
894
me->Last_OL_Count = 0;
895
me->Last_OL_Type = '1';
901
** This function is used by the HTML Structured object. - KW
903
PUBLIC void LYGetChartransInfo ARGS1(
906
me->UCLYhndl = HTAnchor_getUCLYhndl(me->node_anchor,
907
UCT_STAGE_STRUCTURED);
908
if (me->UCLYhndl < 0) {
909
int chndl = HTAnchor_getUCLYhndl(me->node_anchor, UCT_STAGE_HTEXT);
912
chndl = current_char_set;
913
HTAnchor_setUCInfoStage(me->node_anchor, chndl,
915
UCT_SETBY_STRUCTURED);
917
HTAnchor_setUCInfoStage(me->node_anchor, chndl,
918
UCT_STAGE_STRUCTURED,
919
UCT_SETBY_STRUCTURED);
920
me->UCLYhndl = HTAnchor_getUCLYhndl(me->node_anchor,
921
UCT_STAGE_STRUCTURED);
923
me->UCI = HTAnchor_getUCInfoStage(me->node_anchor,
924
UCT_STAGE_STRUCTURED);
928
* Given an UCS character code, will fill buffer passed in as q with
929
* the code's UTF-8 encoding.
930
* If terminate = YES, terminates string on success and returns pointer
932
* If terminate = NO, does not terminate string, and returns pointer
933
* next char after the UTF-8 put into buffer.
934
* On failure, including invalid code or 7-bit code, returns NULL.
936
PRIVATE char * UCPutUtf8ToBuffer ARGS3(char *, q, UCode_t, code, BOOL, terminate)
941
if (code > 127 && code < 0x7fffffffL) {
943
*q++ = (char)(0xc0 | (code>>6));
944
*q++ = (char)(0x80 | (0x3f & (code)));
945
} else if (code < 0x10000L) {
946
*q++ = (char)(0xe0 | (code>>12));
947
*q++ = (char)(0x80 | (0x3f & (code>>6)));
948
*q++ = (char)(0x80 | (0x3f & (code)));
949
} else if (code < 0x200000L) {
950
*q++ = (char)(0xf0 | (code>>18));
951
*q++ = (char)(0x80 | (0x3f & (code>>12)));
952
*q++ = (char)(0x80 | (0x3f & (code>>6)));
953
*q++ = (char)(0x80 | (0x3f & (code)));
954
} else if (code < 0x4000000L) {
955
*q++ = (char)(0xf8 | (code>>24));
956
*q++ = (char)(0x80 | (0x3f & (code>>18)));
957
*q++ = (char)(0x80 | (0x3f & (code>>12)));
958
*q++ = (char)(0x80 | (0x3f & (code>>6)));
959
*q++ = (char)(0x80 | (0x3f & (code)));
961
*q++ = (char)(0xfc | (code>>30));
962
*q++ = (char)(0x80 | (0x3f & (code>>24)));
963
*q++ = (char)(0x80 | (0x3f & (code>>18)));
964
*q++ = (char)(0x80 | (0x3f & (code>>12)));
965
*q++ = (char)(0x80 | (0x3f & (code>>6)));
966
*q++ = (char)(0x80 | (0x3f & (code)));
979
/* as in HTParse.c, saves some calls - kw */
980
PRIVATE CONST char *hex = "0123456789ABCDEF";
983
* Any raw 8-bit or multibyte characters already have been
984
* handled in relation to the display character set
985
* in SGML_character(), including named and numeric entities.
987
** This function is used for translating HTML special fields inside tags
988
** (ALT=, VALUE=, etc.) from charset `cs_from' to charset `cs_to'.
989
** It also unescapes non-ASCII characters from URL (#fragments !)
990
** if st_URL is active.
992
** If `do_ent' is YES, it converts named entities
993
** and numeric character references (NCRs) to their `cs_to' replacements.
995
** Named entities converted to unicodes. NCRs (unicodes) converted
996
** by UCdomap.c chartrans functions.
997
** ???NCRs with values in the ISO-8859-1 range 160-255 may be converted
998
** to their HTML entity names (via old-style entities) and then translated
999
** according to the LYCharSets.c array for `cs_out'???.
1001
** Some characters (see descriptions in `put_special_unicodes' from SGML.c)
1002
** translated in relation with the state of boolean variables
1003
** `use_lynx_specials', `plain_space' and `hidden'. It is not clear yet:
1005
** If plain_space is TRUE, nbsp (160) will be treated as an ASCII
1006
** space (32). If hidden is TRUE, entities will be translated
1007
** (if `do_ent' is YES) but escape sequences will be passed unaltered.
1008
** If `hidden' is FALSE, some characters are converted to Lynx special
1009
** codes (see `put_special_unicodes') or ASCII space if `plain_space'
1010
** applies). @@ is `use_lynx_specials' needed, does it have any effect? @@
1011
** If `use_lynx_specials' is YES, translate byte values 160 and 173
1012
** meaning U+00A0 and U+00AD given as or converted from raw char input
1013
** are converted to HT_NON_BREAK_SPACE and LY_SOFT_HYPHEN, respectively
1014
** (unless input and output charset are both iso-8859-1, for compatibility
1015
** with previous usage in HTML.c) even if `hidden' or `plain_space' is set.
1017
** If `Back' is YES, the reverse is done instead i.e., Lynx special codes
1018
** in the input are translated back to character values.
1020
** If `Back' is YES, an attempt is made to use UCReverseTransChar() for
1021
** back translation which may be more efficient. (?)
1023
** If `stype' is st_URL, non-ASCII characters are URL-encoded instead.
1024
** The sequence of bytes being URL-encoded is the raw input character if
1025
** we couldn't translate it from `cs_in' (CJK etc.); otherwise it is the
1026
** UTF-8 representation if either `cs_to' requires this or if the
1027
** character's Unicode value is > 255, otherwise it should be the iso-8859-1
1029
** No general URL-encoding occurs for displayable ASCII characters and
1030
** spaces and some C0 controls valid in HTML (LF, TAB), it is expected
1031
** that other functions will take care of that as appropriate.
1033
** Escape characters (0x1B, '\033') are
1034
** - URL-encoded if `stype' is st_URL, otherwise
1035
** - dropped if `stype' is st_other, otherwise (i.e., st_HTML)
1036
** - passed if `hidden' is TRUE or HTCJK is set, otherwise
1039
** (If `stype' is st_URL or st_other most of the parameters really predefined:
1040
** cs_from=cs_to, use_lynx_specials=plain_space=NO, and hidden=YES)
1043
** Returns pointer to the char** passed in
1044
** if string translated or translation unnecessary,
1046
** (in which case something probably went wrong.)
1049
** In general, this somewhat ugly function (KW)
1050
** covers three functions from v.2.7.2 (FM):
1051
** extern void LYExpandString PARAMS((
1052
** HTStructured * me,
1054
** extern void LYUnEscapeEntities PARAMS((
1055
** HTStructured * me,
1057
** extern void LYUnEscapeToLatinOne PARAMS((
1058
** HTStructured * me,
1063
PUBLIC char ** LYUCFullyTranslateString ARGS9(
1068
BOOL, use_lynx_specials,
1069
BOOLEAN, plain_space,
1076
HTChunk *chunk = NULL;
1080
char replace_buf [64];
1085
BOOL output_utf8 = 0, repl_translated_C0 = 0;
1087
CONST char * name = NULL;
1088
BOOLEAN no_bytetrans;
1090
BOOL from_is_utf8 = FALSE;
1093
{ S_text, S_esc, S_dollar, S_paren, S_nonascii_text, S_dollar_paren,
1094
S_trans_byte, S_check_ent, S_ncr, S_check_uni, S_named, S_check_name,
1096
S_got_oututf8, S_got_outstring, S_put_urlstring,
1097
S_got_outchar, S_put_urlchar, S_next_char, S_done} state = S_text;
1099
{ P_text, P_utf8, P_hex, P_decimal, P_named
1101
#ifdef KANJI_CODE_OVERRIDE
1102
static unsigned char sjis_1st = '\0';
1103
#ifdef CONV_JISX0201KANA_JISX0208KANA
1104
unsigned char sjis_str[3];
1109
** Make sure we have a non-empty string. - FM
1111
if (!str || isEmpty(*str))
1115
* FIXME: something's wrong with the limit checks here (clearing the
1118
memset(replace_buf, 0, sizeof(replace_buf));
1121
** Don't do byte translation
1122
** if original AND target character sets
1123
** are both iso-8859-1 (and we are not called to back-translate),
1124
** or if we are in CJK mode.
1126
if (HTCJK != NOCJK) {
1127
no_bytetrans = TRUE;
1128
} else if (cs_to <= 0 && cs_from == cs_to && (!Back || cs_to < 0)) {
1129
no_bytetrans = TRUE;
1131
/* No need to translate or examine the string any further */
1132
no_bytetrans = (BOOL) (!use_lynx_specials && !Back &&
1133
UCNeedNotTranslate(cs_from, cs_to));
1136
** Save malloc/calloc overhead in simple case - kw
1138
if (do_ent && hidden && (stype != st_URL) && (strchr(*str, '&') == NULL))
1141
/* Can't do, caller should figure out what to do... */
1142
if (!UCCanTranslateFromTo(cs_from, cs_to)) {
1145
if (!do_ent && no_bytetrans)
1147
no_bytetrans = TRUE;
1148
} else if (cs_to < 0) {
1152
if (!do_ent && no_bytetrans)
1156
if (!no_bytetrans) {
1157
UCTransParams_clear(&T);
1158
UCSetTransParams(&T, cs_from, &LYCharSet_UC[cs_from],
1159
cs_to, &LYCharSet_UC[cs_to]);
1160
from_is_utf8 = (BOOL) (LYCharSet_UC[cs_from].enc == UCT_ENC_UTF8);
1161
output_utf8 = T.output_utf8;
1162
repl_translated_C0 = T.repl_translated_C0;
1164
} else if (do_ent) {
1165
output_utf8 = (BOOL) (LYCharSet_UC[cs_to].enc == UCT_ENC_UTF8 ||
1166
HText_hasUTF8OutputSet(HTMainText));
1167
repl_translated_C0 = (BOOL) (LYCharSet_UC[cs_to].enc == UCT_ENC_8BIT_C0);
1170
lowest_8 = LYlowest_eightbit[cs_to];
1173
** Create a buffer string seven times the length of the original,
1174
** so we have plenty of room for expansions. - FM
1176
len = strlen(p) + 16;
1181
/* Create the HTChunk only if we need it */
1182
#define CHUNK (chunk ? chunk : (chunk = HTChunkCreate2(128, len+1)))
1184
#define REPLACE_STRING(s) \
1185
if (q != qs) HTChunkPutb(CHUNK, qs, q-qs); \
1186
HTChunkPuts(CHUNK, s); \
1189
#define REPLACE_CHAR(c) if (q > p) { \
1190
HTChunkPutb(CHUNK, qs, q-qs); \
1197
* Loop through string, making conversions as needed.
1199
* The while() checks for a non-'\0' char only for the normal
1200
* text states since other states may temporarily modify p or *p
1201
* (which should be restored before S_done!) - kw
1204
while (*p || (state != S_text && state != S_nonascii_text)) {
1208
#ifdef KANJI_CODE_OVERRIDE
1209
if (HTCJK == JAPANESE && last_kcode == SJIS) {
1210
if (sjis_1st == '\0' && (IS_SJIS_HI1(code)||IS_SJIS_HI2(code))){
1211
sjis_1st = UCH(code);
1212
} else if (sjis_1st && IS_SJIS_LO(code)) {
1215
#ifdef CONV_JISX0201KANA_JISX0208KANA
1216
if (0xA1 <= code && code <= 0xDF) {
1218
JISx0201TO0208_SJIS(UCH(code),
1219
sjis_str, sjis_str + 1);
1220
REPLACE_STRING(sjis_str);
1229
if ((HTCJK != NOCJK && !hidden) || stype != st_HTML) {
1231
if (stype == st_URL) {
1232
REPLACE_STRING("%1B");
1235
} else if (stype != st_HTML) {
1242
} else if (!hidden) {
1244
** CJK handling not on, and not a hidden INPUT,
1245
** so block escape. - FM
1247
state = S_next_char;
1249
state = S_trans_byte;
1252
state = (do_ent ? S_check_ent : S_trans_byte);
1261
} else if (*p == '(') {
1271
if (*p == '@' || *p == 'B' || *p == 'A') {
1272
state = S_nonascii_text;
1275
} else if (*p == '(') {
1276
state = S_dollar_paren;
1284
case S_dollar_paren:
1286
state = S_nonascii_text;
1295
if (*p == 'B' || *p == 'J' || *p == 'T') {
1299
} else if (*p == 'I') {
1300
state = S_nonascii_text;
1308
case S_nonascii_text:
1310
if ((HTCJK != NOCJK && !hidden) || stype != st_HTML) {
1312
if (stype == st_URL) {
1313
REPLACE_STRING("%1B");
1316
} else if (stype != st_HTML) {
1326
/* character translation goes here */
1328
** Don't do anything if we have no string,
1329
** or if original AND target character sets
1330
** are both iso-8859-1,
1331
** or if we are in CJK mode.
1333
if (*p == '\0' || no_bytetrans) {
1334
state = S_got_outchar;
1340
if ((*p) == HT_NON_BREAK_SPACE ||
1341
(*p) == HT_EN_SPACE) {
1344
state = S_got_outchar;
1348
if (LYCharSet_UC[cs_to].enc == UCT_ENC_8859 ||
1349
(LYCharSet_UC[cs_to].like8859 & UCT_R_8859SPECL)) {
1350
state = S_got_outchar;
1352
} else if (!(LYCharSet_UC[cs_from].enc == UCT_ENC_8859
1353
||(LYCharSet_UC[cs_from].like8859 & UCT_R_8859SPECL))) {
1354
state = S_check_uni;
1357
*(unsigned char *)p = UCH(160);
1360
} else if ((*p) == LY_SOFT_HYPHEN) {
1362
if (LYCharSet_UC[cs_to].enc == UCT_ENC_8859 ||
1363
(LYCharSet_UC[cs_to].like8859 & UCT_R_8859SPECL)) {
1364
state = S_got_outchar;
1366
} else if (!(LYCharSet_UC[cs_from].enc == UCT_ENC_8859
1367
||(LYCharSet_UC[cs_from].like8859 & UCT_R_8859SPECL))) {
1368
state = S_check_uni;
1371
*(unsigned char *)p = UCH(173);
1373
} else if (code < 127 || T.transp) {
1374
state = S_got_outchar;
1377
rev_c = UCReverseTransChar(*p, cs_to, cs_from);
1381
state = S_got_outchar;
1384
} else if (code < 127) {
1385
state = S_got_outchar;
1390
if (((*p)&0xc0)==0xc0) {
1392
code = UCGetUniFromUtf8String(&puni);
1399
} else if (use_lynx_specials && !Back &&
1400
(code == 160 || code == 173) &&
1401
(LYCharSet_UC[cs_from].enc == UCT_ENC_8859 ||
1402
(LYCharSet_UC[cs_from].like8859 & UCT_R_8859SPECL))) {
1404
code = *p = HT_NON_BREAK_SPACE;
1405
else if (code == 173)
1406
code = *p = LY_SOFT_HYPHEN;
1407
state = S_got_outchar;
1409
} else if (T.trans_to_uni) {
1410
code = UCTransToUni(*p, cs_from);
1412
/* What else can we do? */
1415
} else if (!T.trans_from_uni) {
1416
state = S_got_outchar;
1420
** Substitute Lynx special character for
1421
** 160 (nbsp) if use_lynx_specials is set.
1423
if (use_lynx_specials && !Back &&
1424
(code == 160 || code == 173)) {
1425
code = ((code==160 ? HT_NON_BREAK_SPACE : LY_SOFT_HYPHEN));
1426
state = S_got_outchar;
1430
state = S_check_uni;
1438
** Check for a numeric entity. - FM
1440
if (*pp == '#' && len > 2 &&
1441
(*(pp+1) == 'x' || *(pp+1) == 'X') &&
1442
UCH(*(pp+2)) < 127 &&
1443
isxdigit(UCH(*(pp+2)))) {
1446
} else if (*pp == '#' && len > 2 &&
1447
UCH(*(pp+1)) < 127 &&
1448
isdigit(UCH(*(pp+1)))) {
1451
} else if (UCH(*pp) < 127 &&
1452
isalpha(UCH(*pp))) {
1456
state = S_trans_byte;
1459
state = S_trans_byte;
1464
if (what == P_hex) {
1466
} else { /* P_decimal */
1470
while (*p && UCH(*p) < 127 &&
1471
(what == P_hex ? isxdigit(UCH(*p)) :
1472
isdigit(UCH(*p)))) {
1476
** Save the terminator and isolate the digit(s). - FM
1482
** Show the numeric entity if the value:
1483
** (1) Is greater than 255 and unhandled Unicode.
1484
** (2) Is less than 32, and not valid and we don't
1486
** (3) Is 127 and we don't have HTPassHighCtrlRaw
1488
** (4) Is 128 - 159 and we don't have HTPassHighCtrlNum set.
1490
if ((((what == P_hex) ? sscanf(cp, "%lx", &lcode) :
1491
sscanf(cp, "%ld", &lcode)) != 1) ||
1492
lcode > 0x7fffffffL || lcode < 0) {
1498
(code > 127 && code < 156)) {
1500
** Assume these are Microsoft code points, inflicted on
1501
** us by FrontPage. - FM
1503
** MS FrontPage uses syntax like &#153; in 128-159
1504
** range and doesn't follow Unicode standards for this
1505
** area. Windows-1252 codepoints are assumed here.
1510
** WHITE SMILING FACE
1516
** EURO currency sign
1522
** SINGLE LOW-9 QUOTATION MARK (sbquo)
1528
** DOUBLE LOW-9 QUOTATION MARK (bdquo)
1534
** HORIZONTAL ELLIPSIS (hellip)
1546
** DOUBLE DAGGER (Dagger)
1552
** PER MILLE SIGN (permil)
1558
** SINGLE LEFT-POINTING ANGLE QUOTATION MARK
1565
** LEFT SINGLE QUOTATION MARK (lsquo)
1571
** RIGHT SINGLE QUOTATION MARK (rsquo)
1577
** LEFT DOUBLE QUOTATION MARK (ldquo)
1583
** RIGHT DOUBLE QUOTATION MARK (rdquo)
1607
** SMALL TILDE (tilde)
1613
** TRADE MARK SIGN (trade)
1619
** SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
1626
** Do not attempt a conversion
1627
** to valid Unicode values.
1632
state = S_check_uni;
1638
** Show the numeric entity if the value:
1639
** (2) Is less than 32, and not valid and we don't
1641
** (3) Is 127 and we don't have HTPassHighCtrlRaw
1643
** (4) Is 128 - 159 and we don't have HTPassHighCtrlNum set.
1646
code != 9 && code != 10 && code != 13 &&
1649
!(HTPassHighCtrlRaw || HTCJK != NOCJK)) ||
1650
(code > 127 && code < 160 &&
1651
!HTPassHighCtrlNum)) {
1656
** Convert the value as an unsigned char,
1657
** hex escaped if isURL is set and it's
1658
** 8-bit, and then recycle the terminator
1659
** if it is not a semicolon. - FM
1661
if (code > 159 && stype == st_URL) {
1662
state = S_got_oututf8;
1666
** For 160 (nbsp), use that value if it's
1667
** a hidden INPUT, otherwise use an ASCII
1668
** space (32) if plain_space is TRUE,
1669
** otherwise use the Lynx special character. - FM
1674
state = S_got_outchar;
1676
} else if (use_lynx_specials) {
1677
code = HT_NON_BREAK_SPACE;
1678
state = S_got_outchar;
1680
} else if ((hidden && !Back) ||
1681
(LYCharSet_UC[cs_to].codepoints & UCT_CP_SUPERSETOF_LAT1) ||
1682
LYCharSet_UC[cs_to].enc == UCT_ENC_8859 ||
1683
(LYCharSet_UC[cs_to].like8859 &
1685
state = S_got_outchar;
1688
(LYCharSet_UC[cs_to].repertoire & UCT_REP_SUPERSETOF_LAT1)) {
1689
; /* nothing, may be translated later */
1692
state = S_got_outchar;
1697
** For 173 (shy), use that value if it's
1698
** a hidden INPUT, otherwise ignore it
1699
** if plain_space is TRUE, otherwise use
1700
** the Lynx special character. - FM
1704
replace_buf[0] = '\0';
1705
state = S_got_outstring;
1708
!(LYCharSet_UC[cs_to].enc == UCT_ENC_8859 ||
1709
(LYCharSet_UC[cs_to].like8859 &
1710
UCT_R_8859SPECL))) {
1711
; /* nothing, may be translated later */
1712
} else if (hidden || Back) {
1713
state = S_got_outchar;
1715
} else if (use_lynx_specials) {
1716
code = LY_SOFT_HYPHEN;
1717
state = S_got_outchar;
1722
** Seek a translation from the chartrans tables.
1724
if ((uck = UCTransUniChar(code,
1727
(uck < 127 || uck >= lowest_8)) {
1729
state = S_got_outchar;
1731
} else if ((uck == -4 ||
1732
(repl_translated_C0 &&
1733
uck > 0 && uck < 32)) &&
1735
** Not found; look for replacement string.
1737
(uck = UCTransUniCharStr(replace_buf,
1741
state = S_got_outstring;
1745
code > 127 && code < 0x7fffffffL) {
1746
state = S_got_oututf8;
1750
** For 8194 (ensp), 8195 (emsp), or 8201 (thinsp),
1751
** use the character reference if it's a hidden INPUT,
1752
** otherwise use an ASCII space (32) if plain_space is
1753
** TRUE, otherwise use the Lynx special character. - FM
1755
if (code == 8194 || code == 8195 || code == 8201) {
1758
} else if (plain_space) {
1760
state = S_got_outchar;
1763
state = S_got_outchar;
1767
** Ignore 8204 (zwnj), 8205 (zwj)
1768
** 8206 (lrm), and 8207 (rlm),
1769
** for now, if we got this far without
1770
** finding a representation for them.
1772
} else if (code == 8204 || code == 8205 ||
1773
code == 8206 || code == 8207) {
1774
CTRACE((tfp, "LYUCFullyTranslateString: Ignoring '%ld'.\n", code));
1775
replace_buf[0] = '\0';
1776
state = S_got_outstring;
1779
** Show the numeric entity if the value:
1780
** (1) Is greater than 255 and unhandled Unicode.
1782
} else if (code > 255) {
1784
** Illegal or not yet handled value.
1785
** Return "&#" verbatim and continue
1791
** If it's ASCII, or is 8-bit but HTPassEightBitNum
1792
** is set or the character set is "ISO Latin 1",
1793
** use its value. - FM
1795
} else if (code < 161 ||
1797
(HTPassEightBitNum || cs_to == LATIN1))) {
1799
** No conversion needed.
1801
state = S_got_outchar;
1804
/* The following disabled section doesn't make sense
1805
** any more. It used to make sense in the past, when
1806
** S_check_named would look in "old style" tables
1807
** in addition to what it does now.
1808
** Disabling of going to S_check_name here prevents
1809
** endless looping between S_check_uni and S_check_names
1810
** states, which could occur here for Latin 1 codes
1811
** for some cs_to if they had no translation in that
1812
** cs_to. Normally all cs_to *should* now have valid
1813
** translations via UCTransUniChar or UCTransUniCharStr
1814
** for all Latin 1 codes, so that we would not get here
1815
** anyway, and no loop could occur. Still, if we *do*
1816
** get here, FALL THROUGH to case S_recover now. - kw
1820
** If we get to here, convert and handle
1821
** the character as a named entity. - FM
1824
name = HTMLGetEntityName(code - 160);
1825
state = S_check_name;
1831
if (what == P_decimal || what == P_hex) {
1833
** Illegal or not yet handled value.
1834
** Return "&#" verbatim and continue
1845
} else if (what == P_named) {
1849
} else if (!T.output_utf8 && stype == st_HTML && !hidden &&
1850
!(HTPassEightBitRaw &&
1851
UCH(*p) >= lowest_8)) {
1852
sprintf(replace_buf, "U%.2lX", code);
1853
state = S_got_outstring;
1857
state = S_got_outchar;
1863
while (*cp && UCH(*cp) < 127 &&
1869
state = S_check_name;
1874
** Seek the Unicode value for the named entity.
1876
** !!!! We manually recover the case of '=' terminator which
1877
** is commonly found on query to CGI-scripts
1878
** enclosed as href= URLs like "somepath/?x=1&yz=2"
1879
** Without this dirty fix, submission of such URLs was broken
1880
** if &yz string happened to be a recognized entity name. - LP
1882
if ( ((code = HTMLGetEntityUCValue(name)) > 0) &&
1883
!((cpe == '=') && (stype == st_URL)) ) {
1884
state = S_check_uni;
1888
** Didn't find the entity.
1894
/* * * O U T P U T S T A T E S * * */
1898
(code >= 128 && LYCharSet_UC[cs_to].enc == UCT_ENC_UTF8)) {
1899
UCPutUtf8ToBuffer(replace_buf, code, YES);
1900
state = S_got_outstring;
1902
state = S_got_outchar;
1905
case S_got_outstring:
1906
if (what == P_decimal || what == P_hex) {
1907
if (cpe != ';' && cpe != '\0')
1910
} else if (what == P_named) {
1912
p = (*cp != ';') ? (cp - 1) : cp;
1913
} else if (what == P_utf8) {
1916
if (replace_buf[0] == '\0') {
1917
state = S_next_char;
1920
if (stype == st_URL) {
1921
code = replace_buf[0]; /* assume string OK if first char is */
1923
(code < 32 && (code != 9 && code != 10 && code != 0))) {
1924
state = S_put_urlstring;
1928
REPLACE_STRING(replace_buf);
1929
state = S_next_char;
1931
case S_put_urlstring:
1932
esc = HTEscape(replace_buf, URL_XALPHAS);
1933
REPLACE_STRING(esc);
1935
state = S_next_char;
1938
if (what == P_decimal || what == P_hex) {
1939
if (cpe != ';' && cpe != '\0')
1942
} else if (what == P_named) {
1944
p = (*cp != ';') ? (cp - 1) : cp;
1945
} else if (what == P_utf8) {
1948
if (stype == st_URL &&
1949
/* Not a full HTEscape, only for 8bit and ctrl chars */
1950
(TOASCII(code) >= 127 || /* S/390 -- gil -- 1925 */
1951
(code < ' ' && (code != '\t' && code != '\n')))) {
1952
state = S_put_urlchar;
1954
} else if (!hidden && code == 10 && *p == 10
1955
&& q != qs && *(q-1) == 13) {
1957
** If this is not a hidden string, and the current char is
1958
** the LF ('\n') of a CRLF pair, drop the CR ('\r'). - KW
1965
state = S_next_char;
1969
REPLACE_CHAR(hex[(TOASCII(code) >> 4) & 15]); /* S/390 -- gil -- 1944 */
1970
REPLACE_CHAR(hex[(TOASCII(code) & 15)]);
1973
p++; /* fall through */
1977
/* for next round */
1983
HTChunkPutb(CHUNK, qs, q-qs + 1); /* also terminates */
1984
if (stype == st_URL || stype == st_other) {
1985
LYTrimHead(chunk->data);
1986
LYTrimTail(chunk->data);
1988
StrAllocCopy(*str, chunk->data);
1991
if (stype == st_URL || stype == st_other) {
2000
#undef REPLACE_STRING
2002
PUBLIC BOOL LYUCTranslateHTMLString ARGS7(
2006
BOOL, use_lynx_specials,
2007
BOOLEAN, plain_space,
2012
/* May reallocate *str even if cs_to == 0 */
2013
if (!LYUCFullyTranslateString(str, cs_from, cs_to, TRUE,
2014
use_lynx_specials, plain_space, hidden,
2021
PUBLIC BOOL LYUCTranslateBackFormData ARGS4(
2025
BOOLEAN, plain_space)
2028
/* May reallocate *str */
2029
ret = (LYUCFullyTranslateString(str, cs_from, cs_to, FALSE,
2030
NO, plain_space, YES,
2032
return (BOOL) (ret != NULL);
2036
* Parse a parameter from an HTML META tag, i.e., the CONTENT.
2038
PUBLIC char *LYParseTagParam ARGS2(
2042
size_t len = strlen(name);
2043
char *result = NULL;
2044
char *string = from;
2047
if ((string = strchr(string, ';')) == NULL)
2049
while (*string != '\0' && (*string == ';' || isspace(UCH(*string)))) {
2052
if (strlen(string) < len) return NULL;
2053
} while (strncasecomp(string, name, len) != 0);
2055
while (*string != '\0' && (UCH(isspace(*string)) || *string == '=')) {
2059
StrAllocCopy(result, string);
2061
while (isprint(UCH(string[len])) && !isspace(UCH(string[len]))) {
2067
* Strip single quotes, just in case.
2069
if (len > 2 && result[0] == '\'' && result[len-1] == result[0]) {
2070
result[len-1] = '\0';
2071
for (string = result; (string[0] = string[1]) != '\0'; ++string)
2078
* Given a refresh-URL content string, parses the delay time and the URL
2079
* string. Ignore the remainder of the content.
2081
PUBLIC void LYParseRefreshURL ARGS3(
2088
char *Seconds = NULL;
2091
* Look for the Seconds field. - FM
2093
cp = LYSkipBlanks(content);
2094
if (*cp && isdigit(UCH(*cp))) {
2096
while (*cp1 && isdigit(UCH(*cp1)))
2098
StrnAllocCopy(Seconds, cp, cp1 - cp);
2100
*p_seconds = Seconds;
2101
*p_address = LYParseTagParam(content, "URL");
2103
CTRACE((tfp, "LYParseRefreshURL\n\tcontent: %s\n\tseconds: %s\n\taddress: %s\n",
2104
content, NonNull(*p_seconds), NonNull(*p_address)));
2108
** This function processes META tags in HTML streams. - FM
2110
PUBLIC void LYHandleMETA ARGS4(
2112
CONST BOOL*, present,
2113
CONST char **, value,
2114
char **, include GCC_UNUSED)
2116
char *http_equiv = NULL, *name = NULL, *content = NULL;
2117
char *href = NULL, *id_string = NULL, *temp = NULL;
2118
char *cp, *cp0, *cp1 = NULL;
2121
if (!me || !present)
2125
* Load the attributes for possible use by Lynx. - FM
2127
if (present[HTML_META_HTTP_EQUIV] &&
2128
value[HTML_META_HTTP_EQUIV] && *value[HTML_META_HTTP_EQUIV]) {
2129
StrAllocCopy(http_equiv, value[HTML_META_HTTP_EQUIV]);
2130
convert_to_spaces(http_equiv, TRUE);
2131
LYUCTranslateHTMLString(&http_equiv, me->tag_charset, me->tag_charset,
2132
NO, NO, YES, st_other);
2133
if (*http_equiv == '\0') {
2137
if (present[HTML_META_NAME] &&
2138
value[HTML_META_NAME] && *value[HTML_META_NAME]) {
2139
StrAllocCopy(name, value[HTML_META_NAME]);
2140
convert_to_spaces(name, TRUE);
2141
LYUCTranslateHTMLString(&name, me->tag_charset, me->tag_charset,
2142
NO, NO, YES, st_other);
2143
if (*name == '\0') {
2147
if (present[HTML_META_CONTENT] &&
2148
value[HTML_META_CONTENT] && *value[HTML_META_CONTENT]) {
2150
* Technically, we should be creating a comma-separated
2151
* list, but META tags come one at a time, and we'll
2152
* handle (or ignore) them as each is received. Also,
2153
* at this point, we only trim leading and trailing
2154
* blanks from the CONTENT value, without translating
2155
* any named entities or numeric character references,
2156
* because how we should do that depends on what type
2157
* of information it contains, and whether or not any
2158
* of it might be sent to the screen. - FM
2160
StrAllocCopy(content, value[HTML_META_CONTENT]);
2161
convert_to_spaces(content, FALSE);
2162
LYTrimHead(content);
2163
LYTrimTail(content);
2164
if (*content == '\0') {
2168
CTRACE((tfp, "LYHandleMETA: HTTP-EQUIV=\"%s\" NAME=\"%s\" CONTENT=\"%s\"\n",
2169
(http_equiv ? http_equiv : "NULL"),
2170
(name ? name : "NULL"),
2171
(content ? content : "NULL")));
2174
* Make sure we have META name/value pairs to handle. - FM
2176
if (!(http_equiv || name) || !content)
2177
goto free_META_copies;
2180
* Check for a no-cache Pragma
2181
* or Cache-Control directive. - FM
2183
if (!strcasecomp(NonNull(http_equiv), "Pragma") ||
2184
!strcasecomp(NonNull(http_equiv), "Cache-Control")) {
2185
LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset,
2186
NO, NO, YES, st_other);
2187
if (!strcasecomp(content, "no-cache")) {
2188
me->node_anchor->no_cache = TRUE;
2189
HText_setNoCache(me->text);
2193
* If we didn't get a Cache-Control MIME header,
2194
* and the META has one, convert to lowercase,
2195
* store it in the anchor element, and if we
2196
* haven't yet set no_cache, check whether we
2199
if ((!me->node_anchor->cache_control) &&
2200
!strcasecomp(NonNull(http_equiv), "Cache-Control")) {
2201
LYLowerCase(content);
2202
StrAllocCopy(me->node_anchor->cache_control, content);
2203
if (me->node_anchor->no_cache == FALSE) {
2205
while ((cp = strstr(cp0, "no-cache")) != NULL) {
2207
while (*cp != '\0' && WHITE(*cp))
2209
if (*cp == '\0' || *cp == ';') {
2210
me->node_anchor->no_cache = TRUE;
2211
HText_setNoCache(me->text);
2216
if (me->node_anchor->no_cache == TRUE)
2217
goto free_META_copies;
2219
while ((cp = strstr(cp0, "max-age")) != NULL) {
2221
while (*cp != '\0' && WHITE(*cp))
2225
while (*cp != '\0' && WHITE(*cp))
2227
if (isdigit(UCH(*cp))) {
2229
while (isdigit(UCH(*cp)))
2231
if (*cp0 == '0' && cp == (cp0 + 1)) {
2232
me->node_anchor->no_cache = TRUE;
2233
HText_setNoCache(me->text);
2244
* Check for an Expires directive. - FM
2246
} else if (!strcasecomp(NonNull(http_equiv), "Expires")) {
2248
* If we didn't get an Expires MIME header,
2249
* store it in the anchor element, and if we
2250
* haven't yet set no_cache, check whether we
2251
* should. Note that we don't accept a Date
2252
* header via META tags, because it's likely
2253
* to be untrustworthy, but do check for a
2254
* Date header from a server when making the
2257
LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset,
2258
NO, NO, YES, st_other);
2259
StrAllocCopy(me->node_anchor->expires, content);
2260
if (me->node_anchor->no_cache == FALSE) {
2261
if (!strcmp(content, "0")) {
2263
* The value is zero, which we treat as
2264
* an absolute no-cache directive. - FM
2266
me->node_anchor->no_cache = TRUE;
2267
HText_setNoCache(me->text);
2268
} else if (me->node_anchor->date != NULL) {
2270
* We have a Date header, so check if
2271
* the value is less than or equal to
2274
if (LYmktime(content, TRUE) <=
2275
LYmktime(me->node_anchor->date, TRUE)) {
2276
me->node_anchor->no_cache = TRUE;
2277
HText_setNoCache(me->text);
2279
} else if (LYmktime(content, FALSE) == 0) {
2281
* We don't have a Date header, and
2282
* the value is in the past for us. - FM
2284
me->node_anchor->no_cache = TRUE;
2285
HText_setNoCache(me->text);
2290
* Check for a text/html Content-Type with a
2291
* charset directive, if we didn't already set
2292
* the charset via a server's header. - AAC & FM
2294
} else if (!(me->node_anchor->charset && *me->node_anchor->charset) &&
2295
!strcasecomp(NonNull(http_equiv), "Content-Type")) {
2296
LYUCcharset * p_in = NULL;
2297
LYUCcharset * p_out = NULL;
2298
LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset,
2299
NO, NO, YES, st_other);
2300
LYLowerCase(content);
2302
if ((cp1 = strstr(content, "charset")) != NULL) {
2303
BOOL chartrans_ok = NO;
2304
char *cp3 = NULL, *cp4;
2308
while (*cp1 == ' ' || *cp1 == '=' || *cp1 == '"')
2311
StrAllocCopy(cp3, cp1); /* copy to mutilate more */
2312
for (cp4 = cp3; (*cp4 != '\0' && *cp4 != '"' &&
2313
*cp4 != ';' && *cp4 != ':' &&
2314
!WHITE(*cp4)); cp4++) {
2319
chndl = UCGetLYhndl_byMIME(cp3);
2321
#ifdef CAN_SWITCH_DISPLAY_CHARSET
2322
/* Allow a switch to a more suitable display charset */
2323
if (Switch_Display_Charset (chndl, SWITCH_DISPLAY_CHARSET_MAYBE)) {
2324
/* UCT_STAGE_STRUCTURED and UCT_STAGE_HTEXT
2325
should have the same setting for UCInfoStage. */
2326
int structured = HTAnchor_getUCInfoStage(me->node_anchor,
2327
UCT_STAGE_STRUCTURED);
2328
me->outUCLYhndl = current_char_set;
2329
HTAnchor_setUCInfoStage(me->node_anchor,
2332
UCT_SETBY_MIME); /* highest priorty! */
2333
HTAnchor_setUCInfoStage(me->node_anchor,
2335
UCT_STAGE_STRUCTURED,
2336
UCT_SETBY_MIME); /* highest priorty! */
2337
me->outUCI = HTAnchor_getUCInfoStage(me->node_anchor,
2339
/* The SGML stage will be reset in change_chartrans_handling */
2343
if (UCCanTranslateFromTo(chndl, current_char_set)) {
2345
StrAllocCopy(me->node_anchor->charset, cp4);
2346
HTAnchor_setUCInfoStage(me->node_anchor, chndl,
2348
UCT_SETBY_STRUCTURED);
2349
} else if (chndl < 0) {
2351
* Got something but we don't recognize it.
2353
chndl = UCLYhndl_for_unrec;
2354
if (chndl < 0) /* UCLYhndl_for_unrec not defined :-( */
2355
chndl = UCLYhndl_for_unspec; /* always >= 0 */
2356
if (UCCanTranslateFromTo(chndl, current_char_set)) {
2358
HTAnchor_setUCInfoStage(me->node_anchor, chndl,
2360
UCT_SETBY_STRUCTURED);
2364
p_in = HTAnchor_getUCInfoStage(me->node_anchor,
2366
p_out = HTAnchor_setUCInfoStage(me->node_anchor,
2374
p_out = HTAnchor_getUCInfoStage(me->node_anchor,
2377
if (!strcmp(p_in->MIMEname, "x-transparent")) {
2378
HTPassEightBitRaw = TRUE;
2379
HTAnchor_setUCInfoStage(me->node_anchor,
2380
HTAnchor_getUCLYhndl(me->node_anchor,
2385
if (!strcmp(p_out->MIMEname, "x-transparent")) {
2386
HTPassEightBitRaw = TRUE;
2387
HTAnchor_setUCInfoStage(me->node_anchor,
2388
HTAnchor_getUCLYhndl(me->node_anchor,
2393
if (p_in->enc != UCT_ENC_CJK) {
2395
if (!(p_in->codepoints &
2396
UCT_CP_SUBSETOF_LAT1) &&
2397
chndl == current_char_set) {
2398
HTPassEightBitRaw = TRUE;
2400
} else if (p_out->enc == UCT_ENC_CJK) {
2401
Set_HTCJK(p_in->MIMEname, p_out->MIMEname);
2403
LYGetChartransInfo(me);
2405
** Update the chartrans info homologously to
2406
** a Content-Type MIME header with a charset
2409
if (me->UCLYhndl != chndl) {
2410
HTAnchor_setUCInfoStage(me->node_anchor, chndl,
2412
UCT_SETBY_STRUCTURED);
2413
HTAnchor_setUCInfoStage(me->node_anchor, chndl,
2415
UCT_SETBY_STRUCTURED);
2416
me->inUCLYhndl = HTAnchor_getUCLYhndl(me->node_anchor,
2418
me->inUCI = HTAnchor_getUCInfoStage(me->node_anchor,
2421
UCSetTransParams(&me->T,
2422
me->inUCLYhndl, me->inUCI,
2423
me->outUCLYhndl, me->outUCI);
2427
* If according to some heuristic the given
2428
* charset and the current display character
2429
* both are likely to be like ISO-8859 in
2430
* structure, pretend we have some kind
2434
= (BOOL) (!strncmp(cp4, "iso-8859-", 9) &&
2435
isdigit(UCH(cp4[9])));
2436
BOOL given_is_8859like
2437
= (BOOL) (given_is_8859 || !strncmp(cp4, "windows-", 8) ||
2438
!strncmp(cp4, "cp12", 4) ||
2439
!strncmp(cp4, "cp-12", 5));
2440
BOOL given_and_display_8859like
2441
= (BOOL) (given_is_8859like &&
2442
(strstr(LYchar_set_names[current_char_set],
2444
strstr(LYchar_set_names[current_char_set],
2447
if (given_is_8859) {
2450
isdigit(UCH((*cp1))))
2454
if (given_and_display_8859like) {
2455
StrAllocCopy(me->node_anchor->charset, cp4);
2456
HTPassEightBitRaw = TRUE;
2458
HTAlert(*cp4 ? cp4 : me->node_anchor->charset);
2463
if (me->node_anchor->charset) {
2465
"LYHandleMETA: New charset: %s\n",
2466
me->node_anchor->charset));
2470
* Set the kcode element based on the charset. - FM
2472
HText_setKcode(me->text, me->node_anchor->charset, p_in);
2475
* Check for a Refresh directive. - FM
2477
} else if (!strcasecomp(NonNull(http_equiv), "Refresh")) {
2478
char *Seconds = NULL;
2480
LYParseRefreshURL(content, &Seconds, &href);
2485
* We found a URL field, so check it out. - FM
2487
if (!(url_type = LYLegitimizeHREF(me, &href, TRUE, FALSE))) {
2489
* The specs require a complete URL,
2490
* but this is a Netscapism, so don't
2491
* expect the author to know that. - FM
2493
HTUserMsg(REFRESH_URL_NOT_ABSOLUTE);
2495
* Use the document's address
2498
if (*href != '\0') {
2499
temp = HTParse(href,
2500
me->node_anchor->address, PARSE_ALL);
2501
StrAllocCopy(href, temp);
2504
StrAllocCopy(href, me->node_anchor->address);
2505
HText_setNoCache(me->text);
2510
* Check whether to fill in localhost. - FM
2512
LYFillLocalFileURL(&href,
2514
me->base_href : me->node_anchor->address));
2518
* Set the no_cache flag if the Refresh URL
2519
* is the same as the document's address. - FM
2521
if (!strcmp(href, me->node_anchor->address)) {
2522
HText_setNoCache(me->text);
2526
* We didn't find a URL field, so use
2527
* the document's own address and set
2528
* the no_cache flag. - FM
2530
StrAllocCopy(href, me->node_anchor->address);
2531
HText_setNoCache(me->text);
2534
* Check for an anchor in http or https URLs. - FM
2537
#ifndef DONT_TRACK_INTERNAL_LINKS
2538
/* id_string seems to be used wrong below if given.
2539
not that it matters much. avoid setting it here. - kw */
2540
if ((strncmp(href, "http", 4) == 0) &&
2541
(cp = strchr(href, '#')) != NULL) {
2542
StrAllocCopy(id_string, cp);
2548
* Ugh! The META tag, which is a HEAD element,
2549
* is in an Anchor, which is BODY element. All
2550
* we can do is close the Anchor and cross our
2553
if (me->inBoldA == TRUE && me->inBoldH == FALSE)
2554
HText_appendCharacter(me->text, LY_BOLD_END_CHAR);
2555
me->inBoldA = FALSE;
2556
HText_endAnchor(me->text, me->CurrentANum);
2558
me->CurrentANum = 0;
2560
me->CurrentA = HTAnchor_findChildAndLink(
2561
me->node_anchor, /* Parent */
2562
id_string, /* Tag */
2563
href, /* Addresss */
2564
(void *)0); /* Type */
2568
LYEnsureSingleSpace(me);
2569
if (me->inUnderline == FALSE)
2570
HText_appendCharacter(me->text, LY_UNDERLINE_START_CHAR);
2571
HTML_put_string(me, "REFRESH(");
2572
HTML_put_string(me, Seconds);
2573
HTML_put_string(me, " sec):");
2575
if (me->inUnderline == FALSE)
2576
HText_appendCharacter(me->text, LY_UNDERLINE_END_CHAR);
2577
HTML_put_character(me, ' ');
2579
HText_beginAnchor(me->text, me->inUnderline, me->CurrentA);
2580
if (me->inBoldH == FALSE)
2581
HText_appendCharacter(me->text, LY_BOLD_START_CHAR);
2582
HTML_put_string(me, href);
2584
if (me->inBoldH == FALSE)
2585
HText_appendCharacter(me->text, LY_BOLD_END_CHAR);
2586
HText_endAnchor(me->text, 0);
2587
LYEnsureSingleSpace(me);
2591
* Check for a suggested filename via a Content-Disposition with
2592
* a filename=name.suffix in it, if we don't already have it
2593
* via a server header. - FM
2595
} else if (!(me->node_anchor->SugFname && *me->node_anchor->SugFname) &&
2596
!strcasecomp((http_equiv ?
2597
http_equiv : ""), "Content-Disposition")) {
2599
while (*cp != '\0' && strncasecomp(cp, "filename", 8))
2603
while ((*cp != '\0') && (WHITE(*cp) || *cp == '='))
2605
while (*cp != '\0' && WHITE(*cp))
2608
StrAllocCopy(me->node_anchor->SugFname, cp);
2609
if (*me->node_anchor->SugFname == '\"') {
2610
if ((cp = strchr((me->node_anchor->SugFname + 1),
2613
HTMIME_TrimDoubleQuotes(me->node_anchor->SugFname);
2615
FREE(me->node_anchor->SugFname);
2617
if (me->node_anchor->SugFname != NULL &&
2618
*me->node_anchor->SugFname == '\0') {
2619
FREE(me->node_anchor->SugFname);
2622
if ((cp = me->node_anchor->SugFname) != NULL) {
2623
while (*cp != '\0' && !WHITE(*cp))
2626
if (*me->node_anchor->SugFname == '\0')
2627
FREE(me->node_anchor->SugFname);
2632
* Check for a Set-Cookie directive. - AK
2634
} else if (!strcasecomp(NonNull(http_equiv), "Set-Cookie")) {
2636
* This will need to be updated when Set-Cookie/Set-Cookie2
2637
* handling is finalized. For now, we'll still assume
2638
* "historical" cookies in META directives. - FM
2640
url_type = is_url(me->inBASE ?
2641
me->base_href : me->node_anchor->address);
2642
if (url_type == HTTP_URL_TYPE || url_type == HTTPS_URL_TYPE) {
2643
LYSetCookie(content,
2646
me->base_href : me->node_anchor->address));
2651
* Free the copies. - FM
2660
** This function handles P elements in HTML streams.
2661
** If start is TRUE it handles a start tag, and if
2662
** FALSE, an end tag. We presently handle start
2663
** and end tags identically, but this can lead to
2664
** a different number of blank lines between the
2665
** current paragraph and subsequent text when a P
2666
** end tag is present or not in the markup. - FM
2668
PUBLIC void LYHandlePlike ARGS6(
2670
CONST BOOL*, present,
2671
CONST char **, value,
2672
char **, include GCC_UNUSED,
2678
* FIG content should be a true block, which like P inherits
2679
* the current style. APPLET is like character elements or
2680
* an ALT attribute, unless its content contains a block element.
2681
* If we encounter a P in either's content, we set flags to treat
2682
* the content as a block. - FM
2686
me->inFIGwithP = TRUE;
2689
me->inAPPLETwithP = TRUE;
2693
if (me->List_Nesting_Level >= 0) {
2695
* We're in a list. Treat P as an instruction to
2696
* create one blank line, if not already present,
2697
* then fall through to handle attributes, with
2698
* the "second line" margins. - FM
2701
if (me->inFIG || me->inAPPLET ||
2702
me->inCAPTION || me->inCREDIT ||
2703
me->sp->style->spaceAfter > 0 ||
2704
(start && me->sp->style->spaceBefore > 0)) {
2705
LYEnsureDoubleSpace(me);
2707
LYEnsureSingleSpace(me);
2710
} else if (me->sp[0].tag_number == HTML_ADDRESS) {
2712
* We're in an ADDRESS. Treat P as an instruction
2713
* to start a newline, if needed, then fall through
2714
* to handle attributes. - FM
2716
if (!HText_LastLineEmpty(me->text, FALSE)) {
2717
HText_setLastChar(me->text, ' '); /* absorb white space */
2718
HText_appendCharacter(me->text, '\r');
2722
if (!(me->inLABEL && !me->inP)) {
2723
HText_appendParagraph(me->text);
2725
} else if (me->sp->style->spaceAfter > 0) {
2726
LYEnsureDoubleSpace(me);
2728
LYEnsureSingleSpace(me);
2730
me->inLABEL = FALSE;
2734
if (LYoverride_default_alignment(me)) {
2735
me->sp->style->alignment = LYstyles(me->sp[0].tag_number)->alignment;
2736
} else if ((me->List_Nesting_Level >= 0 &&
2737
(me->sp->style->id == ST_DivCenter ||
2738
me->sp->style->id == ST_DivLeft ||
2739
me->sp->style->id == ST_DivRight)) ||
2740
((me->Division_Level < 0) &&
2741
(me->sp->style->id == ST_Normal ||
2742
me->sp->style->id == ST_Preformatted))) {
2743
me->sp->style->alignment = HT_LEFT;
2745
me->sp->style->alignment = (short) me->current_default_alignment;
2749
if (present && present[align_idx] && value[align_idx]) {
2750
if (!strcasecomp(value[align_idx], "center") &&
2751
!(me->List_Nesting_Level >= 0 && !me->inP))
2752
me->sp->style->alignment = HT_CENTER;
2753
else if (!strcasecomp(value[align_idx], "right") &&
2754
!(me->List_Nesting_Level >= 0 && !me->inP))
2755
me->sp->style->alignment = HT_RIGHT;
2756
else if (!strcasecomp(value[align_idx], "left") ||
2757
!strcasecomp(value[align_idx], "justify"))
2758
me->sp->style->alignment = HT_LEFT;
2764
* Mark that we are starting a new paragraph
2765
* and don't have any of its text yet. - FM
2775
** This function handles SELECT elements in HTML streams.
2776
** If start is TRUE it handles a start tag, and if FALSE,
2779
PUBLIC void LYHandleSELECT ARGS5(
2781
CONST BOOL*, present,
2782
CONST char **, value,
2783
char **, include GCC_UNUSED,
2788
if (start == TRUE) {
2790
BOOLEAN multiple = NO;
2794
* Initialize the disable attribute.
2796
me->select_disabled = FALSE;
2799
* Make sure we're in a form.
2804
"Bad HTML: SELECT start tag not within FORM tag\n"));
2807
* We should have covered all crash possibilities with the
2808
* current TagSoup parser, so we'll allow it because some
2809
* people with other browsers use SELECT for "information"
2810
* popups, outside of FORM blocks, though no Lynx user
2811
* would do anything that awful, right? - FM
2818
* Check for unclosed TEXTAREA.
2820
if (me->inTEXTAREA) {
2822
CTRACE((tfp, "Bad HTML: Missing TEXTAREA end tag\n"));
2826
* Set to know we are in a select tag.
2828
me->inSELECT = TRUE;
2830
if (!(present && present[HTML_SELECT_NAME] &&
2831
value[HTML_SELECT_NAME] && *value[HTML_SELECT_NAME])) {
2832
StrAllocCopy(name, "");
2833
} else if (strchr(value[HTML_SELECT_NAME], '&') == NULL) {
2834
StrAllocCopy(name, value[HTML_SELECT_NAME]);
2836
StrAllocCopy(name, value[HTML_SELECT_NAME]);
2837
UNESCAPE_FIELDNAME_TO_STD(&name);
2839
if (present && present[HTML_SELECT_MULTIPLE])
2841
if (present && present[HTML_SELECT_DISABLED])
2842
me->select_disabled = TRUE;
2843
if (present && present[HTML_SELECT_SIZE] &&
2844
value[HTML_SELECT_SIZE] && *value[HTML_SELECT_SIZE]) {
2846
* Let the size be determined by the number of OPTIONs. - FM
2848
CTRACE((tfp, "LYHandleSELECT: Ignoring SIZE=\"%s\" for SELECT.\n",
2849
value[HTML_SELECT_SIZE]));
2852
if (me->inBoldH == TRUE &&
2853
(multiple == NO || LYSelectPopups == FALSE)) {
2854
HText_appendCharacter(me->text, LY_BOLD_END_CHAR);
2855
me->inBoldH = FALSE;
2856
me->needBoldH = TRUE;
2858
if (me->inUnderline == TRUE &&
2859
(multiple == NO || LYSelectPopups == FALSE)) {
2860
HText_appendCharacter(me->text, LY_UNDERLINE_END_CHAR);
2861
me->inUnderline = FALSE;
2864
if ((multiple == NO && LYSelectPopups == TRUE) &&
2865
(me->sp[0].tag_number == HTML_PRE || me->inPRE == TRUE ||
2866
!me->sp->style->freeFormat) &&
2867
HText_LastLineSize(me->text, FALSE) > (LYcols - 8)) {
2869
* Force a newline when we're using a popup in
2870
* a PRE block and are within 7 columns from the
2871
* right margin. This will allow for the '['
2872
* popup designator and help avoid a wrap in the
2873
* underscore placeholder for the retracted popup
2874
* entry in the HText structure. - FM
2876
HTML_put_character(me, '\n');
2880
LYCheckForID(me, present, value, (int)HTML_SELECT_ID);
2882
HText_beginSelect(name, ATTR_CS_IN, multiple, size);
2886
me->first_option = TRUE;
2894
* Make sure we had a select start tag.
2896
if (!me->inSELECT) {
2898
CTRACE((tfp, "Bad HTML: Unmatched SELECT end tag\n"));
2903
* Set to know that we are no longer in a select tag.
2905
me->inSELECT = FALSE;
2908
* Clear the disable attribute.
2910
me->select_disabled = FALSE;
2913
* Finish the data off.
2915
HTChunkTerminate(&me->option);
2917
* Finish the previous option.
2919
ptr = HText_setLastOptionValue(me->text,
2921
me->LastOptionValue,
2923
me->LastOptionChecked,
2926
FREE(me->LastOptionValue);
2928
me->LastOptionChecked = FALSE;
2930
if (HTCurSelectGroupType == F_CHECKBOX_TYPE ||
2931
LYSelectPopups == FALSE) {
2933
* Start a newline after the last checkbox/button option.
2935
LYEnsureSingleSpace(me);
2938
* Output popup box with the default option to screen,
2939
* but use non-breaking spaces for output.
2942
me->sp[0].tag_number == HTML_PRE && strlen(ptr) > 6) {
2944
* The code inadequately handles OPTION fields in PRE tags.
2945
* We'll put up a minimum of 6 characters, and if any
2946
* more would exceed the wrap column, we'll ignore them.
2948
for (i = 0; i < 6; i++) {
2950
HText_appendCharacter(me->text, HT_NON_BREAK_SPACE);
2952
HText_appendCharacter(me->text, *ptr);
2955
HText_setIgnoreExcess(me->text, TRUE);
2957
for (; ptr && *ptr != '\0'; ptr++) {
2959
HText_appendCharacter(me->text, HT_NON_BREAK_SPACE);
2961
HText_appendCharacter(me->text, *ptr);
2964
* Add end option character.
2966
if (!me->first_option) {
2967
HText_appendCharacter(me->text, ']');
2968
HText_setLastChar(me->text, ']');
2971
HText_setIgnoreExcess(me->text, FALSE);
2973
HTChunkClear(&me->option);
2975
if (me->Underline_Level > 0 && me->inUnderline == FALSE) {
2976
HText_appendCharacter(me->text, LY_UNDERLINE_START_CHAR);
2977
me->inUnderline = TRUE;
2979
if (me->needBoldH == TRUE && me->inBoldH == FALSE) {
2980
HText_appendCharacter(me->text, LY_BOLD_START_CHAR);
2982
me->needBoldH = FALSE;
2988
** This function strips white characters and
2989
** generally fixes up attribute values that
2990
** were received from the SGML parser and
2991
** are to be treated as partial or absolute
2994
PUBLIC int LYLegitimizeHREF ARGS4(
3003
CONST char *Base = NULL;
3005
if (!me || !href || isEmpty(*href))
3008
if (!LYTrimStartfile(*href)) {
3010
* Collapse spaces in the actual URL, but just
3011
* protect against tabs or newlines in the
3012
* fragment, if present. This seeks to cope
3013
* with atrocities inflicted on the Web by
3014
* authoring tools such as Frontpage. - FM
3017
/* Before working on spaces check if we have any, usually none. */
3018
for (p = *href; (*p && !isspace(*p)); p++)
3021
if (*p) { /* p == first space character */
3022
/* no reallocs below, all converted in place */
3024
pound = findPoundSelector(*href);
3026
if (pound != NULL && pound < p) {
3027
convert_to_spaces(p, FALSE); /* done */
3031
*pound = '\0'; /* mark */
3034
* No blanks really belong in the HREF,
3035
* but if it refers to an actual file,
3036
* it may actually have blanks in the name.
3037
* Try to accommodate. See also HTParse().
3039
if (LYRemoveNewlines(p) || strchr(p, '\t') != 0) {
3040
LYRemoveBlanks(p); /* a compromise... */
3043
if (pound != NULL) {
3044
p = strchr(p, '\0');
3045
*pound = '#'; /* restore */
3046
convert_to_spaces(pound, FALSE);
3056
TRANSLATE_AND_UNESCAPE_TO_STD(href);
3059
me->base_href : me->node_anchor->address;
3061
url_type = is_url(*href);
3062
if (!url_type && force_slash && **href == '.' &&
3063
(!strcmp(*href, ".") || !strcmp(*href, "..")) &&
3064
!isFILE_URL(Base)) {
3066
* The Fielding RFC/ID for resolving partial HREFs says
3067
* that a slash should be on the end of the preceding
3068
* symbolic element for "." and "..", but all tested
3069
* browsers only do that for an explicit "./" or "../",
3070
* so we'll respect the RFC/ID only if force_slash was
3071
* TRUE and it's not a file URL. - FM
3073
StrAllocCat(*href, "/");
3075
if ((!url_type && LYStripDotDotURLs && strip_dots && **href == '.') &&
3076
!strncasecomp(Base, "http", 4)) {
3078
* We will be resolving a partial reference versus an http
3079
* or https URL, and it has lead dots, which may be retained
3080
* when resolving via HTParse(), but the request would fail
3081
* if the first element of the resultant path is two dots,
3082
* because no http or https server accepts such paths, and
3083
* the current URL draft, likely to become an RFC, says that
3084
* it's optional for the UA to strip them as a form of error
3085
* recovery. So we will, recursively, for http/https URLs,
3086
* like the "major market browsers" which made this problem
3087
* so common on the Web, but we'll also issue a message about
3088
* it, such that the bad partial reference might get corrected
3089
* by the document provider. - FM
3091
char *temp = NULL, *path = NULL, *cp;
3092
CONST char *str = "";
3094
temp = HTParse(*href, Base, PARSE_ALL);
3095
path = HTParse(temp, "", PARSE_PATH+PARSE_PUNCTUATION);
3096
if (!strncmp(path, "/..", 3)) {
3098
if (LYIsHtmlSep(*cp) || *cp == '\0') {
3099
if (Base[4] == 's') {
3102
CTRACE((tfp, "LYLegitimizeHREF: Bad value '%s' for http%s URL.\n",
3104
CTRACE((tfp, " Stripping lead dots.\n"));
3105
if (!me->inBadHREF) {
3106
HTUserMsg(BAD_PARTIAL_REFERENCE);
3107
me->inBadHREF = TRUE;
3111
StrAllocCopy(*href, "/");
3112
} else if (LYIsHtmlSep(*cp)) {
3113
while (!strncmp(cp, "/..", 3)) {
3114
if (*(cp + 3) == '/') {
3117
} else if (*(cp + 3) == '\0') {
3123
StrAllocCopy(*href, cp);
3133
** This function checks for a Content-Base header,
3134
** and if not present, a Content-Location header
3135
** which is an absolute URL, and sets the BASE
3136
** accordingly. If set, it will be replaced by
3137
** any BASE tag in the HTML stream, itself. - FM
3139
PUBLIC void LYCheckForContentBase ARGS1(
3143
BOOL present[HTML_BASE_ATTRIBUTES];
3144
CONST char *value[HTML_BASE_ATTRIBUTES];
3147
if (!(me && me->node_anchor))
3150
if (me->node_anchor->content_base != NULL) {
3152
* We have a Content-Base value. Use it
3153
* if it's non-zero length. - FM
3155
if (*me->node_anchor->content_base == '\0')
3157
StrAllocCopy(cp, me->node_anchor->content_base);
3159
} else if (me->node_anchor->content_location != NULL) {
3161
* We didn't have a Content-Base value, but do
3162
* have a Content-Location value. Use it if
3163
* it's an absolute URL. - FM
3165
if (*me->node_anchor->content_location == '\0')
3167
StrAllocCopy(cp, me->node_anchor->content_location);
3175
* We had neither a Content-Base nor
3176
* Content-Location value. - FM
3182
* If we collapsed to a zero-length value,
3191
* Pass the value to HTML_start_element as
3192
* the HREF of a BASE tag. - FM
3194
for (i = 0; i < HTML_BASE_ATTRIBUTES; i++)
3196
present[HTML_BASE_HREF] = YES;
3197
value[HTML_BASE_HREF] = (CONST char *)cp;
3198
(*me->isa->start_element)(me, HTML_BASE, present, value,
3204
** This function creates NAMEd Anchors if a non-zero-length NAME
3205
** or ID attribute was present in the tag. - FM
3207
PUBLIC void LYCheckForID ARGS4(
3209
CONST BOOL *, present,
3210
CONST char **, value,
3213
HTChildAnchor *ID_A = NULL;
3216
if (!(me && me->text))
3219
if (present && present[attribute]
3220
&& value[attribute] && *value[attribute]) {
3222
* Translate any named or numeric character references. - FM
3224
StrAllocCopy(temp, value[attribute]);
3225
LYUCTranslateHTMLString(&temp, me->tag_charset, me->tag_charset,
3226
NO, NO, YES, st_URL);
3229
* Create the link if we still have a non-zero-length string. - FM
3231
if ((temp[0] != '\0') &&
3232
(ID_A = HTAnchor_findChildAndLink(
3233
me->node_anchor, /* Parent */
3235
NULL, /* Addresss */
3236
(void *)0))) { /* Type */
3237
HText_beginAnchor(me->text, me->inUnderline, ID_A);
3238
HText_endAnchor(me->text, 0);
3245
** This function creates a NAMEd Anchor for the ID string
3246
** passed to it directly as an argument. It assumes the
3247
** ID string does not need checking for character references. - FM
3249
PUBLIC void LYHandleID ARGS2(
3253
HTChildAnchor *ID_A = NULL;
3255
if (!(me && me->text) ||
3260
* Create the link if we still have a non-zero-length string. - FM
3262
if ((ID_A = HTAnchor_findChildAndLink(
3263
me->node_anchor, /* Parent */
3265
NULL, /* Addresss */
3266
(void *)0)) != NULL) { /* Type */
3267
HText_beginAnchor(me->text, me->inUnderline, ID_A);
3268
HText_endAnchor(me->text, 0);
3273
** This function checks whether we want to override
3274
** the current default alignment for paragraphs and
3275
** instead use that specified in the element's style
3278
PUBLIC BOOLEAN LYoverride_default_alignment ARGS1(
3284
switch(me->sp[0].tag_number) {
3285
case HTML_BLOCKQUOTE:
3290
me->sp->style->alignment = HT_LEFT;
3300
** This function inserts newlines if needed to create double spacing,
3301
** and sets the left margin for subsequent text to the second line
3302
** indentation of the current style. - FM
3304
PUBLIC void LYEnsureDoubleSpace ARGS1(
3307
if (!me || !me->text)
3310
if (!HText_LastLineEmpty(me->text, FALSE)) {
3311
HText_setLastChar(me->text, ' '); /* absorb white space */
3312
HText_appendCharacter(me->text, '\r');
3313
HText_appendCharacter(me->text, '\r');
3314
} else if (!HText_PreviousLineEmpty(me->text, FALSE)) {
3315
HText_setLastChar(me->text, ' '); /* absorb white space */
3316
HText_appendCharacter(me->text, '\r');
3317
} else if (me->List_Nesting_Level >= 0) {
3318
HText_NegateLineOne(me->text);
3325
** This function inserts a newline if needed to create single spacing,
3326
** and sets the left margin for subsequent text to the second line
3327
** indentation of the current style. - FM
3329
PUBLIC void LYEnsureSingleSpace ARGS1(
3332
if (!me || !me->text)
3335
if (!HText_LastLineEmpty(me->text, FALSE)) {
3336
HText_setLastChar(me->text, ' '); /* absorb white space */
3337
HText_appendCharacter(me->text, '\r');
3338
} else if (me->List_Nesting_Level >= 0) {
3339
HText_NegateLineOne(me->text);
3346
** This function resets paragraph alignments for block
3347
** elements which do not have a defined style sheet. - FM
3349
PUBLIC void LYResetParagraphAlignment ARGS1(
3355
if (me->List_Nesting_Level >= 0 ||
3356
((me->Division_Level < 0) &&
3357
(me->sp->style->id == ST_Normal ||
3358
me->sp->style->id == ST_Preformatted))) {
3359
me->sp->style->alignment = HT_LEFT;
3361
me->sp->style->alignment = (short) me->current_default_alignment;
3367
** This example function checks whether the given anchor has
3368
** an address with a file scheme, and if so, loads it into the
3369
** SGML parser's context->url element, which was passed as
3370
** the second argument. The handle_comment() calling function in
3371
** SGML.c then calls LYDoCSI() in LYUtils.c to insert HTML markup
3372
** into the corresponding stream, homologously to an SSI by an
3373
** HTTP server. - FM
3375
** For functions similar to this but which depend on details of
3376
** the HTML handler's internal data, the calling interface should
3377
** be changed, and functions in SGML.c would have to make sure not
3378
** to call such functions inappropriately (e.g., calling a function
3379
** specific to the Lynx_HTML_Handler when SGML.c output goes to
3380
** some other HTStructured object like in HTMLGen.c), or the new
3381
** functions could be added to the SGML.h interface.
3383
PUBLIC BOOLEAN LYCheckForCSI ARGS2(
3384
HTParentAnchor *, anchor,
3387
if (!(anchor && anchor->address))
3390
if (!isFILE_URL(anchor->address))
3393
if (!LYisLocalHost(anchor->address))
3396
StrAllocCopy(*url, anchor->address);
3401
** This function is called from the SGML parser to look at comments
3402
** and see whether we should collect some info from them. Currently
3403
** it only looks for comments with Message-Id and Subject info, in the
3404
** exact form generated by MHonArc for archived mailing lists. If found,
3405
** the info is stored in the document's HTParentAnchor. It can later be
3406
** used for generating a mail response.
3408
** We are extra picky here because there isn't any official definition
3409
** for these kinds of comments - we might (and still can) misinterpret
3410
** arbitrary comments as something they aren't.
3412
** If something doesn't look right, for example invalid characters, the
3413
** strings are not stored. Mail responses will use something else as
3414
** the subject, probably the document URL, and will not have an
3415
** In-Reply-To header.
3417
** All this is a hack - to do this the right way, mailing list archivers
3418
** would have to agree on some better mechanism to make this kind of info
3419
** from original mail headers available, for example using LINK. - kw
3421
PUBLIC BOOLEAN LYCommentHacks ARGS2(
3422
HTParentAnchor *, anchor,
3423
CONST char *, comment)
3425
CONST char *cp = comment;
3428
if (comment == NULL)
3431
if (!(anchor && anchor->address))
3434
if (strncmp(comment, "!--X-Message-Id: ", 17) == 0) {
3435
char *messageid = NULL;
3437
for (cp = comment+17; *cp; cp++) {
3438
if (UCH(*cp) >= 127 || !isgraph(UCH(*cp))) {
3442
if (strcmp(cp, " --")) {
3446
StrAllocCopy(messageid, cp);
3447
/* This should be ok - message-id should only contain 7-bit ASCII */
3448
if (!LYUCTranslateHTMLString(&messageid, 0, 0, NO, NO, YES, st_URL))
3450
for (p = messageid; *p; p++) {
3451
if (UCH(*p) >= 127 || !isgraph(UCH(*p))) {
3455
if (strcmp(p, " --")) {
3459
if ((p = strchr(messageid, '@')) == NULL || p[1] == '\0') {
3464
if ((len = strlen(p)) >= 8 && !strcmp(&p[len-3], " --")) {
3470
if (HTAnchor_setMessageID(anchor, messageid)) {
3478
if (strncmp(comment, "!--X-Subject: ", 14) == 0) {
3479
char *subject = NULL;
3481
for (cp = comment+14; *cp; cp++) {
3482
if (UCH(*cp) >= 127 || !isprint(UCH(*cp))) {
3487
StrAllocCopy(subject, cp);
3489
* This may not be the right thing for the subject - but mail
3490
* subjects shouldn't contain 8-bit characters in raw form anyway.
3491
* We have to unescape character entities, since that's what MHonArc
3492
* seems to generate. But if after that there are 8-bit characters
3493
* the string is rejected. We would probably not know correctly
3494
* what charset to assume anyway - the mail sender's can differ from
3495
* the archive's. And the code for sending mail cannot deal well
3496
* with 8-bit characters - we should not put them in the Subject
3497
* header in raw form, but don't have MIME encoding implemented.
3498
* Someone may want to do more about this... - kw
3500
if (!LYUCTranslateHTMLString(&subject, 0, 0, NO, YES, NO, st_HTML))
3502
for (p = subject; *p; p++) {
3503
if (UCH(*p) >= 127 || !isprint(UCH(*p))) {
3509
if ((len = strlen(p)) >= 4 && !strcmp(&p[len-3], " --")) {
3515
if (HTAnchor_setSubject(anchor, subject)) {
3528
* Create the Title with any left-angle-brackets
3529
* converted to &lt; entities and any ampersands
3530
* converted to &amp; entities. - FM
3532
* Convert 8-bit letters to &#xUUUU to avoid dependencies
3533
* on the display character set, which may need changing.
3534
* Do NOT convert any 8-bit chars if we have CJK display. - LP
3536
void LYformTitle ARGS2(
3540
if (HTCJK == JAPANESE) {
3541
char *tmp_buffer = NULL;
3542
if ((tmp_buffer = (char *) malloc (strlen(src)+1)) == 0)
3543
outofmem(__FILE__, "LYformTitle");
3544
switch(kanji_code) { /* 1997/11/22 (Sat) 09:28:00 */
3546
TO_EUC((CONST unsigned char *) src, (unsigned char *) tmp_buffer);
3549
TO_SJIS((CONST unsigned char *) src, (unsigned char *) tmp_buffer);
3552
CTRACE((tfp, "\nLYformTitle: kanji_code is an unexpected value."));
3553
strcpy(tmp_buffer, src);
3556
StrAllocCopy(*dst, tmp_buffer);
3559
StrAllocCopy(*dst, src);