1
/* Parse HyperText Document Address HTParse.c
2
** ================================
10
#include <LYStrings.h>
11
#include <LYCharUtils.h>
18
#endif /* __MINGW32__ */
21
/* Character written in front of the two hex digits of an escaped character. */
#define HEX_ESCAPE '%'
28
char * search; /* treated normally as part of path */
33
/* Strip white space off a string. HTStrip()
34
** -------------------------------
37
** Return value points to first non-white character, or to 0 if none.
38
** All trailing white space is OVERWRITTEN with zero.
40
PUBLIC char * HTStrip ARGS1(
43
/*
 * Nonzero when c is a blank, a tab or a newline.
 * The argument is parenthesized at every use so that an expression argument
 * (e.g. a conditional expression) groups as the caller wrote it.
 * Note that c may still be evaluated up to three times.
 */
#define SPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n'))
46
; /* Find end of string */
47
for (p--; p >= s; p--) {
49
*p = '\0'; /* Zap trailing blanks */
54
s++; /* Strip leading blanks */
58
/* Scan a filename for its constituents. scan()
59
** -------------------------------------
62
** name points to a document name which may be incomplete.
64
** absolute or relative may be nonzero (but not both).
65
** host, anchor and access may be nonzero if they were specified.
66
** Any which are nonzero point to zero terminated strings.
68
PRIVATE void scan ARGS2(
70
struct struct_parts *, parts)
77
parts->absolute = NULL;
78
parts->relative = NULL;
79
parts->search = NULL; /* normally not used - kw */
83
** Scan left-to-right for a scheme (access).
86
for (p = name; *p; p++) {
89
parts->access = name; /* Access name has been specified */
90
after_access = (p + 1);
93
if (*p == '/' || *p == '#' || *p == ';' || *p == '?')
98
** Scan left-to-right for a fragment (anchor).
100
for (p = after_access; *p; p++) {
102
parts->anchor = (p + 1);
103
*p = '\0'; /* terminate the rest */
104
break; /* leave things after first # alone - kw */
109
** Scan left-to-right for a host or absolute path.
114
parts->host = (p + 2); /* host has been specified */
115
*p = '\0'; /* Terminate access */
116
p = strchr(parts->host, '/'); /* look for end of host name if any */
118
*p = '\0'; /* Terminate host */
119
parts->absolute = (p + 1); /* Root has been found */
121
p = strchr(parts->host, '?');
123
*p = '\0'; /* Terminate host */
124
parts->search = (p + 1);
128
parts->absolute = (p + 1); /* Root found but no host */
131
parts->relative = (*after_access) ?
132
after_access : NULL; /* NULL for "" */
136
** Check schemes that commonly have unescaped hashes.
138
if (parts->access && parts->anchor &&
139
/* optimize */ strchr("lnsdLNSD", *parts->access) != NULL) {
140
if ((!parts->host && strcasecomp(parts->access, "lynxcgi")) ||
141
!strcasecomp(parts->access, "nntp") ||
142
!strcasecomp(parts->access, "snews") ||
143
!strcasecomp(parts->access, "news") ||
144
!strcasecomp(parts->access, "data")) {
146
* Access specified but no host and not a lynxcgi URL, so the
147
* anchor may not really be one, e.g., news:j462#36487@foo.bar,
148
* or it's an nntp or snews URL, or news URL with a host.
149
* Restore the '#' in the address.
151
/* but only if we have found a path component of which this will
152
* become part. - kw */
153
if (parts->relative || parts->absolute) {
154
*(parts->anchor - 1) = '#';
155
parts->anchor = NULL;
161
#if defined(HAVE_ALLOCA) && !defined(LY_FIND_LEAKS)
162
#define LYalloca(x) alloca(x)
163
#define LYalloca_free(x) {}
165
#define LYalloca(x) malloc(x)
166
#define LYalloca_free(x) free(x)
169
/* Parse a Name relative to another name. HTParse()
170
** --------------------------------------
172
** This returns those parts of a name which are given (and requested)
173
** substituting bits from the related name where necessary.
176
** aName A filename given
177
** relatedName A name relative to which aName is to be parsed
178
** wanted A mask for the bits which are wanted.
181
** returns A pointer to a malloc'd string which MUST BE FREED
183
PUBLIC char * HTParse ARGS3(
185
CONST char *, relatedName,
188
char * result = NULL;
189
char * tail = NULL; /* a pointer to the end of the 'result' string */
190
char * return_value = NULL;
196
struct struct_parts given, related;
198
CTRACE((tfp, "HTParse: aName:`%s'\n", aName));
199
CTRACE((tfp, " relatedName:`%s'\n", relatedName));
201
if (wanted & (PARSE_STRICTPATH | PARSE_QUERY)) { /* if detail wanted... */
202
if ((wanted & (PARSE_STRICTPATH | PARSE_QUERY))
203
== (PARSE_STRICTPATH | PARSE_QUERY)) /* if strictpath AND query */
204
wanted |= PARSE_PATH; /* then treat as if PARSE_PATH wanted */
205
if (wanted & PARSE_PATH) /* if PARSE_PATH wanted */
206
wanted &= ~(PARSE_STRICTPATH | PARSE_QUERY); /* ignore details */
208
CTRACE((tfp, " want:%s%s%s%s%s%s%s\n",
209
wanted & PARSE_PUNCTUATION ? " punc" : "",
210
wanted & PARSE_ANCHOR ? " anchor" : "",
211
wanted & PARSE_PATH ? " path" : "",
212
wanted & PARSE_HOST ? " host" : "",
213
wanted & PARSE_ACCESS ? " access" : "",
214
wanted & PARSE_STRICTPATH ? " PATH" : "",
215
wanted & PARSE_QUERY ? " QUERY" : ""));
218
** Allocate the temporary string. Optimized.
220
len1 = strlen(aName) + 1;
221
len2 = strlen(relatedName) + 1;
222
len = len1 + len2 + 8; /* Lots of space: more than enough */
224
result = tail = (char*)LYalloca(len * 2 + len1 + len2);
225
if (result == NULL) {
226
outofmem(__FILE__, "HTParse");
233
** Make working copy of the input string to cut up.
235
memcpy(name, aName, len1);
238
** Cut up the string into URL fields.
243
** Now related string.
245
if ((given.access && given.host && given.absolute) || !*relatedName) {
249
related.access = NULL;
251
related.absolute = NULL;
252
related.relative = NULL;
253
related.search = NULL;
254
related.anchor = NULL;
256
memcpy(rel, relatedName, len2);
262
** Handle the scheme (access) field.
264
if (given.access && given.host && !given.relative && !given.absolute) {
265
if (!strcmp(given.access, "http") ||
266
!strcmp(given.access, "https") ||
267
!strcmp(given.access, "ftp"))
273
acc_method = given.access ? given.access : related.access;
274
if (wanted & PARSE_ACCESS) {
276
strcpy(tail, acc_method);
277
tail += strlen(tail);
278
if (wanted & PARSE_PUNCTUATION) {
286
** If different schemes, inherit nothing.
288
** We'll try complying with RFC 1808 and
289
** the Fielding draft, and inherit nothing
290
** if both schemes are given, rather than
291
** only when they differ, except for
294
** After trying it for a while, it's still
295
** premature, IMHO, to go along with it, so
296
** this is back to inheriting for identical
297
** schemes whether or not they are "file".
298
** If you want to try it again yourself,
299
** uncomment the strcasecomp() below. - FM
301
if ((given.access && related.access) &&
302
(/* strcasecomp(given.access, "file") || */
303
strcmp(given.access, related.access))) {
305
related.absolute = NULL;
306
related.relative = NULL;
307
related.search = NULL;
308
related.anchor = NULL;
312
** Handle the host field.
314
if (wanted & PARSE_HOST) {
315
if (given.host || related.host) {
316
if (wanted & PARSE_PUNCTUATION) {
320
strcpy(tail, given.host ? given.host : related.host);
324
** Ignore default port numbers, and trailing dots on FQDNs,
325
** which will only cause identical addresses to look different.
326
** (related is already a clean url).
330
if ((p2 = strchr(result, '@')) != NULL)
332
p2 = strchr(tail, ':');
333
if (p2 != NULL && !isdigit(UCH(p2[1])))
335
** Colon not followed by a port number.
338
if (p2 != NULL && *p2 != '\0' && acc_method != NULL) {
342
if ((!strcmp(acc_method, "http" ) && !strcmp(p2, ":80" )) ||
343
(!strcmp(acc_method, "https" ) && !strcmp(p2, ":443")) ||
344
(!strcmp(acc_method, "gopher" ) && !strcmp(p2, ":70" )) ||
345
(!strcmp(acc_method, "ftp" ) && !strcmp(p2, ":21" )) ||
346
(!strcmp(acc_method, "wais" ) && !strcmp(p2, ":210")) ||
347
(!strcmp(acc_method, "nntp" ) && !strcmp(p2, ":119")) ||
348
(!strcmp(acc_method, "news" ) && !strcmp(p2, ":119")) ||
349
(!strcmp(acc_method, "newspost" ) && !strcmp(p2, ":119")) ||
350
(!strcmp(acc_method, "newsreply" ) && !strcmp(p2, ":119")) ||
351
(!strcmp(acc_method, "snews" ) && !strcmp(p2, ":563")) ||
352
(!strcmp(acc_method, "snewspost" ) && !strcmp(p2, ":563")) ||
353
(!strcmp(acc_method, "snewsreply") && !strcmp(p2, ":563")) ||
354
(!strcmp(acc_method, "finger" ) && !strcmp(p2, ":79" )) ||
355
(!strcmp(acc_method, "telnet" ) && !strcmp(p2, ":23" )) ||
356
(!strcmp(acc_method, "tn3270" ) && !strcmp(p2, ":23" )) ||
357
(!strcmp(acc_method, "rlogin" ) && !strcmp(p2, ":513")) ||
358
(!strcmp(acc_method, "cso" ) && !strcmp(p2, ":105")))
359
*p2 = '\0'; /* It is the default: ignore it */
362
int len3 = strlen(tail);
365
h = tail + len3 - 1; /* last char of hostname */
367
*h = '\0'; /* chop final . */
369
} else if (p2 != result) {
371
h--; /* End of hostname */
378
*h = '\0'; /* terminate */
382
#endif /* CLEAN_URLS */
387
* Trim any blanks from the result so far - there's no excuse for blanks
388
* in a hostname. Also update the tail here.
390
tail = LYRemoveBlanks(result);
393
** If host in given or related was ended directly with a '?' (no
394
** slash), fake the search part into absolute. This is the only
395
** case search is returned from scan. A host must have been present.
396
** this restores the '?' at which the host part had been truncated in
397
** scan, we have to do this after host part handling is done. - kw
399
if (given.search && *(given.search - 1) == '\0') {
400
given.absolute = given.search - 1;
401
given.absolute[0] = '?';
402
} else if (related.search && !related.absolute &&
403
*(related.search - 1) == '\0') {
404
related.absolute = related.search - 1;
405
related.absolute[0] = '?';
409
** If different hosts, inherit no path.
411
if (given.host && related.host)
412
if (strcmp(given.host, related.host) != 0) {
413
related.absolute = NULL;
414
related.relative = NULL;
415
related.anchor = NULL;
421
if (wanted & (PARSE_PATH | PARSE_STRICTPATH | PARSE_QUERY)) {
422
int want_detail = (wanted & (PARSE_STRICTPATH | PARSE_QUERY));
424
if (acc_method && !given.absolute && given.relative) {
426
* Treat all given nntp or snews paths, or given paths for news
427
* URLs with a host, as absolute.
429
switch (*acc_method) {
432
if (!strcasecomp(acc_method, "nntp") ||
433
(!strcasecomp(acc_method, "news") &&
434
!strncasecomp(result, "news://", 7))) {
435
given.absolute = given.relative;
436
given.relative = NULL;
441
if (!strcasecomp(acc_method, "snews")) {
442
given.absolute = given.relative;
443
given.relative = NULL;
449
if (given.absolute) { /* All is given */
450
if (wanted & PARSE_PUNCTUATION)
452
strcpy(tail, given.absolute);
453
CTRACE((tfp, "HTParse: (ABS)\n"));
454
} else if (related.absolute) { /* Adopt path not name */
456
strcpy(tail, related.absolute);
457
if (given.relative) {
458
p = strchr(tail, '?'); /* Search part? */
460
p = (tail + strlen(tail) - 1);
461
for (; *p != '/'; p--)
463
p[1] = '\0'; /* Remove filename */
464
strcat(p, given.relative); /* Add given one */
467
CTRACE((tfp, "HTParse: (Related-ABS)\n"));
468
} else if (given.relative) {
469
strcpy(tail, given.relative); /* what we've got */
470
CTRACE((tfp, "HTParse: (REL)\n"));
471
} else if (related.relative) {
472
strcpy(tail, related.relative);
473
CTRACE((tfp, "HTParse: (Related-REL)\n"));
474
} else { /* No inheritance */
475
if (!isLYNXCGI(aName) &&
476
!isLYNXEXEC(aName) &&
477
!isLYNXPROG(aName)) {
481
if (!strcmp(result, "news:/"))
483
CTRACE((tfp, "HTParse: (No inheritance)\n"));
486
p = strchr(tail, '?'); /* Search part? */
488
if (PARSE_STRICTPATH) {
491
if (!(wanted & PARSE_PUNCTUATION))
498
if (wanted & PARSE_QUERY)
505
** Handle the fragment (anchor). Never inherit.
507
if (wanted & PARSE_ANCHOR) {
508
if (given.anchor && *given.anchor) {
509
tail += strlen(tail);
510
if (wanted & PARSE_PUNCTUATION)
512
strcpy(tail, given.anchor);
517
* If there are any blanks remaining in the string, escape them as needed.
518
* See the discussion in LYLegitimizeHREF() for example.
520
if ((p = strchr(result, ' ')) != 0) {
521
switch (is_url(result)) {
522
case UNKNOWN_URL_TYPE:
523
CTRACE((tfp, "HTParse: ignore:`%s'\n", result));
525
case LYNXEXEC_URL_TYPE:
526
case LYNXPROG_URL_TYPE:
527
case LYNXCGI_URL_TYPE:
528
case LYNXPRINT_URL_TYPE:
529
case LYNXHIST_URL_TYPE:
530
case LYNXDOWNLOAD_URL_TYPE:
531
case LYNXKEYMAP_URL_TYPE:
532
case LYNXIMGMAP_URL_TYPE:
533
case LYNXCOOKIE_URL_TYPE:
534
case LYNXDIRED_URL_TYPE:
535
case LYNXOPTIONS_URL_TYPE:
536
case LYNXCFG_URL_TYPE:
537
case LYNXCOMPILE_OPTS_URL_TYPE:
538
case LYNXMESSAGES_URL_TYPE:
539
CTRACE((tfp, "HTParse: spaces:`%s'\n", result));
543
CTRACE((tfp, "HTParse: encode:`%s'\n", result));
545
char *q = p + strlen(p) + 2;
553
} while ((p = strchr(result, ' ')) != 0);
557
CTRACE((tfp, "HTParse: result:`%s'\n", result));
559
StrAllocCopy(return_value, result);
560
LYalloca_free(result);
562
/* FIXME: could be optimized using HTParse() internals */
564
((wanted & PARSE_ALL_WITHOUT_ANCHOR) == PARSE_ALL_WITHOUT_ANCHOR)) {
566
* Check whether to fill in localhost. - FM
568
LYFillLocalFileURL(&return_value, relatedName);
569
CTRACE((tfp, "pass LYFillLocalFile:`%s'\n", return_value));
572
return return_value; /* exactly the right length */
575
/* HTParseAnchor(), fast HTParse() specialization
576
** ----------------------------------------------
579
** returns A pointer within input string (probably to its end '\0')
581
PUBLIC CONST char * HTParseAnchor ARGS1(
584
CONST char* p = aName;
585
for ( ; *p && *p != '#'; p++)
588
/* the safe way based on HTParse() -
589
* keeping in mind scan() peculiarities on schemes:
591
struct struct_parts given;
593
char* name = (char*)LYalloca((p - aName) + strlen(p) + 1);
595
outofmem(__FILE__, "HTParseAnchor");
602
if (given.anchor == NULL) {
603
for ( ; *p; p++) /*scroll to end '\0'*/
610
/* Simplify a filename. HTSimplify()
611
** --------------------
613
** A unix-style file is allowed to contain the sequence xxx/../ which may
614
** be replaced by "" , and the sequence "/./" which may be replaced by "/".
615
** Simplification helps us recognize duplicate filenames.
617
** Thus, /etc/junk/../fred becomes /etc/fred
618
** /etc/junk/./fred becomes /etc/junk/fred
620
** but we should NOT change
621
** http://fred.xxx.edu/../..
623
** or ../../albert.html
625
PUBLIC void HTSimplify ARGS1(
631
if (filename == NULL)
634
if (!(filename[0] && filename[1]) ||
635
filename[0] == '?' || filename[1] == '?' || filename[2] == '?')
638
if (strchr(filename, '/') != NULL) {
639
for (p = (filename + 2); *p; p++) {
642
** We're still treating a ?searchpart as part of
643
** the path in HTParse() and scan(), but if we
644
** encounter a '?' here, assume it's the delimiter
645
** and break. We also could check for a parameter
646
** delimiter (';') here, but the current Fielding
647
** draft (wisely or ill-advisedly :) says that it
648
** should be ignored and collapsing be allowed in
649
** its value). The only defined parameter at
650
** present is ;type=[A, I, or D] for ftp URLs, so
651
** if there's a "/..", "/../", "/./", or terminal
652
** '.' following the ';', it must be due to the
653
** ';' being an unescaped path character and not
654
** actually a parameter delimiter. - FM
659
if ((p[1] == '.') && (p[2] == '.') &&
660
(p[3] == '/' || p[3] == '?' || p[3] == '\0')) {
662
** Handle "../", "..?" or "..".
664
for (q = (p - 1); (q >= filename) && (*q != '/'); q--)
666
** Back up to previous slash or beginning of string.
670
(strncmp(q, "/../", 4) &&
671
strncmp(q, "/..?", 4)) &&
672
!((q - 1) > filename && q[-1] == '/')) {
674
** Not at beginning of string or in a
675
** host field, so remove the "/xxx/..".
681
*p = '\0'; /* terminate */
683
** Start again with previous slash.
687
} else if (p[1] == '.' && p[2] == '/') {
689
** Handle "./" by removing both characters.
695
*q = '\0'; /* terminate */
697
} else if (p[1] == '.' && p[2] == '?') {
699
** Handle ".?" by removing the dot.
705
*q = '\0'; /* terminate */
707
} else if (p[1] == '.' && p[2] == '\0') {
709
** Handle terminal "." by removing the character.
715
if (p >= filename + 2 && *p == '?' && *(p-1) == '.') {
718
** Handle "/.?" by removing the dot.
725
} else if (*(p-2) == '.' &&
726
p >= filename + 4 && *(p-3) == '/' &&
728
(p > filename + 4 && *(p-5) != ':'))) {
732
for (q = (p - 4); (q > filename) && (*q != '/'); q--)
734
** Back up to previous slash or beginning of string.
738
if (q > filename && *(q-1) == '/' &&
739
!(q > filename + 1 && *(q-1) != ':'))
743
if (strncmp(q, "../", 3) && strncmp(q, "./", 2)) {
745
** Not after "//" at beginning of string or
746
** after "://", and xxx is not ".." or ".",
747
** so remove the "xxx/..".
753
*p = '\0'; /* terminate */
760
/* Make Relative Name. HTRelative()
761
** -------------------
763
** This function creates and returns a string which gives an expression of
764
** one address as related to another. Where there is no relation, an absolute
765
** address is returned.
768
** Both names must be absolute, fully qualified names of nodes
772
** The return result points to a newly allocated name which, if
773
** parsed by HTParse relative to relatedName, will yield aName.
774
** The caller is responsible for freeing the resulting name later.
777
PUBLIC char * HTRelative ARGS2(
779
CONST char *, relatedName)
781
char * result = NULL;
782
CONST char *p = aName;
783
CONST char *q = relatedName;
784
CONST char * after_access = NULL;
785
CONST char * path = NULL;
786
CONST char * last_slash = NULL;
789
for (; *p; p++, q++) { /* Find extent of match */
802
/* q, p point to the first non-matching character or zero */
804
if (!after_access) { /* Different access */
805
StrAllocCopy(result, aName);
806
} else if (slashes < 3){ /* Different nodes */
807
StrAllocCopy(result, after_access);
808
} else if (slashes == 3){ /* Same node, different path */
809
StrAllocCopy(result, path);
810
} else { /* Some path in common */
812
for (; *q && (*q != '#'); q++)
815
result = typecallocn(char, 3*levels + strlen(last_slash) + 1);
817
outofmem(__FILE__, "HTRelative");
819
for (; levels; levels--)
820
strcat(result, "../");
821
strcat(result, last_slash+1);
824
"HTparse: `%s' expressed relative to\n `%s' is\n `%s'.\n",
825
aName, relatedName, result));
829
/* Escape undesirable characters using % HTEscape()
830
** -------------------------------------
832
** This function takes a pointer to a string in which
833
** some characters may be unacceptable unescaped.
834
** It returns a string which has these characters
835
** represented by a '%' character followed by two hex digits.
837
** Unlike HTUnEscape(), this routine returns a calloc'd string.
839
PRIVATE CONST unsigned char isAcceptable[96] =
841
/* Bit 0 xalpha -- see HTFile.h
842
** Bit 1 xpalpha -- as xalpha but with plus.
843
** Bit 2 ... path -- as xpalphas but with /
845
/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
846
{ 0,0,0,0,0,0,0,0,0,0,7,6,0,7,7,4, /* 2x !"#$%&'()*+,-./ */
847
7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,0, /* 3x 0123456789:;<=>? */
848
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /* 4x @ABCDEFGHIJKLMNO */
849
7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,7, /* 5X PQRSTUVWXYZ[\]^_ */
850
0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /* 6x `abcdefghijklmno */
851
7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,0 }; /* 7X pqrstuvwxyz{|}~ DEL */
853
PRIVATE char *hex = "0123456789ABCDEF";
854
/*
 * Nonzero when the character code a lies in 32..127 and its entry in
 * isAcceptable[] has a bit in common with `mask`; the name `mask` must be
 * in scope wherever the macro is used.
 * The argument is parenthesized at every use so that an expression such as
 * `c & 0xff` is tested as a whole.  Note that a may be evaluated up to
 * three times, so it must not have side effects.
 */
#define ACCEPTABLE(a) ((a) >= 32 && (a) < 128 && ((isAcceptable[(a) - 32]) & mask))
856
PUBLIC char * HTEscape ARGS2(
863
int unacceptable = 0;
864
for (p = str; *p; p++)
865
if (!ACCEPTABLE(UCH(TOASCII(*p))))
867
result = typecallocn(char, p-str + unacceptable + unacceptable + 1);
869
outofmem(__FILE__, "HTEscape");
870
for (q = result, p = str; *p; p++) {
871
unsigned char a = TOASCII(*p);
872
if (!ACCEPTABLE(a)) {
873
*q++ = HEX_ESCAPE; /* Means hex coming */
879
*q++ = '\0'; /* Terminate */
883
/* Escape unsafe characters using % HTEscapeUnsafe()
884
** --------------------------------
886
** This function takes a pointer to a string in which
887
** some characters that may be unsafe are unescaped.
888
** It returns a string which has these characters
889
** represented by a '%' character followed by two hex digits.
891
** Unlike HTUnEscape(), this routine returns a calloc'd string.
893
/*
 * Nonzero for any code outside the range 33..126, i.e. for 32 (space) and
 * everything below it, and for 127 and everything above it.
 */
#define UNSAFE(ch) (!(((ch) > 32) && ((ch) < 127)))
895
PUBLIC char *HTEscapeUnsafe ARGS1(
901
int unacceptable = 0;
902
for (p = str; *p; p++)
903
if (UNSAFE(UCH(TOASCII(*p))))
905
result = typecallocn(char, p-str + unacceptable + unacceptable + 1);
907
outofmem(__FILE__, "HTEscapeUnsafe");
908
for (q = result, p = str; *p; p++) {
909
unsigned char a = TOASCII(*p);
911
*q++ = HEX_ESCAPE; /* Means hex coming */
917
*q++ = '\0'; /* Terminate */
921
/* Escape undesirable characters using % but space to +. HTEscapeSP()
922
** -----------------------------------------------------
924
** This function takes a pointer to a string in which
925
** some characters may be unacceptable unescaped.
926
** It returns a string which has these characters
927
** represented by a '%' character followed by two hex digits,
928
** except that spaces are converted to '+' instead of %20.
930
** Unlike HTUnEscape(), this routine returns a calloced string.
932
PUBLIC char * HTEscapeSP ARGS2(
939
int unacceptable = 0;
940
for (p = str; *p; p++)
941
if (!(*p == ' ' || ACCEPTABLE(UCH(TOASCII(*p)))))
943
result = typecallocn(char, p-str + unacceptable + unacceptable + 1);
945
outofmem(__FILE__, "HTEscape");
946
for (q = result, p = str; *p; p++) {
947
unsigned char a = TOASCII(*p);
950
} else if (!ACCEPTABLE(a)) {
951
*q++ = HEX_ESCAPE; /* Means hex coming */
958
*q++ = '\0'; /* Terminate */
962
/* Decode %xx escaped characters. HTUnEscape()
963
** ------------------------------
965
** This function takes a pointer to a string in which some
966
** characters may have been encoded in %xy form, where xy is
967
** the ASCII hex code for character 16x+y.
968
** The string is converted in place, as it will never grow.
970
PRIVATE char from_hex ARGS1(
973
return (char) ( c >= '0' && c <= '9' ? c - '0'
974
: c >= 'A' && c <= 'F'? c - 'A' + 10
975
: c - 'a' + 10); /* accept small letters just in case */
978
PUBLIC char * HTUnEscape ARGS1(
988
if (*p == HEX_ESCAPE &&
990
* Tests shouldn't be needed, but better safe than sorry.
993
isxdigit(UCH(p[1])) &&
994
isxdigit(UCH(p[2]))) {
997
*q = (char) (from_hex(*p++) * 16);
1000
** Careful! FROMASCII() may evaluate its arg more than once!
1001
*/ /* S/390 -- gil -- 0221 */
1002
*q = (char) (*q + from_hex(*p++));
1016
/* Decode some %xx escaped characters. HTUnEscapeSome()
1017
** ----------------------------------- Klaus Weide
1018
** (kweide@tezcat.com)
1019
** This function takes a pointer to a string in which some
1020
** characters may have been encoded in %xy form, where xy is
1021
** the ASCII hex code for character 16x+y, and a pointer to
1022
** a second string containing one or more characters which
1023
** should be unescaped if escaped in the first string.
1024
** The first string is converted in place, as it will never grow.
1026
PUBLIC char * HTUnEscapeSome ARGS2(
1028
CONST char *, do_trans)
1034
if (p == NULL || *p == '\0' || do_trans == NULL || *do_trans == '\0')
1037
while (*p != '\0') {
1038
if (*p == HEX_ESCAPE &&
1039
p[1] && p[2] && /* tests shouldn't be needed, but.. */
1040
isxdigit(UCH(p[1])) &&
1041
isxdigit(UCH(p[2])) &&
1042
(testcode = (char) FROMASCII(from_hex(p[1])*16 +
1043
from_hex(p[2]))) && /* %00 no good*/
1044
strchr(do_trans, testcode)) { /* it's one of the ones we want */
1055
} /* HTUnEscapeSome */
1057
PRIVATE CONST unsigned char crfc[96] =
1059
/* Bit 0 xalpha -- need "quoting"
1060
** Bit 1 xpalpha -- need \escape if quoted
1062
/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
1063
{ 1,0,3,0,0,0,0,0,1,1,0,0,1,0,1,0, /* 2x !"#$%&'()*+,-./ */
1064
0,0,0,0,0,0,0,0,0,0,1,1,1,0,1,0, /* 3x 0123456789:;<=>? */
1065
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 4x @ABCDEFGHIJKLMNO */
1066
0,0,0,0,0,0,0,0,0,0,0,1,2,1,0,0, /* 5X PQRSTUVWXYZ[\]^_ */
1067
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 6x `abcdefghijklmno */
1068
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3 }; /* 7X pqrstuvwxyz{|}~ DEL */
1071
** Turn a string which is not an RFC 822 token into a quoted-string. - KW
1072
** The "quoted" parameter tells whether we need the beginning/ending quote
1073
** marks. If not, the caller will provide them -TD
1075
PUBLIC void HTMake822Word ARGS2(
1085
if (isEmpty(*str)) {
1086
StrAllocCopy(*str, quoted ? "\"\"" : "");
1089
for (p = *str; *p; p++) {
1090
a = TOASCII(*p); /* S/390 -- gil -- 0240 */
1091
if (a < 32 || a >= 128 ||
1092
((crfc[a-32]) & 1)) {
1095
if (a >= 160 || a == '\t')
1097
if (a == '\r' || a == '\n')
1099
else if ((a & 127) < 32 || ((crfc[a-32]) & 2))
1105
result = typecallocn(char, p-(*str) + added + 1);
1107
outofmem(__FILE__, "HTMake822Word");
1113
** Having converted the character to ASCII, we can't use symbolic
1114
** escape codes, since they're in the host character set, which
1115
** is not necessarily ASCII. Thus we use octal escape codes instead.
1116
** -- gil (Paul Gilmartin) <pg@sweng.stortek.com>
1117
*/ /* S/390 -- gil -- 0268 */
1118
for (p = *str; *p; p++) {
1120
if ((a != '\011') && ((a & 127) < 32 ||
1121
( a < 128 && ((crfc[a-32]) & 2))))
1124
if (a == '\012' || (a == '\015' && (TOASCII(*(p+1)) != '\012')))
1129
*q++ = '\0'; /* Terminate */