1
/* ========================================================================
2
* Copyright 1988-2006 University of Washington
4
* Licensed under the Apache License, Version 2.0 (the "License");
5
* you may not use this file except in compliance with the License.
6
* You may obtain a copy of the License at
8
* http://www.apache.org/licenses/LICENSE-2.0
11
* ========================================================================
15
* Program: UTF-8 auxiliary routines (c-client and MIME2 support)
17
* Author: Mark Crispin
18
* Networks and Distributed Computing
19
* Computing & Communications
20
* University of Washington
21
* Administration Building, AG-44
23
* Internet: MRC@CAC.Washington.EDU
26
* Last Edited: 6 November 2006
34
/* Convert charset labelled stringlist to UTF-8 in place
35
* Accepts: string list
39
/* Convert every entry of a string list to UTF-8 in place.
 *
 * st:      head of the list; a null list is accepted (the `if (st)` guard
 *          skips the loop entirely)
 * charset: charset label handed unchanged to utf8_text()
 *
 * For each node utf8_text() is called with U8T_CANONICAL.  When it returns
 * non-zero the node's previous text.data is released with fs_give() and the
 * node takes over the converted data pointer and size.  When it returns zero
 * the node is left untouched.  Nothing is reported back to the caller.
 *
 * NOTE(review): the declaration of `txt` and the function's braces are not
 * visible in this listing; `txt` is presumably a SIZEDTEXT given how it is
 * passed to utf8_text() and read back through .data/.size -- confirm.
 */
static void utf8_stringlist (STRINGLIST *st,char *charset)
42
/* convert entire stringstruct */
43
if (st) do if (utf8_text (&st->text,charset,&txt,U8T_CANONICAL)) {
44
/* release the unconverted text before taking over the converted buffer */
fs_give ((void **) &st->text.data);
45
st->text.data = txt.data; /* transfer this text */
46
st->text.size = txt.size;
47
/* the assignment in the condition advances to the next node and stops at
 * the end of the list */
} while (st = st->next);
51
/* Convert charset labelled searchpgm to UTF-8 in place
52
* Accepts: search program
56
/* Convert all string criteria of a search program to UTF-8 in place.
 *
 * pgm:     search program; nothing is done when it is null
 * charset: charset label passed through to utf8_stringlist(), utf8_text()
 *          and the recursive calls
 *
 * The string lists bcc, cc, from, to, subject, body, text, return_path,
 * sender, reply_to, in_reply_to, message_id, newsgroups, followup_to and
 * references are converted with utf8_stringlist().  Every entry of the
 * header list has its `line` and `text` members converted with utf8_text()
 * (U8T_CANONICAL); a member is replaced only when utf8_text() returns
 * non-zero.  Sub-programs reachable through the `or` and `not` lists are
 * converted by recursive calls.
 *
 * NOTE(review): the declarations of txt, hl, ol and pl, and several closing
 * braces, are not visible in this listing -- confirm against the full file.
 */
void utf8_searchpgm (SEARCHPGM *pgm,char *charset)
62
if (pgm) { /* must have a search program */
63
utf8_stringlist (pgm->bcc,charset);
64
utf8_stringlist (pgm->cc,charset);
65
utf8_stringlist (pgm->from,charset);
66
utf8_stringlist (pgm->to,charset);
67
utf8_stringlist (pgm->subject,charset);
68
/* header criteria carry two texts each (line and text); both are converted */
for (hl = pgm->header; hl; hl = hl->next) {
69
if (utf8_text (&hl->line,charset,&txt,U8T_CANONICAL)) {
70
/* drop the old buffer, then adopt the converted one */
fs_give ((void **) &hl->line.data);
71
hl->line.data = txt.data;
72
hl->line.size = txt.size;
74
if (utf8_text (&hl->text,charset,&txt,U8T_CANONICAL)) {
75
fs_give ((void **) &hl->text.data);
76
hl->text.data = txt.data;
77
hl->text.size = txt.size;
80
utf8_stringlist (pgm->body,charset);
81
utf8_stringlist (pgm->text,charset);
82
/* each OR entry holds two sub-programs; recurse into both */
for (ol = pgm->or; ol; ol = ol->next) {
83
utf8_searchpgm (ol->first,charset);
84
utf8_searchpgm (ol->second,charset);
86
/* each NOT entry holds one sub-program */
for (pl = pgm->not; pl; pl = pl->next) utf8_searchpgm (pl->pgm,charset);
87
utf8_stringlist (pgm->return_path,charset);
88
utf8_stringlist (pgm->sender,charset);
89
utf8_stringlist (pgm->reply_to,charset);
90
utf8_stringlist (pgm->in_reply_to,charset);
91
utf8_stringlist (pgm->message_id,charset);
92
utf8_stringlist (pgm->newsgroups,charset);
93
utf8_stringlist (pgm->followup_to,charset);
94
utf8_stringlist (pgm->references,charset);
98
/* Convert MIME-2 sized text to UTF-8
99
* Accepts: source sized text
101
* flags (same as utf8_text())
102
* Returns: T if successful, NIL if failure
106
/* utf8_mime2text() accepts a candidate encoded word only while the offset of
 * its closing '?' from its leading '=' is below this value */
#define MAXENCWORD 75
108
/* This resizing algorithm is stupid, but hopefully it should never be triggered
 * except for a pathological header.  The main concern is that we don't get a
 * buffer overflow.
 */
113
/* Cap on the initial size of the utf8_mime2text() output buffer, and the
 * amount by which that buffer grows each time it is found too small */
#define DSIZE 65536 /* real headers should never be this big */
114
/* Extra slack included in the space checks made before utf8_mime2text()
 * writes into an existing output buffer */
#define FUZZ 10 /* paranoia fuzz */
116
/* Convert sized text that may contain MIME-2 encoded words to UTF-8.
 *
 * src:   source text (data/size), scanned from data up to data + size.  While
 *        an encoded word is being converted, the '?' ending its charset and
 *        an optional '*' inside the charset are overwritten with NUL in the
 *        source buffer and restored afterwards, so src->data must be
 *        writable.
 * dst:   receives the result (data/size).
 * flags: passed on to utf8_text().  U8T_CANONICAL makes the function allocate
 *        an output buffer up front; U8T_CASECANON sends copied non-encoded
 *        bytes through ucs4_titlecase().
 *
 * Returns T on success.  dst->data is then either a NUL-terminated buffer
 * obtained from fs_get()/fs_resize(), or -- when no output buffer was ever
 * created -- the very same pointer as src->data (not a copy).
 * Returns NIL when mime2_decode() rejects an encoded word; any output buffer
 * is freed and dst is set to the same data pointer and size as src.
 *
 * NOTE(review): the declarations of cs, ce, ls, txt, rtxt and i, the
 * definition of MINENCWORD, a number of closing braces, and at least one
 * statement line in the continuation scan are not visible in this listing --
 * confirm against the full file before relying on the control flow described
 * in the comments below.
 */
long utf8_mime2text (SIZEDTEXT *src,SIZEDTEXT *dst,long flags)
118
unsigned char *s,*se,*e,*ee,*t,*te;
122
/* initial output size: 9 bytes for every 4 source bytes (rounded up), but
 * never more than DSIZE */
size_t dsize = min (DSIZE,((src->size / 4) + 1) * 9);
123
/* always create buffer if canonicalizing */
124
dst->data = (flags & U8T_CANONICAL) ?
125
(unsigned char *) fs_get ((size_t) dsize) : NIL;
126
dst->size = 0; /* nothing written yet */
127
/* look for encoded words */
128
for (s = src->data, se = src->data + src->size; s < se; s++) {
129
/* An encoded word is recognised when: enough bytes remain, the text starts
 * with "=?", a charset token follows (cs = its start, ce = the '?' ending
 * it), then an encoding token (e = its start, ee = the '?' ending it) that
 * is exactly one character long, then encoded text starting at t whose
 * closing '?' is te, and the whole thing is shorter than MAXENCWORD. */
if (((se - s) > MINENCWORD) && (*s == '=') && (s[1] == '?') &&
130
(cs = (char *) mime2_token (s+2,se,(unsigned char **) &ce)) &&
131
(e = mime2_token ((unsigned char *) ce+1,se,&ee)) &&
132
(te = mime2_text (t = e+2,se)) && (ee == e + 1) &&
133
((te - s) < MAXENCWORD)) {
134
if (mime2_decode (e,t,te,&txt)) {
135
*ce = '\0'; /* temporarily tie off charset */
136
/* a '*' inside the charset is also tied off so that only the part before
 * it is used as the charset name; ls remembers where to restore it */
if (ls = strchr (cs,'*')) *ls = '\0';
137
/* convert to UTF-8 as best we can */
138
/* if conversion with the labelled charset fails, retry with a NIL charset;
 * the result of the retry is not checked */
if (!utf8_text (&txt,cs,&rtxt,flags)) utf8_text (&txt,NIL,&rtxt,flags);
139
if (dst->data) { /* make sure existing buffer fits */
140
while (dsize <= (dst->size + rtxt.size + FUZZ)) {
141
dsize += DSIZE; /* kick it up */
142
fs_resize ((void **) &dst->data,dsize);
145
else { /* make a new buffer */
146
while (dsize <= (dst->size + rtxt.size)) dsize += DSIZE;
147
/* first encoded word without a pre-made buffer: allocate one and copy in
 * the source bytes that precede this encoded word, which also sets
 * dst->size to that prefix length */
memcpy (dst->data = (unsigned char *) fs_get (dsize),src->data,
148
dst->size = s - src->data);
150
/* append the converted text to the output */
for (i = 0; i < rtxt.size; i++) dst->data[dst->size++] = rtxt.data[i];
152
/* all done with converted text */
153
/* rtxt is freed only when it does not share its buffer with txt, which is
 * freed separately below */
if (rtxt.data != txt.data) fs_give ((void **) &rtxt.data);
154
if (ls) *ls = '*'; /* restore language tag delimiter */
155
*ce = '?'; /* restore charset delimiter */
156
/* all done with decoded text */
157
fs_give ((void **) &txt.data);
158
/* te is the '?' of the closing "?=", so s now rests on the final '=' and
 * the loop's s++ steps past the encoded word */
s = te+1; /* continue scan after encoded word */
159
/* skip leading whitespace */
160
for (t = s + 1; (t < se) && ((*t == ' ') || (*t == '\t')); t++);
161
/* see if likely continuation encoded word */
162
/* When another encoded word appears to follow, s is moved forward so that
 * the loop's s++ lands on its '='; the bytes skipped this way are not
 * copied to the output. */
if (t < (se - MINENCWORD)) switch (*t) {
163
case '=': /* possible encoded word? */
164
if (t[1] == '?') s = t - 1;
166
case '\015': /* CR, eat a following LF */
167
if (t[1] == '\012') t++;
168
/* the CR case continues straight into the LF case */
case '\012': /* possible end of logical line */
169
if ((t[1] == ' ') || (t[1] == '\t')) {
171
/* NOTE(review): the line that opens this loop is not visible in this
 * listing; as shown, the condition steps over the run of blanks and tabs
 * that follows the line break -- confirm */
while ((t < (se - MINENCWORD)) && ((t[1] == ' ')||(t[1] == '\t')));
172
if ((t < (se - MINENCWORD)) && (t[1] == '=') && (t[2] == '?'))
173
s = t; /* definitely looks like continuation */
177
else { /* restore original text */
178
if (dst->data) fs_give ((void **) &dst->data);
179
/* on failure dst refers to the source data itself, not to a copy */
dst->data = src->data;
180
dst->size = src->size;
181
return NIL; /* syntax error: MIME-2 decoding failure */
184
/* Not an encoded word at s: bytes are copied only when an output buffer
 * exists; otherwise they are merely stepped over. */
else do if (dst->data) { /* stash ASCII characters until LWSP */
185
if (dsize < (dst->size + FUZZ)) {
186
dsize += DSIZE; /* kick it up */
187
fs_resize ((void **) &dst->data,dsize);
189
/* kludge: assumes ASCII doesn't decompose and titlecases to one byte */
190
dst->data[dst->size++] = (flags & U8T_CASECANON) ?
191
(unsigned char) ucs4_titlecase (*s) : *s;
193
/* NOTE(review): the final operand of this loop condition is not visible
 * in this listing -- confirm how s is advanced and bounded here */
while ((*s != ' ') && (*s != '\t') && (*s != '\015') && (*s != '\012') &&
196
/* terminate the output buffer if one was created */
if (dst->data) dst->data[dst->size] = '\0';
197
else { /* nothing converted, return identity */
198
dst->data = src->data;
199
dst->size = src->size;
201
return T; /* success */
204
/* Decode MIME-2 text
208
* destination sized text
209
* Returns: T if successful, else NIL
212
/* Decode the text part of a MIME-2 encoded word.
 *
 * e:   points at the encoding designator; 'Q'/'q' and 'B'/'b' are handled,
 *      anything else is rejected
 * t:   first byte of the encoded text
 * te:  one past the last byte of the encoded text
 * txt: receives the decoded data pointer and size.  txt->data is set to NIL
 *      on entry.
 *
 * Returns NIL for an unknown encoding or for a Q-encoded '=' that is not
 * followed by two hex digits (the buffer allocated so far is freed first).
 *
 * NOTE(review): the second line of the signature (declaring txt), the
 * declaration of q, the statements that end each case, and the success
 * returns are not visible in this listing -- confirm against the full file.
 */
long mime2_decode (unsigned char *e,unsigned char *t,unsigned char *te,
216
txt->data = NIL; /* initially no returned data */
217
switch (*e) { /* dispatch based upon encoding */
218
case 'Q': case 'q': /* sort-of QUOTED-PRINTABLE */
219
/* every output byte consumes at least one input byte, so (te - t) bytes
 * plus one for the trailing NUL always suffice */
txt->data = (unsigned char *) fs_get ((size_t) (te - t) + 1);
220
for (q = t,txt->size = 0; q < te; q++) switch (*q) {
221
case '=': /* quoted character */
222
/* both must be hex */
223
/* NOTE(review): q[1] and q[2] are read without comparing against te; in
 * utf8_mime2text() te points at the "?=" that closes the word, so those
 * reads stay inside the source text there -- confirm for other callers */
if (!isxdigit (q[1]) || !isxdigit (q[2])) {
224
fs_give ((void **) &txt->data);
225
return NIL; /* syntax error: bad quoted character */
227
/* assemble character */
228
txt->data[txt->size++] = hex2byte (q[1],q[2]);
229
q += 2; /* advance past quoted character */
231
case '_': /* convert to space */
232
txt->data[txt->size++] = ' ';
234
default: /* ordinary character */
235
txt->data[txt->size++] = *q;
238
/* NUL-terminate the decoded text; the terminator is not counted in size */
txt->data[txt->size] = '\0';
240
case 'B': case 'b': /* BASE64 */
241
/* rfc822_base64() supplies both the decoded buffer and its size; a NIL
 * result means the text could not be decoded */
if (txt->data = (unsigned char *) rfc822_base64 (t,te - t,&txt->size))
243
default: /* any other encoding is unknown */
244
return NIL; /* syntax error: unknown encoding */
249
/* Get MIME-2 token from encoded word
250
* Accepts: current text pointer
252
* pointer to returned end pointer
253
* Returns: current text pointer & end pointer if success, else NIL
256
/* Scan a MIME-2 token, which must be terminated by '?'.
 *
 * s:  first byte of the candidate token
 * se: end of the text (exclusive)
 * t:  out-parameter (declared on a signature line not visible in this
 *     listing); *t starts at s and is advanced byte by byte, so on success
 *     it is left pointing at the terminating '?'
 *
 * The scan fails with NIL when it runs up to se, when it meets a byte for
 * which isgraph() is false, or when it meets one of the characters listed in
 * the case labels below.
 *
 * NOTE(review): the loop condition reads **t before the `*t < se` test in
 * the loop body, so when no '?' occurs before se the byte at se itself is
 * read -- confirm that callers guarantee a readable byte there.
 * NOTE(review): one line of case labels and the success return are not
 * visible in this listing -- confirm against the full file.
 */
unsigned char *mime2_token (unsigned char *s,unsigned char *se,
259
for (*t = s; **t != '?'; ++*t) {
260
if ((*t < se) && isgraph (**t)) switch (**t) {
261
case '(': case ')': case '<': case '>': case '@': case ',': case ';':
262
case ':': case '\\': case '"': case '/': case '[': case ']': case '.':
264
return NIL; /* none of these are valid in tokens */
266
else return NIL; /* out of text or CTL or space */
272
/* Get MIME-2 text from encoded word
 * Accepts: current text pointer (first byte of the encoded text)
 *          end of text pointer (one past the last byte that may be examined)
 * Returns: pointer to the '?' of the closing "?=" if success, else NIL
 *
 * The encoded text may consist only of bytes for which isgraph() is true and
 * ends at the first '?'.  That '?' must be followed by '=', and the "?=" must
 * either end the text or be followed by space, tab, CR or LF.
 */

unsigned char *mime2_text (unsigned char *s,unsigned char *se)
{
  /* Search for the closing '?'.  The scan stops ON the first byte that is
   * not graphic rather than stepping over it: incrementing inside the
   * isgraph() call would skip such a byte, so that text like "ab ?=" was
   * accepted with an embedded space.
   * The bound (se - s) > 1 keeps s[1] readable and avoids forming se - 1,
   * which is not a valid pointer when se is the start of the buffer. */
  while (((se - s) > 1) && (*s != '?') && isgraph (*s)) ++s;
  /* s[2] is only examined when the text does not end right after "?=" */
  return (((se - s) > 1) && (*s == '?') && (s[1] == '=') &&
          ((se == (s + 2)) || (s[2] == ' ') || (s[2] == '\t') ||
           (s[2] == '\015') || (s[2] == '\012'))) ? s : NIL;
}