7
/* #include <header_token.h>
12
/* const char *u.value;
17
/* int header_token(token, token_len, token_buffer, ptr,
18
/* specials, terminator)
19
/* HEADER_TOKEN *token;
21
/* VSTRING *token_buffer;
23
/* const char *specials;
26
/* This module parses a mail header value (text after field-name:)
27
/* into tokens. The parser understands RFC 822 linear white space,
28
/* quoted-string, comment, control characters, and a set of
29
/* user-specified special characters.
31
/* A result token type is one of the following:
32
/* .IP HEADER_TOK_QSTRING
33
/* Quoted string as per RFC 822.
34
/* .IP HEADER_TOK_TOKEN
35
/* Token as per RFC 822, and the special characters supplied by the
38
/* The value of a control character or special character.
40
/* header_token() tokenizes the input and stops after a user-specified
41
/* terminator (ignoring all tokens that exceed the capacity of
42
/* the result storage), or when it runs out of space for the result.
43
/* The terminator is not stored. The result value is the number of
44
/* tokens stored, or -1 when the input was exhausted before any tokens
49
/* Result array of HEADER_TOKEN structures. Token string values
50
/* are pointers to null-terminated substrings in the token_buffer.
52
/* Length of the array of HEADER_TOKEN structures.
54
/* Storage for result token string values.
56
/* Input/output read position. The input is a null-terminated string.
58
/* Special characters according to the relevant RFC, or a
59
/* null pointer (default to the RFC 822 special characters).
60
/* This must include the optional terminator if one is specified.
62
/* The special character to stop after, or zero.
64
/* Eight-bit characters are not given special treatment.
66
/* RFC 822 (ARPA Internet Text Messages)
68
/* Fatal errors: memory allocation problem.
72
/* The Secure Mailer license must be distributed with this software.
75
/* IBM T.J. Watson Research
77
/* Yorktown Heights, NY 10598, USA
86
/* Utility library. */
94
#include <header_token.h>
96
/* Application-specific. */
99
* Silly little macros.
101
/* STR - shorthand for vstring_str(x); this file applies it to token_buffer to reach the stored characters. */
#define STR(x) vstring_str(x)
102
/* LEN - shorthand for VSTRING_LEN(x); this file uses it to record the current token_buffer length as a token's start offset. */
#define LEN(x) VSTRING_LEN(x)
103
/*
 * CU_CHAR_PTR - view (x) through a pointer to const unsigned char. Used to
 * initialize the parser's read pointer from *ptr. NOTE(review): presumably
 * so that eight-bit input bytes are read as non-negative values before
 * they reach the character-class tests - confirm against the declaration
 * of ch.
 */
#define CU_CHAR_PTR(x) ((const unsigned char *) (x))
105
/* header_token - parse out the next item in a message header */
107
int header_token(HEADER_TOKEN *token, int token_len,
108
VSTRING *token_buffer, const char **ptr,
109
const char *user_specials, int user_terminator)
112
const unsigned char *cp;
121
VSTRING_RESET(token_buffer);
122
cp = CU_CHAR_PTR(*ptr);
124
if (user_specials == 0)
125
user_specials = LEX_822_SPECIALS;
130
* XXX What was the reason to continue parsing when user_terminator is
131
* specified? Perhaps this was needed at some intermediate stage of
134
while ((ch = *cp) != 0 && (user_terminator != 0 || tok_count < token_len)) {
138
* Skip RFC 822 linear white space.
140
if (IS_SPACE_TAB_CR_LF(ch))
146
if (ch == user_terminator)
150
* Skip RFC 822 comment.
154
while ((ch = *cp) != 0) {
156
if (ch == '(') { /* comments can nest! */
158
} else if (ch == ')') {
159
if (--comment_level == 0)
161
} else if (ch == '\\') {
171
* Copy quoted text according to RFC 822.
174
if (tok_count < token_len) {
175
token[tok_count].u.offset = LEN(token_buffer);
176
token[tok_count].type = HEADER_TOK_QSTRING;
178
while ((ch = *cp) != 0) {
182
if (ch == '\n') { /* unfold */
183
if (tok_count < token_len) {
184
len = LEN(token_buffer);
186
&& IS_SPACE_TAB_CR_LF(STR(token_buffer)[len - 1]))
188
if (len < LEN(token_buffer))
189
vstring_truncate(token_buffer, len);
198
if (tok_count < token_len)
199
VSTRING_ADDCH(token_buffer, ch);
201
if (tok_count < token_len) {
202
VSTRING_ADDCH(token_buffer, 0);
209
* Control, or special.
211
if (strchr(user_specials, ch) || ISCNTRL(ch)) {
212
if (tok_count < token_len) {
213
token[tok_count].u.offset = LEN(token_buffer);
214
token[tok_count].type = ch;
215
VSTRING_ADDCH(token_buffer, ch);
216
VSTRING_ADDCH(token_buffer, 0);
226
if (tok_count < token_len) {
227
token[tok_count].u.offset = LEN(token_buffer);
228
token[tok_count].type = HEADER_TOK_TOKEN;
229
VSTRING_ADDCH(token_buffer, ch);
231
while ((ch = *cp) != 0 && !IS_SPACE_TAB_CR_LF(ch)
232
&& !ISCNTRL(ch) && !strchr(user_specials, ch)) {
234
if (tok_count < token_len)
235
VSTRING_ADDCH(token_buffer, ch);
237
if (tok_count < token_len) {
238
VSTRING_ADDCH(token_buffer, 0);
246
* Ignore a zero-length item after the last terminator.
248
if (tok_count == 0 && ch == 0)
252
* Finalize. Fill in the string pointer array, now that the token buffer
253
* is no longer dynamically reallocated as it grows.
255
*ptr = (const char *) cp;
256
for (n = 0; n < tok_count; n++)
257
token[n].u.value = STR(token_buffer) + token[n].u.offset;
260
msg_info("header_token: %s %s %s",
261
tok_count > 0 ? token[0].u.value : "",
262
tok_count > 1 ? token[1].u.value : "",
263
tok_count > 2 ? token[2].u.value : "");