1
/* $Id: scanner.c 4209 2012-07-18 10:21:00Z ming $ */
3
* Copyright (C) 2008-2011 Teluu Inc. (http://www.teluu.com)
4
* Copyright (C) 2003-2008 Benny Prijono <benny@prijono.org>
6
* This program is free software; you can redistribute it and/or modify
7
* it under the terms of the GNU General Public License as published by
8
* the Free Software Foundation; either version 2 of the License, or
9
* (at your option) any later version.
11
* This program is distributed in the hope that it will be useful,
12
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
* GNU General Public License for more details.
16
* You should have received a copy of the GNU General Public License
17
* along with this program; if not, write to the Free Software
18
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
#include <pjlib-util/scanner.h>
22
#include <pj/string.h>
23
#include <pj/except.h>
26
#include <pj/assert.h>
28
/*
 * Character classification helpers used by the scanner routines.
 */

/* Non-zero when c is a blank: a space or a horizontal tab. */
#define PJ_SCAN_IS_SPACE(c)             ((c)==' ' || (c)=='\t')

/* Non-zero when c is a line terminator: carriage return or line feed. */
#define PJ_SCAN_IS_NEWLINE(c)           ((c)=='\r' || (c)=='\n')

/*
 * Cheap pre-filter: non-zero for every character code up to and including
 * 32 (the space character). Callers in this file use it only to decide
 * whether to call pj_scan_skip_whitespace().
 */
#define PJ_SCAN_IS_PROBABLY_SPACE(c)    ((c) <= 32)

/*
 * Non-zero while pointer s has not reached the end of the scanner buffer.
 * The expansion refers to a variable named `scanner`, which must be in
 * scope where the macro is used. The argument is parenthesized so that an
 * expression argument (for example a conditional expression) is compared
 * as a whole.
 */
#define PJ_SCAN_CHECK_EOF(s)            ((s) != scanner->end)
34
#if defined(PJ_SCANNER_USE_BITWISE) && PJ_SCANNER_USE_BITWISE != 0
35
# include "scanner_cis_bitwise.c"
37
# include "scanner_cis_uint.c"
41
/*
 * Report a syntax error for the given scanner.
 *
 * Invokes the error callback stored in scanner->callback, passing the
 * scanner itself. What happens next is decided by that callback.
 */
static void pj_scan_syntax_err(pj_scanner *scanner)
{
    (*scanner->callback)(scanner);
}
47
/*
 * Add a range of characters to a character input specification.
 *
 * The visible loop runs while cstart != cend and marks cstart in cis with
 * PJ_CIS_SET, so cend is treated as an exclusive bound (callers in this
 * file pass values such as 'z'+1). cstart must be greater than zero; this
 * is asserted because the parser requires that zero is never set.
 *
 * NOTE(review): the statement that advances cstart and the closing braces
 * are not visible in this extract - confirm against the upstream source.
 * NOTE(review): the loop tests for inequality only, so it presumably
 * relies on cend >= cstart - confirm callers guarantee this.
 */
PJ_DEF(void) pj_cis_add_range(pj_cis_t *cis, int cstart, int cend)
49
/* Can not set zero. This is the requirement of the parser. */
50
pj_assert(cstart > 0);
52
while (cstart != cend) {
53
PJ_CIS_SET(cis, cstart);
58
/*
 * Add the ASCII letters to a character input specification.
 *
 * Adds 'a'..'z' and 'A'..'Z' by calling pj_cis_add_range() twice; the
 * second argument of each call is one past the last letter because the
 * range end is exclusive.
 */
PJ_DEF(void) pj_cis_add_alpha(pj_cis_t *cis)
{
    pj_cis_add_range( cis, 'a', 'z'+1);
    pj_cis_add_range( cis, 'A', 'Z'+1);
}
64
/*
 * Add the decimal digits to a character input specification.
 *
 * Adds '0'..'9' through pj_cis_add_range(); '9'+1 is passed because the
 * range end is exclusive.
 */
PJ_DEF(void) pj_cis_add_num(pj_cis_t *cis)
{
    pj_cis_add_range( cis, '0', '9'+1);
}
69
/*
 * Add characters taken from a C string to a character input specification.
 *
 * The visible statement marks the character currently pointed to by str
 * in cis using PJ_CIS_SET.
 *
 * NOTE(review): the iteration over str is not visible in this extract;
 * presumably every character up to the terminating NUL is added - confirm
 * against the upstream source.
 */
PJ_DEF(void) pj_cis_add_str( pj_cis_t *cis, const char *str)
72
PJ_CIS_SET(cis, *str);
77
/*
 * Combine another character input specification (rhs) into cis.
 *
 * The visible loop walks all 256 character codes and tests each one
 * against rhs with PJ_CIS_ISSET.
 *
 * NOTE(review): the declaration of i and the statement executed when the
 * test succeeds are not visible in this extract; presumably the code is
 * set in cis - confirm against the upstream source.
 */
PJ_DEF(void) pj_cis_add_cis( pj_cis_t *cis, const pj_cis_t *rhs)
80
for (i=0; i<256; ++i) {
81
if (PJ_CIS_ISSET(rhs, i))
86
/*
 * Remove a range of characters from a character input specification.
 *
 * The visible loop runs while cstart != cend and clears cstart in cis
 * with PJ_CIS_CLR, so cend is treated as an exclusive bound.
 *
 * NOTE(review): the statement that advances cstart is not visible in this
 * extract - confirm against the upstream source.
 * NOTE(review): the loop tests for inequality only, so it presumably
 * relies on cend >= cstart - confirm callers guarantee this.
 */
PJ_DEF(void) pj_cis_del_range( pj_cis_t *cis, int cstart, int cend)
88
while (cstart != cend) {
89
PJ_CIS_CLR(cis, cstart);
94
/*
 * Remove characters taken from a C string from a character input
 * specification.
 *
 * The visible statement clears the character currently pointed to by str
 * in cis using PJ_CIS_CLR.
 *
 * NOTE(review): the iteration over str is not visible in this extract;
 * presumably every character up to the terminating NUL is removed -
 * confirm against the upstream source.
 */
PJ_DEF(void) pj_cis_del_str( pj_cis_t *cis, const char *str)
97
PJ_CIS_CLR(cis, *str);
102
/*
 * Invert a character input specification.
 *
 * The visible loop covers character codes 1 through 255 and tests each
 * with PJ_CIS_ISSET. Code 0 is deliberately excluded because the parser
 * requires that zero is never set.
 *
 * NOTE(review): the declaration of i and the branches taken for set and
 * unset codes are not visible in this extract; presumably set codes are
 * cleared and unset codes are set - confirm against the upstream source.
 */
PJ_DEF(void) pj_cis_invert( pj_cis_t *cis )
105
/* Can not set zero. This is the requirement of the parser. */
106
for (i=1; i<256; ++i) {
107
if (PJ_CIS_ISSET(cis,i))
114
/*
 * Initialize a scanner over the buffer [bufstart, bufstart + buflen).
 *
 * Sets begin, curptr and start_line to bufstart, end to bufstart + buflen,
 * stores the syntax error callback, and stores options in skip_ws. If
 * skip_ws is non-zero, leading whitespace is skipped immediately by
 * calling pj_scan_skip_whitespace().
 *
 * A comment in pj_scan_get() states that the buffer is NUL terminated, and
 * several routines in this file read *s without comparing against end.
 * NOTE(review): presumably callers must place a NUL at bufstart[buflen] -
 * confirm against the header documentation.
 * NOTE(review): initialization of scanner->line is not visible in this
 * extract - confirm against the upstream source.
 */
PJ_DEF(void) pj_scan_init( pj_scanner *scanner, char *bufstart, int buflen,
115
unsigned options, pj_syn_err_func_ptr callback )
119
scanner->begin = scanner->curptr = bufstart;
120
scanner->end = bufstart + buflen;
122
scanner->start_line = scanner->begin;
123
scanner->callback = callback;
124
scanner->skip_ws = options;
126
if (scanner->skip_ws)
127
pj_scan_skip_whitespace(scanner);
131
/*
 * Finalize a scanner.
 *
 * The only visible statement marks the argument as unused, so no cleanup
 * of the scanner is visible here.
 */
PJ_DEF(void) pj_scan_fini( pj_scanner *scanner )
134
PJ_UNUSED_ARG(scanner);
137
/*
 * Advance the scanner past whitespace, as configured by scanner->skip_ws.
 *
 * Visible behavior:
 *  - starting at curptr, characters for which PJ_SCAN_IS_SPACE is true
 *    (space, tab) are skipped;
 *  - when the PJ_SCAN_AUTOSKIP_NEWLINE bit is set in skip_ws and a newline
 *    is found, a loop handles CR, LF and blanks, updating both curptr and
 *    start_line as lines are consumed;
 *  - when all bits of PJ_SCAN_AUTOSKIP_WS_HEADER are set and a newline is
 *    found, the code checks for a header continuation: start_line is
 *    updated and, if the next line starts with a blank, a second pointer
 *    (t) scans past the blanks.
 *
 * NOTE(review): many statements of this function (pointer increments,
 * line counting, the final curptr assignment) are not visible in this
 * extract - confirm the details against the upstream source.
 */
PJ_DEF(void) pj_scan_skip_whitespace( pj_scanner *scanner )
139
register char *s = scanner->curptr;
141
while (PJ_SCAN_IS_SPACE(*s)) {
145
if (PJ_SCAN_IS_NEWLINE(*s) && (scanner->skip_ws & PJ_SCAN_AUTOSKIP_NEWLINE)) {
151
scanner->curptr = scanner->start_line = s;
152
} else if (*s == '\n') {
155
scanner->curptr = scanner->start_line = s;
156
} else if (PJ_SCAN_IS_SPACE(*s)) {
159
} while (PJ_SCAN_IS_SPACE(*s));
166
if (PJ_SCAN_IS_NEWLINE(*s) && (scanner->skip_ws & PJ_SCAN_AUTOSKIP_WS_HEADER)==PJ_SCAN_AUTOSKIP_WS_HEADER) {
167
/* Check for header continuation. */
176
scanner->start_line = s;
178
if (PJ_SCAN_IS_SPACE(*s)) {
179
register char *t = s;
182
} while (PJ_SCAN_IS_SPACE(*t));
192
/*
 * Skip the remainder of the current line.
 *
 * Searches for the next '\n' starting at curptr with pj_ansi_strchr().
 * Two outcomes are visible: curptr is moved to scanner->end, or curptr and
 * start_line are both moved to the character after the newline (s+1).
 *
 * NOTE(review): the condition selecting between the two assignments is
 * not visible in this extract; presumably the first applies when no
 * newline is found - confirm against the upstream source.
 * NOTE(review): the search is not bounded by scanner->end, so it
 * presumably relies on the buffer being NUL terminated.
 */
PJ_DEF(void) pj_scan_skip_line( pj_scanner *scanner )
194
char *s = pj_ansi_strchr(scanner->curptr, '\n');
196
scanner->curptr = scanner->end;
198
scanner->curptr = scanner->start_line = s+1;
203
/*
 * Look ahead at the run of characters matching spec.
 *
 * Raises a syntax error through pj_scan_syntax_err() if curptr is already
 * at or past the end of the buffer. Otherwise a local pointer advances
 * while pj_cis_match(spec, *s) holds, and out is set to the span from
 * curptr up to that pointer with pj_strset3(). No assignment to
 * scanner->curptr is visible.
 *
 * The matching loop does not compare against scanner->end (see the inline
 * comment); NOTE(review): this presumably relies on a NUL terminator that
 * spec never matches.
 * NOTE(review): the return statements are not visible in this extract -
 * confirm the returned value against the upstream source.
 */
PJ_DEF(int) pj_scan_peek( pj_scanner *scanner,
204
const pj_cis_t *spec, pj_str_t *out)
206
register char *s = scanner->curptr;
208
if (s >= scanner->end) {
209
pj_scan_syntax_err(scanner);
213
/* Don't need to check EOF with PJ_SCAN_CHECK_EOF(s) */
214
while (pj_cis_match(spec, *s))
217
pj_strset3(out, scanner->curptr, s);
222
/*
 * Look ahead at the next len characters.
 *
 * Computes endpos = curptr + len and raises a syntax error through
 * pj_scan_syntax_err() if endpos lies beyond scanner->end. Otherwise out
 * is set to len characters starting at curptr with pj_strset(). No
 * assignment to scanner->curptr is visible.
 *
 * NOTE(review): curptr + len is formed before the bounds check; for a very
 * large len the pointer arithmetic itself is out of range. Comparing len
 * against (scanner->end - scanner->curptr) would avoid that - consider
 * when the complete function is available.
 * NOTE(review): the return statements are not visible in this extract.
 */
PJ_DEF(int) pj_scan_peek_n( pj_scanner *scanner,
223
pj_size_t len, pj_str_t *out)
225
char *endpos = scanner->curptr + len;
227
if (endpos > scanner->end) {
228
pj_scan_syntax_err(scanner);
232
pj_strset(out, scanner->curptr, len);
237
/*
 * Look ahead at the characters up to the first one matching spec.
 *
 * Raises a syntax error through pj_scan_syntax_err() if curptr is already
 * at or past the end of the buffer. Otherwise a local pointer advances
 * while it has not reached scanner->end and the current character does
 * not match spec; out is then set to the span from curptr up to that
 * pointer with pj_strset3(). No assignment to scanner->curptr is visible.
 *
 * NOTE(review): the last parameter line and the return statements are not
 * visible in this extract - confirm against the upstream source.
 */
PJ_DEF(int) pj_scan_peek_until( pj_scanner *scanner,
238
const pj_cis_t *spec,
241
register char *s = scanner->curptr;
243
if (s >= scanner->end) {
244
pj_scan_syntax_err(scanner);
248
while (PJ_SCAN_CHECK_EOF(s) && !pj_cis_match( spec, *s))
251
pj_strset3(out, scanner->curptr, s);
256
/*
 * Get the run of characters matching spec at the current position.
 *
 * spec must not match the NUL character (asserted). If the character at
 * curptr does not match spec, a syntax error is raised through
 * pj_scan_syntax_err(); this also covers end of input, which is detected
 * implicitly through the NUL terminator. Otherwise a local pointer is
 * advanced over the matching characters and out is set to the span from
 * curptr up to that pointer with pj_strset3(). Finally, if the next
 * character is probably whitespace and skip_ws is enabled,
 * pj_scan_skip_whitespace() is called.
 *
 * NOTE(review): the statement that stores the new position in
 * scanner->curptr is not visible in this extract - confirm against the
 * upstream source.
 */
PJ_DEF(void) pj_scan_get( pj_scanner *scanner,
257
const pj_cis_t *spec, pj_str_t *out)
259
register char *s = scanner->curptr;
261
pj_assert(pj_cis_match(spec,0)==0);
263
/* EOF is detected implicitly */
264
if (!pj_cis_match(spec, *s)) {
265
pj_scan_syntax_err(scanner);
271
} while (pj_cis_match(spec, *s));
272
/* No need to check EOF here (PJ_SCAN_CHECK_EOF(s)) because
273
* buffer is NULL terminated and pj_cis_match(spec,0) should be
277
pj_strset3(out, scanner->curptr, s);
281
if (PJ_SCAN_IS_PROBABLY_SPACE(*s) && scanner->skip_ws) {
282
pj_scan_skip_whitespace(scanner);
287
/*
 * Get the run of characters matching spec, decoding "%XX" hex escapes.
 *
 * spec must match neither the NUL character nor '%' (both asserted). If
 * the character at curptr neither matches spec nor is '%', a syntax error
 * is raised through pj_scan_syntax_err().
 *
 * Visible processing:
 *  - for an escape, the code checks that three characters are available
 *    (s+3 <= end) and that the character after '%' is a hex digit, then
 *    stores the byte (hex(s[1]) << 4) + hex(s[2]) through dst;
 *  - runs of characters matching spec are scanned, and pj_memmove() copies
 *    them down to dst when dst differs from the start of the run;
 *  - out->slen is set to dst - out->ptr;
 *  - if the next character is probably whitespace and skip_ws is enabled,
 *    pj_scan_skip_whitespace() is called.
 *
 * NOTE(review): dst presumably points into the scanner buffer, so the
 * input is rewritten in place - the declarations of dst and start and the
 * assignment of out->ptr are not visible in this extract; confirm against
 * the upstream source.
 */
PJ_DEF(void) pj_scan_get_unescape( pj_scanner *scanner,
288
const pj_cis_t *spec, pj_str_t *out)
290
register char *s = scanner->curptr;
293
pj_assert(pj_cis_match(spec,0)==0);
295
/* Must not match character '%' */
296
pj_assert(pj_cis_match(spec,'%')==0);
298
/* EOF is detected implicitly */
299
if (!pj_cis_match(spec, *s) && *s != '%') {
300
pj_scan_syntax_err(scanner);
307
if (s+3 <= scanner->end && pj_isxdigit(*(s+1)) &&
310
*dst = (pj_uint8_t) ((pj_hex_digit_to_val(*(s+1)) << 4) +
311
pj_hex_digit_to_val(*(s+2)));
321
if (pj_cis_match(spec, *s)) {
325
} while (pj_cis_match(spec, *s));
327
if (dst != start) pj_memmove(dst, start, s-start);
334
out->slen = (dst - out->ptr);
336
if (PJ_SCAN_IS_PROBABLY_SPACE(*s) && scanner->skip_ws) {
337
pj_scan_skip_whitespace(scanner);
342
/*
 * Get a quoted string delimited by a single pair of quote characters.
 *
 * Convenience wrapper around pj_scan_get_quotes(): begin_quote and
 * end_quote are narrowed to char and passed as one-element quote sets
 * (qsize of 1). The result is stored in out by pj_scan_get_quotes().
 */
PJ_DEF(void) pj_scan_get_quote( pj_scanner *scanner,
                                int begin_quote, int end_quote,
                                pj_str_t *out)
{
    char beg = (char)begin_quote;
    char end = (char)end_quote;
    pj_scan_get_quotes(scanner, &beg, &end, 1, out);
}
351
/*
 * Get a quoted string, choosing the delimiters from parallel arrays of
 * begin and end quote characters.
 *
 * qsize is the number of entries in begin_quote and end_quote and must be
 * greater than zero (asserted).
 *
 * Visible processing:
 *  - the character at curptr is compared against each begin_quote[i]; a
 *    syntax error is raised through pj_scan_syntax_err() in the begin
 *    quote handling;
 *  - the scan then advances until end of buffer, a '\n', or the matching
 *    end_quote[qpair] is found;
 *  - when an end quote is preceded by a backslash, the backslashes before
 *    it are walked backwards and the loop is left when (q - r) is odd;
 *  - after the loop, a character other than end_quote[qpair] raises a
 *    syntax error;
 *  - out is set to the span from curptr up to the scan pointer with
 *    pj_strset3(), and whitespace is skipped if skip_ws is enabled.
 *
 * NOTE(review): the declarations of i, qpair, q and r, the pointer
 * increments and the curptr update are not visible in this extract -
 * confirm the details against the upstream source.
 */
PJ_DEF(void) pj_scan_get_quotes(pj_scanner *scanner,
352
const char *begin_quote, const char *end_quote,
353
int qsize, pj_str_t *out)
355
register char *s = scanner->curptr;
359
pj_assert(qsize > 0);
361
/* Check and eat the begin_quote. */
362
for (i = 0; i < qsize; ++i) {
363
if (*s == begin_quote[i]) {
369
pj_scan_syntax_err(scanner);
374
/* Loop until end_quote is found.
377
/* loop until end_quote is found. */
378
while (PJ_SCAN_CHECK_EOF(s) && *s != '\n' && *s != end_quote[qpair]) {
382
/* check that no backslash character precedes the end_quote. */
383
if (*s == end_quote[qpair]) {
384
if (*(s-1) == '\\') {
385
if (s-2 == scanner->begin) {
391
while (r != scanner->begin && *r == '\\') {
394
/* break from main loop if we have odd number of backslashes */
395
if (((unsigned)(q-r) & 0x01) == 1) {
401
/* end_quote is not preceded by backslash. break now. */
405
/* loop ended by non-end_quote character. break now. */
410
/* Check and eat the end quote. */
411
if (*s != end_quote[qpair]) {
412
pj_scan_syntax_err(scanner);
417
pj_strset3(out, scanner->curptr, s);
421
if (PJ_SCAN_IS_PROBABLY_SPACE(*s) && scanner->skip_ws) {
422
pj_scan_skip_whitespace(scanner);
427
/*
 * Get exactly N characters from the current position.
 *
 * Raises a syntax error through pj_scan_syntax_err() if curptr + N lies
 * beyond scanner->end. Otherwise out is set to N characters starting at
 * curptr with pj_strset(), curptr is advanced by N, and whitespace is
 * skipped if the next character is probably whitespace and skip_ws is
 * enabled.
 *
 * NOTE(review): the bounds check forms curptr + N before comparing; for a
 * very large N the pointer arithmetic itself is out of range. Comparing N
 * against (scanner->end - scanner->curptr) would avoid that - consider
 * when the complete function is available.
 */
PJ_DEF(void) pj_scan_get_n( pj_scanner *scanner,
428
unsigned N, pj_str_t *out)
430
if (scanner->curptr + N > scanner->end) {
431
pj_scan_syntax_err(scanner);
435
pj_strset(out, scanner->curptr, N);
437
scanner->curptr += N;
439
if (PJ_SCAN_IS_PROBABLY_SPACE(*scanner->curptr) && scanner->skip_ws) {
440
pj_scan_skip_whitespace(scanner);
445
/*
 * Get a single character from the current position.
 *
 * Reads the character at curptr into chr. A call to pj_scan_syntax_err()
 * is visible, and afterwards whitespace is skipped if the character at
 * curptr is probably whitespace and skip_ws is enabled.
 *
 * NOTE(review): the condition guarding the syntax error, the advance of
 * curptr and the return statement are not visible in this extract;
 * presumably the error is raised at end of input and chr is returned -
 * confirm against the upstream source.
 */
PJ_DEF(int) pj_scan_get_char( pj_scanner *scanner )
447
int chr = *scanner->curptr;
450
pj_scan_syntax_err(scanner);
456
if (PJ_SCAN_IS_PROBABLY_SPACE(*scanner->curptr) && scanner->skip_ws) {
457
pj_scan_skip_whitespace(scanner);
463
/*
 * Consume a newline at the current position.
 *
 * Raises a syntax error through pj_scan_syntax_err() if the character at
 * curptr is neither CR nor LF. The code then tests for '\r' and, after
 * that, for '\n', and sets start_line to the resulting curptr.
 *
 * Automatic whitespace skipping is intentionally not performed here: the
 * retained note below explains that doing so made the scanner swallow a
 * second newline by mistaking it for a header continuation (PROTOS test
 * #2480).
 *
 * NOTE(review): the statements that advance curptr and count the line are
 * not visible in this extract - confirm against the upstream source.
 */
PJ_DEF(void) pj_scan_get_newline( pj_scanner *scanner )
465
if (!PJ_SCAN_IS_NEWLINE(*scanner->curptr)) {
466
pj_scan_syntax_err(scanner);
470
if (*scanner->curptr == '\r') {
473
if (*scanner->curptr == '\n') {
478
scanner->start_line = scanner->curptr;
481
* This probably is a bug, see PROTOS test #2480.
482
* This would cause scanner to incorrectly eat two new lines, e.g.
485
* Content-Length: 120\r\n
487
* <space><space><space>...
489
* When pj_scan_get_newline() is called to parse the first newline
490
* in the Content-Length header, it will eat the second newline
491
* too because it thinks that it's a header continuation.
493
* if (PJ_SCAN_IS_PROBABLY_SPACE(*scanner->curptr) && scanner->skip_ws) {
494
* pj_scan_skip_whitespace(scanner);
500
/*
 * Get the characters up to the first one matching spec.
 *
 * Raises a syntax error through pj_scan_syntax_err() if curptr is already
 * at or past the end of the buffer. Otherwise a local pointer advances
 * while it has not reached scanner->end and the current character does
 * not match spec; out is set to the span from curptr up to that pointer
 * with pj_strset3(). Whitespace is then skipped if the stopping character
 * is probably whitespace and skip_ws is enabled.
 *
 * NOTE(review): the statement that stores the new position in
 * scanner->curptr is not visible in this extract - confirm against the
 * upstream source.
 */
PJ_DEF(void) pj_scan_get_until( pj_scanner *scanner,
501
const pj_cis_t *spec, pj_str_t *out)
503
register char *s = scanner->curptr;
505
if (s >= scanner->end) {
506
pj_scan_syntax_err(scanner);
510
while (PJ_SCAN_CHECK_EOF(s) && !pj_cis_match(spec, *s)) {
514
pj_strset3(out, scanner->curptr, s);
518
if (PJ_SCAN_IS_PROBABLY_SPACE(*s) && scanner->skip_ws) {
519
pj_scan_skip_whitespace(scanner);
524
/*
 * Get the characters up to the first occurrence of until_char.
 *
 * Raises a syntax error through pj_scan_syntax_err() if curptr is already
 * at or past the end of the buffer. Otherwise a local pointer advances
 * while it has not reached scanner->end and the current character differs
 * from until_char; out is set to the span from curptr up to that pointer
 * with pj_strset3(). Whitespace is then skipped if the stopping character
 * is probably whitespace and skip_ws is enabled.
 *
 * NOTE(review): the statement that stores the new position in
 * scanner->curptr is not visible in this extract - confirm against the
 * upstream source.
 */
PJ_DEF(void) pj_scan_get_until_ch( pj_scanner *scanner,
525
int until_char, pj_str_t *out)
527
register char *s = scanner->curptr;
529
if (s >= scanner->end) {
530
pj_scan_syntax_err(scanner);
534
while (PJ_SCAN_CHECK_EOF(s) && *s != until_char) {
538
pj_strset3(out, scanner->curptr, s);
542
if (PJ_SCAN_IS_PROBABLY_SPACE(*s) && scanner->skip_ws) {
543
pj_scan_skip_whitespace(scanner);
548
/*
 * Get the characters up to the first one that appears in until_spec.
 *
 * until_spec is a NUL-terminated string of stop characters; its length is
 * taken once with strlen() and each input character is looked up with
 * memchr(). Raises a syntax error through pj_scan_syntax_err() if curptr
 * is already at or past the end of the buffer. Otherwise a local pointer
 * advances until the end of the buffer or a stop character is reached,
 * and out is set to the span from curptr up to that pointer with
 * pj_strset3(). Whitespace is then skipped if the stopping character is
 * probably whitespace and skip_ws is enabled.
 *
 * NOTE(review): the declaration of speclen and the statement that stores
 * the new position in scanner->curptr are not visible in this extract.
 * NOTE(review): strlen() and memchr() are called directly; no <string.h>
 * include is visible here, presumably it arrives through a pj header -
 * confirm.
 */
PJ_DEF(void) pj_scan_get_until_chr( pj_scanner *scanner,
549
const char *until_spec, pj_str_t *out)
551
register char *s = scanner->curptr;
554
if (s >= scanner->end) {
555
pj_scan_syntax_err(scanner);
559
speclen = strlen(until_spec);
560
while (PJ_SCAN_CHECK_EOF(s) && !memchr(until_spec, *s, speclen)) {
564
pj_strset3(out, scanner->curptr, s);
568
if (PJ_SCAN_IS_PROBABLY_SPACE(*s) && scanner->skip_ws) {
569
pj_scan_skip_whitespace(scanner);
573
/*
 * Advance the scanner by N characters without returning them.
 *
 * Raises a syntax error through pj_scan_syntax_err() if curptr + N lies
 * beyond scanner->end. Otherwise curptr is advanced by N. Whitespace is
 * then skipped when the next character is probably whitespace and the
 * skip_ws argument is non-zero; note that this function consults its own
 * skip_ws parameter rather than scanner->skip_ws.
 *
 * NOTE(review): the bounds check forms curptr + N before comparing; for a
 * very large N the pointer arithmetic itself is out of range.
 */
PJ_DEF(void) pj_scan_advance_n( pj_scanner *scanner,
574
unsigned N, pj_bool_t skip_ws)
576
if (scanner->curptr + N > scanner->end) {
577
pj_scan_syntax_err(scanner);
581
scanner->curptr += N;
583
if (PJ_SCAN_IS_PROBABLY_SPACE(*scanner->curptr) && skip_ws) {
584
pj_scan_skip_whitespace(scanner);
589
/*
 * Compare the next len characters of the input with s, case-sensitively.
 *
 * Raises a syntax error through pj_scan_syntax_err() if curptr + len lies
 * beyond scanner->end. Otherwise returns the result of strncmp() on
 * curptr and s for len characters. The scanner position is not changed by
 * the visible code.
 *
 * NOTE(review): len is a signed int; a negative value would pass the
 * visible bounds check and be converted to a very large count for
 * strncmp() - confirm callers never pass a negative length.
 * NOTE(review): the value returned after a syntax error is not visible in
 * this extract.
 */
PJ_DEF(int) pj_scan_strcmp( pj_scanner *scanner, const char *s, int len)
591
if (scanner->curptr + len > scanner->end) {
592
pj_scan_syntax_err(scanner);
595
return strncmp(scanner->curptr, s, len);
599
/*
 * Compare the next len characters of the input with s using
 * pj_ansi_strnicmp().
 *
 * Raises a syntax error through pj_scan_syntax_err() if curptr + len lies
 * beyond scanner->end. Otherwise returns the result of pj_ansi_strnicmp()
 * on curptr and s for len characters. The scanner position is not changed
 * by the visible code.
 *
 * NOTE(review): len is a signed int; a negative value would pass the
 * visible bounds check - confirm callers never pass a negative length.
 * NOTE(review): the value returned after a syntax error is not visible in
 * this extract.
 */
PJ_DEF(int) pj_scan_stricmp( pj_scanner *scanner, const char *s, int len)
601
if (scanner->curptr + len > scanner->end) {
602
pj_scan_syntax_err(scanner);
605
return pj_ansi_strnicmp(scanner->curptr, s, len);
608
/*
 * Compare the next len characters of the input with s using
 * strnicmp_alnum().
 *
 * Raises a syntax error through pj_scan_syntax_err() if curptr + len lies
 * beyond scanner->end. Otherwise returns the result of strnicmp_alnum()
 * on curptr and s for len characters. The scanner position is not changed
 * by the visible code.
 *
 * NOTE(review): the parameter line declaring len and the definition of
 * strnicmp_alnum() are not visible in this extract - confirm against the
 * upstream source.
 */
PJ_DEF(int) pj_scan_stricmp_alnum( pj_scanner *scanner, const char *s,
611
if (scanner->curptr + len > scanner->end) {
612
pj_scan_syntax_err(scanner);
615
return strnicmp_alnum(scanner->curptr, s, len);
618
/*
 * Capture the scanner's current position so it can be restored later.
 *
 * Copies the read pointer (curptr), the line counter (line) and the
 * start-of-line pointer (start_line) from scanner into state. The scanner
 * itself is not modified.
 */
PJ_DEF(void) pj_scan_save_state( const pj_scanner *scanner,
                                 pj_scan_state *state)
{
    state->curptr = scanner->curptr;
    state->line = scanner->line;
    state->start_line = scanner->start_line;
}
627
/*
 * Restore a scanner position previously captured in state.
 *
 * Copies curptr, line and start_line from state back into scanner. The
 * buffer bounds (begin, end) are not touched by the visible code.
 *
 * NOTE(review): the end of this definition lies beyond the visible
 * extract - confirm no further statements follow.
 */
PJ_DEF(void) pj_scan_restore_state( pj_scanner *scanner,
628
pj_scan_state *state)
630
scanner->curptr = state->curptr;
631
scanner->line = state->line;
632
scanner->start_line = state->start_line;