~ubuntu-branches/ubuntu/karmic/postgresql-8.4/karmic-security

« back to all changes in this revision

Viewing changes to src/pl/plpgsql/src/scan.l

  • Committer: Bazaar Package Importer
  • Author(s): Martin Pitt
  • Date: 2009-05-05 00:58:06 UTC
  • mfrom: (1.1.2 upstream)
  • Revision ID: james.westby@ubuntu.com-20090505005806-c19tt7oyqb7kuw49
Tags: 8.4~beta1+cvs20090503-1
New upstream snapshot.

Show diffs side-by-side

added

removed

Lines of Context:
19
19
#include "mb/pg_wchar.h"
20
20
 
21
21
 
22
 
/* No reason to constrain amount of data slurped */
23
 
#define YY_READ_BUF_SIZE 16777216
24
 
 
25
22
/* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
26
23
#undef fprintf
27
24
#define fprintf(file, fmt, msg)  ereport(ERROR, (errmsg_internal("%s", msg)))
28
25
 
 
26
/*
 
27
 * When we parse a token that requires multiple lexer rules to process,
 
28
 * remember the token's starting position this way.
 
29
 */
 
30
#define SAVE_TOKEN_START()  \
 
31
        ( start_lineno = plpgsql_scanner_lineno(), start_charpos = yytext )
 
32
 
29
33
/* Handles to the buffer that the lexer uses internally */
30
34
static YY_BUFFER_STATE scanbufhandle;
31
35
static char *scanbuf;
32
36
 
33
37
static const char *scanstr;             /* original input string */
34
38
 
35
 
static int      scanner_functype;
36
 
static bool     scanner_typereported;
37
39
static int      pushback_token;
38
40
static bool have_pushback_token;
39
41
static const char *cur_line_start;
40
42
static int      cur_line_num;
 
43
static int              xcdepth = 0;    /* depth of nesting in slash-star comments */
41
44
static char    *dolqstart;      /* current $foo$ quote start string */
42
 
static int      dolqlen;                        /* signal to plpgsql_get_string_value */
 
45
 
 
46
extern PGDLLIMPORT bool standard_conforming_strings;
43
47
 
44
48
bool plpgsql_SpaceScanned = false;
45
49
%}
54
58
 
55
59
%option case-insensitive
56
60
 
57
 
 
58
 
%x      IN_STRING
59
 
%x      IN_COMMENT
60
 
%x      IN_DOLLARQUOTE
 
61
/*
 
62
 * Exclusive states are a subset of the core lexer's:
 
63
 *  <xc> extended C-style comments
 
64
 *  <xq> standard quoted strings
 
65
 *  <xe> extended quoted strings (support backslash escape sequences)
 
66
 *  <xdolq> $foo$ quoted strings
 
67
 */
 
68
 
 
69
%x xc
 
70
%x xe
 
71
%x xq
 
72
%x xdolq
 
73
 
 
74
/*
 
75
 * Definitions --- these generally must match the core lexer, but in some
 
76
 * cases we can simplify, since we only care about identifying the token
 
77
 * boundaries and not about deriving the represented value.  Also, we
 
78
 * aren't trying to lex multicharacter operators so their interactions
 
79
 * with comments go away.
 
80
 */
 
81
 
 
82
space                   [ \t\n\r\f]
 
83
horiz_space             [ \t\f]
 
84
newline                 [\n\r]
 
85
non_newline             [^\n\r]
 
86
 
 
87
comment                 ("--"{non_newline}*)
 
88
 
 
89
whitespace              ({space}+|{comment})
 
90
special_whitespace              ({space}+|{comment}{newline})
 
91
horiz_whitespace                ({horiz_space}|{comment})
 
92
whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*)
 
93
 
 
94
quote                   '
 
95
quotestop               {quote}{whitespace}*
 
96
quotecontinue   {quote}{whitespace_with_newline}{quote}
 
97
quotefail               {quote}{whitespace}*"-"
 
98
 
 
99
xestart                 [eE]{quote}
 
100
xeinside                [^\\']+
 
101
xeescape                [\\].
 
102
 
 
103
xqstart                 {quote}
 
104
xqdouble                {quote}{quote}
 
105
xqinside                [^']+
 
106
 
 
107
dolq_start              [A-Za-z\200-\377_]
 
108
dolq_cont               [A-Za-z\200-\377_0-9]
 
109
dolqdelim               \$({dolq_start}{dolq_cont}*)?\$
 
110
dolqfailed              \${dolq_start}{dolq_cont}*
 
111
dolqinside              [^$]+
 
112
 
 
113
xcstart                 \/\*
 
114
xcstop                  \*+\/
 
115
xcinside                [^*/]+
61
116
 
62
117
digit                   [0-9]
63
118
ident_start             [A-Za-z\200-\377_]
64
119
ident_cont              [A-Za-z\200-\377_0-9\$]
65
120
 
 
121
/* This is a simpler treatment of quoted identifiers than the core uses */
66
122
quoted_ident    (\"[^\"]*\")+
67
123
 
68
124
identifier              ({ident_start}{ident_cont}*|{quoted_ident})
69
125
 
70
126
param                   \${digit}+
71
127
 
72
 
space                   [ \t\n\r\f]
73
 
 
74
 
/* $foo$ style quotes ("dollar quoting")
75
 
 * copied straight from the backend SQL parser
76
 
 */
77
 
dolq_start              [A-Za-z\200-\377_]
78
 
dolq_cont               [A-Za-z\200-\377_0-9]
79
 
dolqdelim               \$({dolq_start}{dolq_cont}*)?\$
80
 
dolqinside              [^$]+
81
 
 
82
128
%%
83
129
    /* ----------
84
130
     * Local variables in scanner to remember where
96
142
    plpgsql_SpaceScanned = false;
97
143
 
98
144
    /* ----------
99
 
     * On the first call to a new source report the
100
 
     * function's type (T_FUNCTION or T_TRIGGER)
101
 
     * ----------
102
 
     */
103
 
        if (!scanner_typereported)
104
 
        {
105
 
                scanner_typereported = true;
106
 
                return scanner_functype;
107
 
        }
108
 
 
109
 
    /* ----------
110
145
     * The keyword rules
111
146
     * ----------
112
147
     */
225
260
 
226
261
{digit}+                { return T_NUMBER;                      }
227
262
 
228
 
\".                             {
229
 
                                plpgsql_error_lineno = plpgsql_scanner_lineno();
230
 
                                ereport(ERROR,
231
 
                                                (errcode(ERRCODE_DATATYPE_MISMATCH),
232
 
                                                 errmsg("unterminated quoted identifier")));
233
 
                        }
234
 
 
235
 
    /* ----------
236
 
     * Ignore whitespaces but remember this happened
237
 
     * ----------
238
 
     */
239
 
{space}+                { plpgsql_SpaceScanned = true;          }
240
 
 
241
 
    /* ----------
242
 
     * Eat up comments
243
 
     * ----------
244
 
     */
245
 
--[^\r\n]*              ;
246
 
 
247
 
\/\*                    { start_lineno = plpgsql_scanner_lineno();
248
 
                          BEGIN(IN_COMMENT);
249
 
                        }
250
 
<IN_COMMENT>\*\/        { BEGIN(INITIAL); plpgsql_SpaceScanned = true; }
251
 
<IN_COMMENT>\n          ;
252
 
<IN_COMMENT>.           ;
253
 
<IN_COMMENT><<EOF>>     {
254
 
                                plpgsql_error_lineno = start_lineno;
255
 
                                ereport(ERROR,
256
 
                                                (errcode(ERRCODE_DATATYPE_MISMATCH),
257
 
                                                 errmsg("unterminated /* comment")));
258
 
                        }
259
 
 
260
 
    /* ----------
261
 
     * Collect anything inside of ''s and return one STRING token
262
 
         *
263
 
         * Hacking yytext/yyleng here lets us avoid using yymore(), which is
264
 
         * a win for performance.  It's safe because we know the underlying
265
 
         * input buffer is not changing.
266
 
     * ----------
267
 
     */
268
 
'                       {
269
 
                          start_lineno = plpgsql_scanner_lineno();
270
 
                          start_charpos = yytext;
271
 
                          BEGIN(IN_STRING);
272
 
                        }
273
 
[eE]'           {
274
 
                          /* for now, treat the same as a regular literal */
275
 
                          start_lineno = plpgsql_scanner_lineno();
276
 
                          start_charpos = yytext;
277
 
                          BEGIN(IN_STRING);
278
 
                        }
279
 
<IN_STRING>\\.          { }
280
 
<IN_STRING>\\           { /* can only happen with \ at EOF */ }
281
 
<IN_STRING>''           { }
282
 
<IN_STRING>'            {
283
 
                          /* tell plpgsql_get_string_value it's not a dollar quote */
284
 
                          dolqlen = 0;
285
 
                          /* adjust yytext/yyleng to describe whole string token */
286
 
                          yyleng += (yytext - start_charpos);
287
 
                          yytext = start_charpos;
288
 
                          BEGIN(INITIAL);
289
 
                          return T_STRING;
290
 
                        }
291
 
<IN_STRING>[^'\\]+      { }
292
 
<IN_STRING><<EOF>>      {
293
 
                                plpgsql_error_lineno = start_lineno;
294
 
                                ereport(ERROR,
295
 
                                                (errcode(ERRCODE_DATATYPE_MISMATCH),
296
 
                                                 errmsg("unterminated quoted string")));
297
 
                        }
298
 
 
299
 
{dolqdelim}             {
300
 
                          start_lineno = plpgsql_scanner_lineno();
301
 
                          start_charpos = yytext;
302
 
                          dolqstart = pstrdup(yytext);
303
 
                          BEGIN(IN_DOLLARQUOTE);
304
 
                        }
305
 
<IN_DOLLARQUOTE>{dolqdelim} {
306
 
                          if (strcmp(yytext, dolqstart) == 0)
307
 
                          {
308
 
                                        pfree(dolqstart);
309
 
                                        /* tell plpgsql_get_string_value it is a dollar quote */
310
 
                                        dolqlen = yyleng;
 
263
\".                             { yyerror("unterminated quoted identifier"); }
 
264
 
 
265
    /* ----------
 
266
     * Ignore whitespace (including comments) but remember this happened
 
267
     * ----------
 
268
     */
 
269
{whitespace}    { plpgsql_SpaceScanned = true; }
 
270
 
 
271
    /* ----------
 
272
     * Comment and literal handling is mostly copied from the core lexer
 
273
     * ----------
 
274
     */
 
275
{xcstart}               {
 
276
                                        /* Set location in case of syntax error in comment */
 
277
                                        SAVE_TOKEN_START();
 
278
                                        xcdepth = 0;
 
279
                                        BEGIN(xc);
 
280
                                        plpgsql_SpaceScanned = true;
 
281
                                }
 
282
 
 
283
<xc>{xcstart}   {
 
284
                                        xcdepth++;
 
285
                                }
 
286
 
 
287
<xc>{xcstop}    {
 
288
                                        if (xcdepth <= 0)
 
289
                                                BEGIN(INITIAL);
 
290
                                        else
 
291
                                                xcdepth--;
 
292
                                }
 
293
 
 
294
<xc>{xcinside}  {
 
295
                                        /* ignore */
 
296
                                }
 
297
 
 
298
<xc>\/+                 {
 
299
                                        /* ignore */
 
300
                                }
 
301
 
 
302
<xc>\*+                 {
 
303
                                        /* ignore */
 
304
                                }
 
305
 
 
306
<xc><<EOF>>             { yyerror("unterminated /* comment"); }
 
307
 
 
308
{xqstart}               {
 
309
                                        SAVE_TOKEN_START();
 
310
                                        if (standard_conforming_strings)
 
311
                                                BEGIN(xq);
 
312
                                        else
 
313
                                                BEGIN(xe);
 
314
                                }
 
315
{xestart}               {
 
316
                                        SAVE_TOKEN_START();
 
317
                                        BEGIN(xe);
 
318
                                }
 
319
<xq,xe>{quotestop}      |
 
320
<xq,xe>{quotefail} {
 
321
                                        yyless(1);
 
322
                                        BEGIN(INITIAL);
311
323
                                        /* adjust yytext/yyleng to describe whole string token */
312
324
                                        yyleng += (yytext - start_charpos);
313
325
                                        yytext = start_charpos;
314
 
                                        BEGIN(INITIAL);
315
326
                                        return T_STRING;
316
 
                          }
317
 
                          else
318
 
                          {
319
 
                                        /*
320
 
                                         * When we fail to match $...$ to dolqstart, transfer
321
 
                                         * the $... part to the output, but put back the final
322
 
                                         * $ for rescanning.  Consider $delim$...$junk$delim$
323
 
                                         */
324
 
                                        yyless(yyleng-1);
325
 
                          }
326
 
                        }
327
 
<IN_DOLLARQUOTE>{dolqinside} { }
328
 
<IN_DOLLARQUOTE>.       { /* needed for $ inside the quoted text */ }
329
 
<IN_DOLLARQUOTE><<EOF>> {
330
 
                                plpgsql_error_lineno = start_lineno;
331
 
                                ereport(ERROR,
332
 
                                                (errcode(ERRCODE_DATATYPE_MISMATCH),
333
 
                                                 errmsg("unterminated dollar-quoted string")));
334
 
                        }
 
327
                                }
 
328
<xq,xe>{xqdouble} {
 
329
                                }
 
330
<xq>{xqinside}  {
 
331
                                }
 
332
<xe>{xeinside}  {
 
333
                                }
 
334
<xe>{xeescape}  {
 
335
                                }
 
336
<xq,xe>{quotecontinue} {
 
337
                                        /* ignore */
 
338
                                }
 
339
<xe>.                   {
 
340
                                        /* This is only needed for \ just before EOF */
 
341
                                }
 
342
<xq,xe><<EOF>>          { yyerror("unterminated quoted string"); }
 
343
 
 
344
{dolqdelim}             {
 
345
                                        SAVE_TOKEN_START();
 
346
                                        dolqstart = pstrdup(yytext);
 
347
                                        BEGIN(xdolq);
 
348
                                }
 
349
{dolqfailed}    {
 
350
                                        /* throw back all but the initial "$" */
 
351
                                        yyless(1);
 
352
                                        /* and treat it as {other} */
 
353
                                        return yytext[0];
 
354
                                }
 
355
<xdolq>{dolqdelim} {
 
356
                                        if (strcmp(yytext, dolqstart) == 0)
 
357
                                        {
 
358
                                                pfree(dolqstart);
 
359
                                                BEGIN(INITIAL);
 
360
                                                /* adjust yytext/yyleng to describe whole string */
 
361
                                                yyleng += (yytext - start_charpos);
 
362
                                                yytext = start_charpos;
 
363
                                                return T_STRING;
 
364
                                        }
 
365
                                        else
 
366
                                        {
 
367
                                                /*
 
368
                                                 * When we fail to match $...$ to dolqstart, transfer
 
369
                                                 * the $... part to the output, but put back the final
 
370
                                                 * $ for rescanning.  Consider $delim$...$junk$delim$
 
371
                                                 */
 
372
                                                yyless(yyleng-1);
 
373
                                        }
 
374
                                }
 
375
<xdolq>{dolqinside} {
 
376
                                }
 
377
<xdolq>{dolqfailed} {
 
378
                                }
 
379
<xdolq>.                {
 
380
                                        /* This is only needed for $ inside the quoted text */
 
381
                                }
 
382
<xdolq><<EOF>>  { yyerror("unterminated dollar-quoted string"); }
335
383
 
336
384
    /* ----------
337
385
     * Any unmatched character is returned as is
338
386
     * ----------
339
387
     */
340
 
.                       { return yytext[0];                     }
 
388
.                               {
 
389
                                        return yytext[0];
 
390
                                }
341
391
 
342
392
%%
343
393
 
437
487
 * to cite in error messages.
438
488
 */
439
489
void
440
 
plpgsql_scanner_init(const char *str, int functype)
 
490
plpgsql_scanner_init(const char *str)
441
491
{
442
492
        Size    slen;
443
493
 
460
510
        /* Other setup */
461
511
        scanstr = str;
462
512
 
463
 
    scanner_functype = functype;
464
 
    scanner_typereported = false;
465
 
 
466
513
        have_pushback_token = false;
467
514
 
468
515
        cur_line_start = scanbuf;
493
540
        yy_delete_buffer(scanbufhandle);
494
541
        pfree(scanbuf);
495
542
}
496
 
 
497
 
/*
498
 
 * Called after a T_STRING token is read to get the string literal's value
499
 
 * as a palloc'd string.  (We make this a separate call because in many
500
 
 * scenarios there's no need to get the decoded value.)
501
 
 *
502
 
 * Note: we expect the literal to be the most recently lexed token.  This
503
 
 * would not work well if we supported multiple-token pushback or if
504
 
 * plpgsql_yylex() wanted to read ahead beyond a T_STRING token.
505
 
 */
506
 
char *
507
 
plpgsql_get_string_value(void)
508
 
{
509
 
        char       *result;
510
 
        const char *cp;
511
 
        int                     len;
512
 
 
513
 
        if (dolqlen > 0)
514
 
        {
515
 
                /* Token is a $foo$...$foo$ string */
516
 
                len = yyleng - 2 * dolqlen;
517
 
                Assert(len >= 0);
518
 
                result = (char *) palloc(len + 1);
519
 
                memcpy(result, yytext + dolqlen, len);
520
 
                result[len] = '\0';
521
 
        }
522
 
        else if (*yytext == 'E' || *yytext == 'e')
523
 
        {
524
 
                /* Token is an E'...' string */
525
 
                result = (char *) palloc(yyleng + 1);   /* more than enough room */
526
 
                len = 0;
527
 
                for (cp = yytext + 2; *cp; cp++)
528
 
                {
529
 
                        if (*cp == '\'')
530
 
                        {
531
 
                                if (cp[1] == '\'')
532
 
                                        result[len++] = *cp++;
533
 
                                /* else it must be string end quote */
534
 
                        }
535
 
                        else if (*cp == '\\')
536
 
                        {
537
 
                                if (cp[1] != '\0')      /* just a paranoid check */
538
 
                                        result[len++] = *(++cp);
539
 
                        }
540
 
                        else
541
 
                                result[len++] = *cp;
542
 
                }
543
 
                result[len] = '\0';
544
 
        }
545
 
        else
546
 
        {
547
 
                /* Token is a '...' string */
548
 
                result = (char *) palloc(yyleng + 1);   /* more than enough room */
549
 
                len = 0;
550
 
                for (cp = yytext + 1; *cp; cp++)
551
 
                {
552
 
                        if (*cp == '\'')
553
 
                        {
554
 
                                if (cp[1] == '\'')
555
 
                                        result[len++] = *cp++;
556
 
                                /* else it must be string end quote */
557
 
                        }
558
 
                        else if (*cp == '\\')
559
 
                        {
560
 
                                if (cp[1] != '\0')      /* just a paranoid check */
561
 
                                        result[len++] = *(++cp);
562
 
                        }
563
 
                        else
564
 
                                result[len++] = *cp;
565
 
                }
566
 
                result[len] = '\0';
567
 
        }
568
 
        return result;
569
 
}