~vcs-imports/mammoth-replicator/trunk

« back to all changes in this revision

Viewing changes to src/bin/psql/psqlscan.l

  • Committer: alvherre
  • Date: 2005-12-16 21:24:52 UTC
  • Revision ID: svn-v4:db760fc0-0f08-0410-9d63-cc6633f64896:trunk:1
Initial import of the REL8_0_3 sources from the Pgsql CVS repository.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
%{
 
2
/*-------------------------------------------------------------------------
 
3
 *
 
4
 * psqlscan.l
 
5
 *        lexical scanner for psql
 
6
 *
 
7
 * This code is mainly needed to determine where the end of a SQL statement
 
8
 * is: we are looking for semicolons that are not within quotes, comments,
 
9
 * or parentheses.  The most reliable way to handle this is to borrow the
 
10
 * backend's flex lexer rules, lock, stock, and barrel.  The rules below
 
11
 * are (except for a few) the same as the backend's, but their actions are
 
12
 * just ECHO whereas the backend's actions generally do other things.
 
13
 *
 
14
 * XXX The rules in this file must be kept in sync with the main parser!!!
 
15
 *
 
16
 * The most difficult aspect of this code is that we need to work in multibyte
 
17
 * encodings that are not ASCII-safe.  A "safe" encoding is one in which each
 
18
 * byte of a multibyte character has the high bit set (it's >= 0x80).  Since
 
19
 * all our lexing rules treat all high-bit-set characters alike, we don't
 
20
 * really need to care whether such a byte is part of a sequence or not.
 
21
 * In an "unsafe" encoding, we still expect the first byte of a multibyte
 
22
 * sequence to be >= 0x80, but later bytes might not be.  If we scan such
 
23
 * a sequence as-is, the lexing rules could easily be fooled into matching
 
24
 * such bytes to ordinary ASCII characters.  Our solution for this is to
 
25
 * substitute 0xFF for each non-first byte within the data presented to flex.
 
26
 * The flex rules will then pass the FF's through unmolested.  The emit()
 
27
 * subroutine is responsible for looking back to the original string and
 
28
 * replacing FF's with the corresponding original bytes.
 
29
 *
 
30
 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
 
31
 * Portions Copyright (c) 1994, Regents of the University of California
 
32
 *
 
33
 * IDENTIFICATION
 
34
 *        $PostgreSQL: pgsql/src/bin/psql/psqlscan.l,v 1.9 2004-12-31 22:03:15 pgsql Exp $
 
35
 *
 
36
 *-------------------------------------------------------------------------
 
37
 */
 
38
#include "postgres_fe.h"
 
39
 
 
40
#include "psqlscan.h"
 
41
 
 
42
#include <ctype.h>
 
43
 
 
44
#include "mb/pg_wchar.h"
 
45
 
 
46
#include "common.h"
 
47
#include "settings.h"
 
48
#include "variables.h"
 
49
 
 
50
 
 
51
/*
 
52
 * We use a stack of flex buffers to handle substitution of psql variables.
 
53
 * Each stacked buffer contains the as-yet-unread text from one psql variable.
 
54
 * When we pop the stack all the way, we resume reading from the outer buffer
 
55
 * identified by scanbufhandle.
 
56
 */
 
57
typedef struct StackElem
 
58
{
 
59
        YY_BUFFER_STATE buf;            /* flex input control structure */
 
60
        char       *bufstring;          /* data actually being scanned by flex */
 
61
        char       *origstring;         /* copy of original data, if needed */
 
62
        struct StackElem *next;
 
63
} StackElem;
 
64
 
 
65
/*
 
66
 * All working state of the lexer must be stored in PsqlScanStateData
 
67
 * between calls.  This allows us to have multiple open lexer operations,
 
68
 * which is needed for nested include files.  The lexer itself is not
 
69
 * recursive, but it must be re-entrant.
 
70
 */
 
71
typedef struct PsqlScanStateData
 
72
{
 
73
        StackElem  *buffer_stack;       /* stack of variable expansion buffers */
 
74
        /*
 
75
         * These variables always refer to the outer buffer, never to any
 
76
         * stacked variable-expansion buffer.
 
77
         */
 
78
        YY_BUFFER_STATE scanbufhandle;
 
79
        char       *scanbuf;            /* start of outer-level input buffer */
 
80
        const char *scanline;           /* current input line at outer level */
 
81
 
 
82
        /* safe_encoding, curline, refline are used by emit() to replace FFs */
 
83
        int                     encoding;               /* encoding being used now */
 
84
        bool            safe_encoding;  /* is current encoding "safe"? */
 
85
        const char *curline;            /* actual flex input string for cur buf */
 
86
        const char *refline;            /* original data for cur buffer */
 
87
 
 
88
        /*
 
89
         * All this state lives across successive input lines, until explicitly
 
90
         * reset by psql_scan_reset.
 
91
         */
 
92
        int                     start_state;    /* saved YY_START */
 
93
        int                     paren_depth;    /* depth of nesting in parentheses */
 
94
        int                     xcdepth;                /* depth of nesting in slash-star comments */
 
95
        char       *dolqstart;          /* current $foo$ quote start string */
 
96
} PsqlScanStateData;
 
97
 
 
98
static PsqlScanState cur_state; /* current state while active */
 
99
 
 
100
static PQExpBuffer output_buf;  /* current output buffer */
 
101
 
 
102
/* these variables do not need to be saved across calls */
 
103
static enum slash_option_type option_type;
 
104
static char *option_quote;
 
105
 
 
106
 
 
107
/* Return values from yylex() */
 
108
#define LEXRES_EOL                      0       /* end of input */
 
109
#define LEXRES_SEMI                     1       /* command-terminating semicolon found */
 
110
#define LEXRES_BACKSLASH        2       /* backslash command start */
 
111
#define LEXRES_OK                       3       /* OK completion of backslash argument */
 
112
 
 
113
 
 
114
int     yylex(void);
 
115
 
 
116
static void push_new_buffer(const char *newstr);
 
117
static YY_BUFFER_STATE prepare_buffer(const char *txt, int len,
 
118
                                                                          char **txtcopy);
 
119
static void emit(const char *txt, int len);
 
120
 
 
121
#define ECHO emit(yytext, yyleng)
 
122
 
 
123
%}
 
124
 
 
125
%option 8bit
 
126
%option never-interactive
 
127
%option nodefault
 
128
%option nounput
 
129
%option noyywrap
 
130
 
 
131
/*
 
132
 * All of the following definitions and rules should exactly match
 
133
 * src/backend/parser/scan.l so far as the flex patterns are concerned.
 
134
 * The rule bodies are just ECHO as opposed to what the backend does,
 
135
 * however.  (But be sure to duplicate code that affects the lexing process,
 
136
 * such as BEGIN().)  Also, psqlscan uses a single <<EOF>> rule whereas
 
137
 * scan.l has a separate one for each exclusive state.
 
138
 */
 
139
 
 
140
/*
 
141
 * OK, here is a short description of lex/flex rules behavior.
 
142
 * The longest pattern which matches an input string is always chosen.
 
143
 * For equal-length patterns, the first occurring in the rules list is chosen.
 
144
 * INITIAL is the starting state, to which all non-conditional rules apply.
 
145
 * Exclusive states change parsing rules while the state is active.  When in
 
146
 * an exclusive state, only those rules defined for that state apply.
 
147
 *
 
148
 * We use exclusive states for quoted strings, extended comments,
 
149
 * and to eliminate parsing troubles for numeric strings.
 
150
 * Exclusive states:
 
151
 *  <xb> bit string literal
 
152
 *  <xc> extended C-style comments
 
153
 *  <xd> delimited identifiers (double-quoted identifiers)
 
154
 *  <xh> hexadecimal numeric string
 
155
 *  <xq> quoted strings
 
156
 *  <xdolq> $foo$ quoted strings
 
157
 */
 
158
 
 
159
%x xb
 
160
%x xc
 
161
%x xd
 
162
%x xh
 
163
%x xq
 
164
%x xdolq
 
165
/* Additional exclusive states for psql only: lex backslash commands */
 
166
%x xslashcmd
 
167
%x xslasharg
 
168
%x xslashquote
 
169
%x xslashbackquote
 
170
%x xslashdefaultarg
 
171
%x xslashquotedarg
 
172
%x xslashwholeline
 
173
%x xslashend
 
174
 
 
175
/*
 
176
 * In order to make the world safe for Windows and Mac clients as well as
 
177
 * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
 
178
 * sequence will be seen as two successive newlines, but that doesn't cause
 
179
 * any problems.  Comments that start with -- and extend to the next
 
180
 * newline are treated as equivalent to a single whitespace character.
 
181
 *
 
182
 * NOTE a fine point: if there is no newline following --, we will absorb
 
183
 * everything to the end of the input as a comment.  This is correct.  Older
 
184
 * versions of Postgres failed to recognize -- as a comment if the input
 
185
 * did not end with a newline.
 
186
 *
 
187
 * XXX perhaps \f (formfeed) should be treated as a newline as well?
 
188
 */
 
189
 
 
190
space                   [ \t\n\r\f]
 
191
horiz_space             [ \t\f]
 
192
newline                 [\n\r]
 
193
non_newline             [^\n\r]
 
194
 
 
195
comment                 ("--"{non_newline}*)
 
196
 
 
197
whitespace              ({space}+|{comment})
 
198
 
 
199
/*
 
200
 * SQL requires at least one newline in the whitespace separating
 
201
 * string literals that are to be concatenated.  Silly, but who are we
 
202
 * to argue?  Note that {whitespace_with_newline} should not have * after
 
203
 * it, whereas {whitespace} should generally have a * after it...
 
204
 */
 
205
 
 
206
special_whitespace              ({space}+|{comment}{newline})
 
207
horiz_whitespace                ({horiz_space}|{comment})
 
208
whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*)
 
209
 
 
210
/* Bit string
 
211
 * It is tempting to scan the string for only those characters
 
212
 * which are allowed. However, this leads to silently swallowed
 
213
 * characters if illegal characters are included in the string.
 
214
 * For example, if xbinside is [01] then B'ABCD' is interpreted
 
215
 * as a zero-length string, and the ABCD' is lost!
 
216
 * Better to pass the string forward and let the input routines
 
217
 * validate the contents.
 
218
 */
 
219
xbstart                 [bB]{quote}
 
220
xbstop                  {quote}
 
221
xbinside                [^']*
 
222
xbcat                   {quote}{whitespace_with_newline}{quote}
 
223
 
 
224
/* Hexadecimal number
 
225
 */
 
226
xhstart                 [xX]{quote}
 
227
xhstop                  {quote}
 
228
xhinside                [^']*
 
229
xhcat                   {quote}{whitespace_with_newline}{quote}
 
230
 
 
231
/* National character
 
232
 */
 
233
xnstart                 [nN]{quote}
 
234
 
 
235
/* Extended quote
 
236
 * xqdouble implements embedded quote
 
237
 * xqcat allows strings to cross input lines
 
238
 */
 
239
quote                   '
 
240
xqstart                 {quote}
 
241
xqstop                  {quote}
 
242
xqdouble                {quote}{quote}
 
243
xqinside                [^\\']+
 
244
xqescape                [\\][^0-7]
 
245
xqoctesc                [\\][0-7]{1,3}
 
246
xqcat                   {quote}{whitespace_with_newline}{quote}
 
247
 
 
248
/* $foo$ style quotes ("dollar quoting")
 
249
 * The quoted string starts with $foo$ where "foo" is an optional string
 
250
 * in the form of an identifier, except that it may not contain "$", 
 
251
 * and extends to the first occurrence of an identical string.  
 
252
 * There is *no* processing of the quoted text.
 
253
 */
 
254
dolq_start              [A-Za-z\200-\377_]
 
255
dolq_cont               [A-Za-z\200-\377_0-9]
 
256
dolqdelim               \$({dolq_start}{dolq_cont}*)?\$
 
257
dolqinside              [^$]+
 
258
 
 
259
/* Double quote
 
260
 * Allows embedded spaces and other special characters into identifiers.
 
261
 */
 
262
dquote                  \"
 
263
xdstart                 {dquote}
 
264
xdstop                  {dquote}
 
265
xddouble                {dquote}{dquote}
 
266
xdinside                [^"]+
 
267
 
 
268
/* C-style comments
 
269
 *
 
270
 * The "extended comment" syntax closely resembles allowable operator syntax.
 
271
 * The tricky part here is to get lex to recognize a string starting with
 
272
 * slash-star as a comment, when interpreting it as an operator would produce
 
273
 * a longer match --- remember lex will prefer a longer match!  Also, if we
 
274
 * have something like plus-slash-star, lex will think this is a 3-character
 
275
 * operator whereas we want to see it as a + operator and a comment start.
 
276
 * The solution is two-fold:
 
277
 * 1. append {op_chars}* to xcstart so that it matches as much text as
 
278
 *    {operator} would. Then the tie-breaker (first matching rule of same
 
279
 *    length) ensures xcstart wins.  We put back the extra stuff with yyless()
 
280
 *    in case it contains a star-slash that should terminate the comment.
 
281
 * 2. In the operator rule, check for slash-star within the operator, and
 
282
 *    if found throw it back with yyless().  This handles the plus-slash-star
 
283
 *    problem.
 
284
 * Dash-dash comments have similar interactions with the operator rule.
 
285
 */
 
286
xcstart                 \/\*{op_chars}*
 
287
xcstop                  \*+\/
 
288
xcinside                [^*/]+
 
289
 
 
290
digit                   [0-9]
 
291
ident_start             [A-Za-z\200-\377_]
 
292
ident_cont              [A-Za-z\200-\377_0-9\$]
 
293
 
 
294
identifier              {ident_start}{ident_cont}*
 
295
 
 
296
typecast                "::"
 
297
 
 
298
/*
 
299
 * "self" is the set of chars that should be returned as single-character
 
300
 * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
 
301
 * which can be one or more characters long (but if a single-char token
 
302
 * appears in the "self" set, it is not to be returned as an Op).  Note
 
303
 * that the sets overlap, but each has some chars that are not in the other.
 
304
 *
 
305
 * If you change either set, adjust the character lists appearing in the
 
306
 * rule for "operator"!
 
307
 */
 
308
self                    [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
 
309
op_chars                [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
 
310
operator                {op_chars}+
 
311
 
 
312
/* we no longer allow unary minus in numbers. 
 
313
 * instead we pass it separately to parser. there it gets
 
314
 * coerced via doNegate() -- Leon aug 20 1999 
 
315
 */
 
316
 
 
317
integer                 {digit}+
 
318
decimal                 (({digit}*\.{digit}+)|({digit}+\.{digit}*))
 
319
real                    ((({digit}*\.{digit}+)|({digit}+\.{digit}*)|({digit}+))([Ee][-+]?{digit}+))
 
320
 
 
321
param                   \${integer}
 
322
 
 
323
other                   .
 
324
 
 
325
/*
 
326
 * Dollar quoted strings are totally opaque, and no escaping is done on them.
 
327
 * Other quoted strings must allow some special characters such as single-quote
 
328
 *  and newline.
 
329
 * Embedded single-quotes are implemented both in the SQL standard
 
330
 *  style of two adjacent single quotes "''" and in the Postgres/Java style
 
331
 *  of escaped-quote "\'".
 
332
 * Other embedded escaped characters are matched explicitly and the leading
 
333
 *  backslash is dropped from the string by the backend; psql's rules just ECHO.
 
334
 * Note that xcstart must appear before operator, as explained above!
 
335
 *  Also whitespace (comment) must appear before operator.
 
336
 */
 
337
 
 
338
%%
 
339
 
 
340
{whitespace}    {
                    /*
                     * This pattern covers genuine whitespace as well as
                     * single-line ("--" style) comments.  Whitespace is
                     * dropped while the query buffer is still empty, and
                     * single-line comments are dropped always; the latter
                     * is rather dubious, but it is the historical behavior.
                     */
                    if (output_buf->len != 0 && yytext[0] != '-')
                        ECHO;
                }
 
352
 
 
353
{xcstart}       {
                    /*
                     * Opening of an outermost slash-star comment.  Only the
                     * slash-star itself is kept; whatever else the pattern
                     * swallowed is pushed back so it gets rescanned.
                     */
                    yyless(2);
                    cur_state->xcdepth = 0;
                    BEGIN(xc);
                    ECHO;
                }

<xc>{xcstart}   {
                    /*
                     * A slash-star inside a comment opens a nested comment.
                     * As above, give back everything past the slash-star.
                     */
                    yyless(2);
                    cur_state->xcdepth++;
                    ECHO;
                }

<xc>{xcstop}    {
                    if (cur_state->xcdepth > 0)
                    {
                        /* closes a nested comment; we stay in comment state */
                        cur_state->xcdepth--;
                    }
                    else
                    {
                        /* closes the outermost comment */
                        BEGIN(INITIAL);
                    }
                    ECHO;
                }

<xc>{xcinside}  { ECHO; }

<xc>{op_chars}  { ECHO; }
 
385
 
 
386
{xbstart}       { BEGIN(xb); ECHO; }
<xb>{xbstop}    { BEGIN(INITIAL); ECHO; }
<xh>{xhinside}  |
<xb>{xbinside}  { ECHO; }
<xh>{xhcat}     |
<xb>{xbcat}     { ECHO; }

{xhstart}       {
                    /*
                     * Start of a hexadecimal string literal.  The comment
                     * inherited from the backend's rule speaks of placing a
                     * leading "x" on the string; nothing is added here, the
                     * matched text is simply echoed and we enter <xh>.
                     */
                    BEGIN(xh);
                    ECHO;
                }
<xh>{xhstop}    { BEGIN(INITIAL); ECHO; }
 
417
 
 
418
{xnstart}       |
{xqstart}       {
                    /* national-character and plain quotes both open <xq> */
                    BEGIN(xq);
                    ECHO;
                }
<xq>{xqstop}    {
                    /* closing quote: back to normal scanning */
                    BEGIN(INITIAL);
                    ECHO;
                }
<xq>{xqdouble}  |
<xq>{xqinside}  |
<xq>{xqescape}  |
<xq>{xqoctesc}  |
<xq>{xqcat}     {
                    /* everything inside the literal is copied through */
                    ECHO;
                }
<xq>.           {
                    /* Needed only for a \ that comes just before EOF */
                    ECHO;
                }
 
450
 
 
451
{dolqdelim}     {
                    /* Keep a copy of the opening delimiter for later comparison */
                    cur_state->dolqstart = pg_strdup(yytext);
                    BEGIN(xdolq);
                    ECHO;
                }
<xdolq>{dolqdelim} {
                    if (strcmp(yytext, cur_state->dolqstart) != 0)
                    {
                        /*
                         * This $...$ is not our delimiter.  Send the $...
                         * part to the output, but return the trailing $ to
                         * the input so it is scanned again.  Think of
                         * $delim$...$junk$delim$
                         */
                        yyless(yyleng-1);
                    }
                    else
                    {
                        /* matching delimiter: the dollar-quoted string ends */
                        free(cur_state->dolqstart);
                        cur_state->dolqstart = NULL;
                        BEGIN(INITIAL);
                    }
                    ECHO;
                }
<xdolq>{dolqinside} { ECHO; }
<xdolq>.        {
                    /* Needed only for a $ within the quoted text */
                    ECHO;
                }
 
481
 
 
482
{xdstart}       { BEGIN(xd); ECHO; }
<xd>{xdstop}    { BEGIN(INITIAL); ECHO; }
<xd>{xddouble}  { ECHO; }
<xd>{xdinside}  { ECHO; }

{typecast}      { ECHO; }
 
500
 
 
501
        /*
 
502
         * These rules are specific to psql --- they implement parenthesis
 
503
         * counting and detection of command-ending semicolon.  These must
 
504
         * appear before the {self} rule so that they take precedence over it.
 
505
         */
 
506
 
 
507
"("                             {
 
508
                                        cur_state->paren_depth++;
 
509
                                        ECHO;
 
510
                                }
 
511
 
 
512
")"                             {
 
513
                                        if (cur_state->paren_depth > 0)
 
514
                                                cur_state->paren_depth--;
 
515
                                        ECHO;
 
516
                                }
 
517
 
 
518
";"                             {
 
519
                                        ECHO;
 
520
                                        if (cur_state->paren_depth == 0)
 
521
                                        {
 
522
                                                /* Terminate lexing temporarily */
 
523
                                                return LEXRES_SEMI;
 
524
                                        }
 
525
                                }
 
526
 
 
527
        /*
 
528
         * psql-specific rules to handle backslash commands and variable
 
529
         * substitution.  We want these before {self}, also.
 
530
         */
 
531
 
 
532
"\\"[;:]                {
 
533
                                        /* Force a semicolon or colon into the query buffer */
 
534
                                        emit(yytext + 1, 1);
 
535
                                }
 
536
 
 
537
"\\"                    {
 
538
                                        /* Terminate lexing temporarily */
 
539
                                        return LEXRES_BACKSLASH;
 
540
                                }
 
541
 
 
542
:[A-Za-z0-9_]+  {
                    /*
                     * This may be a reference to a psql variable; the name
                     * starts just past the colon.
                     *
                     * NOTE(review): nothing in this rule guards against a
                     * variable whose value refers back to itself -- confirm
                     * whether push_new_buffer() or the caller handles that.
                     */
                    const char *replacement = GetVariable(pset.vars, yytext + 1);

                    if (replacement == NULL)
                    {
                        /* no such variable, so the text is copied as it stands */
                        ECHO;
                    }
                    else
                    {
                        /* it is a variable, so substitute its value */
                        push_new_buffer(replacement);
                        /* yy_scan_string already made buffer active */
                    }
                }
 
563
 
 
564
        /*
 
565
         * Back to backend-compatible rules.
 
566
         */
 
567
 
 
568
{self}          { ECHO; }
 
571
 
 
572
{operator}      {
                    /*
                     * A slash-star or dash-dash inside the matched text
                     * starts a comment, so the operator has to end right
                     * before it.  (When either one is at the very first
                     * character, an earlier rule matches instead of this
                     * one.)
                     */
                    int     keep = yyleng;
                    char   *cstyle = strstr(yytext, "/*");
                    char   *dashes = strstr(yytext, "--");
                    char   *cut;

                    /* pick whichever comment start comes first, if any */
                    if (cstyle == NULL)
                        cut = dashes;
                    else if (dashes == NULL)
                        cut = cstyle;
                    else
                        cut = (dashes < cstyle) ? dashes : cstyle;

                    if (cut != NULL)
                        keep = cut - yytext;

                    /*
                     * SQL compatibility: a multi-character operator may end
                     * in '+' or '-' only if it also contains a character
                     * that cannot occur in SQL operators.  Thus '=-' is
                     * lexed as two operators, while a name such as '?-',
                     * which cannot be a sequence of SQL operators, stays
                     * legal.
                     */
                    while (keep > 1 &&
                           (yytext[keep - 1] == '+' || yytext[keep - 1] == '-'))
                    {
                        int     pos;

                        /* look at every char ahead of the trailing +/- */
                        for (pos = 0; pos < keep - 1; pos++)
                        {
                            if (strchr("~!@#^&|`?%", yytext[pos]))
                                break;
                        }
                        if (pos < keep - 1)
                            break;      /* a non-SQL char makes the ending OK */
                        keep--;         /* otherwise drop the +/- and retry */
                    }

                    if (keep < yyleng)
                    {
                        /* give the unwanted tail back to the input */
                        yyless(keep);
                    }
                    ECHO;
                }
 
625
 
 
626
{param}         { ECHO; }

{integer}       { ECHO; }
{decimal}       { ECHO; }
{real}          { ECHO; }


{identifier}    { ECHO; }

{other}         { ECHO; }
 
648
 
 
649
 
 
650
        /*
 
651
         * Everything from here down is psql-specific.
 
652
         */
 
653
 
 
654
<<EOF>>                 {
                                        StackElem  *finished = cur_state->buffer_stack;
                                        StackElem  *resumed;

                                        /* Nothing stacked: end of input reached */
                                        if (finished == NULL)
                                                return LEXRES_EOL;

                                        /*
                                         * A variable expansion just ran dry.  Unlink its entry
                                         * from the inclusion stack, release it, and carry on
                                         * lexing whatever buffer lies underneath.
                                         */
                                        resumed = finished->next;
                                        cur_state->buffer_stack = resumed;

                                        yy_delete_buffer(finished->buf);
                                        free(finished->bufstring);
                                        /* origstring may be NULL; free(NULL) is a no-op */
                                        free(finished->origstring);
                                        free(finished);

                                        if (resumed == NULL)
                                        {
                                                /* stack is empty again: back to the outer scan buffer */
                                                yy_switch_to_buffer(cur_state->scanbufhandle);
                                                cur_state->curline = cur_state->scanbuf;
                                                cur_state->refline = cur_state->scanline;
                                        }
                                        else
                                        {
                                                /* continue with the enclosing expansion */
                                                yy_switch_to_buffer(resumed->buf);
                                                cur_state->curline = resumed->bufstring;
                                                cur_state->refline = resumed->origstring ? resumed->origstring : resumed->bufstring;
                                        }
                                }
 
685
 
 
686
        /*
 
687
         * Exclusive lexer states to handle backslash command lexing
 
688
         */
 
689
 
 
690
<xslashcmd>{
        /*
         * The command name extends up to the first whitespace character or
         * backslash.  That terminator is handed back to the input unread;
         * every other character is echoed.
         */

{space}                 |
"\\"                    {
                                        yyless(0);
                                        return LEXRES_OK;
                                }

{other}                 { ECHO; }

}
 
701
 
 
702
<xslasharg>{
        /* skip leading whitespace; the first nonblank picks the argument style */

{space}+                { /* discard */ }

"\\"                    {
                                        /*
                                         * A backslash ends this command or begins the next one,
                                         * so it is left unread.
                                         *
                                         * XXX as a consequence, options that start with a
                                         * backslash cannot conveniently be accepted; option
                                         * processing that encourages use of backslashes is
                                         * therefore rather broken.
                                         */
                                        yyless(0);
                                        return LEXRES_OK;
                                }

{quote}                 {
                                        BEGIN(xslashquote);
                                        *option_quote = '\'';
                                }

"`"                     {
                                        if (option_type != OT_VERBATIM)
                                        {
                                                *option_quote = '`';
                                                BEGIN(xslashbackquote);
                                        }
                                        else
                                        {
                                                /* verbatim mode: a backquote is an ordinary character */
                                                ECHO;
                                                BEGIN(xslashdefaultarg);
                                        }
                                }

:[A-Za-z0-9_]*          {
                                        /* Possible psql variable substitution */
                                        if (option_type != OT_VERBATIM)
                                        {
                                                /*
                                                 * The variable's value is emitted as-is, with no
                                                 * further examination.  That matches the pre-8.0
                                                 * behavior, though not the way variables are
                                                 * handled outside backslash commands.
                                                 */
                                                const char *varval = GetVariable(pset.vars, yytext + 1);

                                                if (varval != NULL)
                                                        appendPQExpBufferStr(output_buf, varval);
                                        }
                                        else
                                                ECHO;

                                        *option_quote = ':';

                                        return LEXRES_OK;
                                }

"|"                     {
                                        ECHO;
                                        /*
                                         * For a file-or-pipe option the rest is handled like the
                                         * whole-line case; otherwise like a default argument.
                                         */
                                        BEGIN(option_type == OT_FILEPIPE ? xslashwholeline : xslashdefaultarg);
                                }

{dquote}                {
                                        *option_quote = '"';
                                        ECHO;
                                        BEGIN(xslashquotedarg);
                                }

{other}                 {
                                        ECHO;
                                        BEGIN(xslashdefaultarg);
                                }

}
 
790
 
 
791
<xslashquote>{
        /* single-quoted text: taken literally apart from backslash sequences */

{quote}                 { return LEXRES_OK; }

"\\n"                   { appendPQExpBufferChar(output_buf, '\n'); }
"\\t"                   { appendPQExpBufferChar(output_buf, '\t'); }
"\\b"                   { appendPQExpBufferChar(output_buf, '\b'); }
"\\r"                   { appendPQExpBufferChar(output_buf, '\r'); }
"\\f"                   { appendPQExpBufferChar(output_buf, '\f'); }

        /*
         * Numeric escapes: decimal, octal and hex, in that order.  All three
         * use the same action because strtol() is called with base 0 and so
         * works out the radix from the digits' own prefix.
         */
"\\"[1-9][0-9]*         |
"\\"0[0-7]*             |
"\\"0[xX][0-9A-Fa-f]+   {
                                        appendPQExpBufferChar(output_buf,
                                                                                  (char) strtol(yytext + 1, NULL, 0));
                                }

        /* any other escaped character: emit just the character after the backslash */
"\\".                   { emit(yytext + 1, 1); }

{other}                 |
\n                      { ECHO; }

}
 
825
 
 
826
<xslashbackquote>{
        /*
         * Backticked text is copied verbatim up to the closing backquote or the
         * end of the line.  The command itself is run later, in
         * psql_scan_slash_option.
         */

"`"                     { return LEXRES_OK; }

{other}                 |
\n                      { ECHO; }

}
 
837
 
 
838
<xslashdefaultarg>{
        /*
         * Copy text up to unquoted whitespace or end of line.  Quote characters
         * are kept for now.
         */

        /*
         * Both terminators are left unread.  An unquoted backslash is the end
         * of this command or the start of the next one (this was not the
         * behavior pre-8.0, but it seems consistent).
         */
{space}                 |
"\\"                    {
                                        yyless(0);
                                        return LEXRES_OK;
                                }

{dquote}                {
                                        *option_quote = '"';
                                        ECHO;
                                        BEGIN(xslashquotedarg);
                                }

{other}                 { ECHO; }

}
 
870
 
 
871
<xslashquotedarg>{
        /* inside double quotes within a default-type argument: copy through */

{dquote}                {
                                        /* closing quote is copied too, then back to unquoted text */
                                        ECHO;
                                        BEGIN(xslashdefaultarg);
                                }

{other}                 |
\n                      { ECHO; }

}
 
882
 
 
883
<xslashwholeline>{
        /* copy the remainder of the input line, minus any leading whitespace */

{space}+                {
                                        /* whitespace only counts once something has been output */
                                        if (output_buf->len > 0)
                                        {
                                                ECHO;
                                        }
                                }

{other}                 { ECHO; }

}
 
895
 
 
896
<xslashend>{
        /* at end of command: swallow a double backslash and nothing else */

"\\\\"                  { return LEXRES_OK; }

{other}                 |
\n                      {
                                        /* anything else is left unread */
                                        yyless(0);
                                        return LEXRES_OK;
                                }

}
 
907
 
 
908
%%
 
909
 
 
910
/*
 
911
 * Create a lexer working state struct.
 
912
 */
 
913
PsqlScanState
 
914
psql_scan_create(void)
 
915
{
 
916
        PsqlScanState state;
 
917
 
 
918
        state = (PsqlScanStateData *) pg_malloc_zero(sizeof(PsqlScanStateData));
 
919
 
 
920
        psql_scan_reset(state);
 
921
 
 
922
        return state;
 
923
}
 
924
 
 
925
/*
 
926
 * Destroy a lexer working state struct, releasing all resources.
 
927
 */
 
928
void
 
929
psql_scan_destroy(PsqlScanState state)
 
930
{
 
931
        psql_scan_finish(state);
 
932
 
 
933
        psql_scan_reset(state);
 
934
 
 
935
        free(state);
 
936
}
 
937
 
 
938
/*
 
939
 * Set up to perform lexing of the given input line.
 
940
 *
 
941
 * The text at *line, extending for line_len bytes, will be scanned by
 
942
 * subsequent calls to the psql_scan routines.  psql_scan_finish should
 
943
 * be called when scanning is complete.  Note that the lexer retains
 
944
 * a pointer to the storage at *line --- this string must not be altered
 
945
 * or freed until after psql_scan_finish is called.
 
946
 */
 
947
void
 
948
psql_scan_setup(PsqlScanState state,
 
949
                                const char *line, int line_len)
 
950
{
 
951
        /* Mustn't be scanning already */
 
952
        psql_assert(state->scanbufhandle == NULL);
 
953
        psql_assert(state->buffer_stack == NULL);
 
954
 
 
955
        /* Do we need to hack the character set encoding? */
 
956
        state->encoding = pset.encoding;
 
957
        state->safe_encoding = PG_VALID_BE_ENCODING(state->encoding);
 
958
 
 
959
        /* needed for prepare_buffer */
 
960
        cur_state = state;
 
961
 
 
962
        /* Set up flex input buffer with appropriate translation and padding */
 
963
        state->scanbufhandle = prepare_buffer(line, line_len,
 
964
                                                                                  &state->scanbuf);
 
965
        state->scanline = line;
 
966
 
 
967
        /* Set lookaside data in case we have to map unsafe encoding */
 
968
        state->curline = state->scanbuf;
 
969
        state->refline = state->scanline;
 
970
}
 
971
 
 
972
/*
 
973
 * Do lexical analysis of SQL command text.
 
974
 *
 
975
 * The text previously passed to psql_scan_setup is scanned, and appended
 
976
 * (possibly with transformation) to query_buf.
 
977
 *
 
978
 * The return value indicates the condition that stopped scanning:
 
979
 *
 
980
 * PSCAN_SEMICOLON: found a command-ending semicolon.  (The semicolon is
 
981
 * transferred to query_buf.)  The command accumulated in query_buf should
 
982
 * be executed, then clear query_buf and call again to scan the remainder
 
983
 * of the line.
 
984
 *
 
985
 * PSCAN_BACKSLASH: found a backslash that starts a psql special command.
 
986
 * Any previous data on the line has been transferred to query_buf.
 
987
 * The caller will typically next call psql_scan_slash_command(),
 
988
 * perhaps psql_scan_slash_option(), and psql_scan_slash_command_end().
 
989
 *
 
990
 * PSCAN_INCOMPLETE: the end of the line was reached, but we have an
 
991
 * incomplete SQL command.  *prompt is set to the appropriate prompt type.
 
992
 *
 
993
 * PSCAN_EOL: the end of the line was reached, and there is no lexical
 
994
 * reason to consider the command incomplete.  The caller may or may not
 
995
 * choose to send it.  *prompt is set to the appropriate prompt type if
 
996
 * the caller chooses to collect more input.
 
997
 *
 
998
 * In the PSCAN_INCOMPLETE and PSCAN_EOL cases, psql_scan_finish() should
 
999
 * be called next, then the cycle may be repeated with a fresh input line.
 
1000
 *
 
1001
 * In all cases, *prompt is set to an appropriate prompt type code for the
 
1002
 * next line-input operation.
 
1003
 */
 
1004
PsqlScanResult
 
1005
psql_scan(PsqlScanState state,
 
1006
                  PQExpBuffer query_buf,
 
1007
                  promptStatus_t *prompt)
 
1008
{
 
1009
        PsqlScanResult result;
 
1010
        int                     lexresult;
 
1011
 
 
1012
        /* Must be scanning already */
 
1013
        psql_assert(state->scanbufhandle);
 
1014
 
 
1015
        /* Set up static variables that will be used by yylex */
 
1016
        cur_state = state;
 
1017
        output_buf = query_buf;
 
1018
 
 
1019
        if (state->buffer_stack != NULL)
 
1020
                yy_switch_to_buffer(state->buffer_stack->buf);
 
1021
        else
 
1022
                yy_switch_to_buffer(state->scanbufhandle);
 
1023
 
 
1024
        BEGIN(state->start_state);
 
1025
 
 
1026
        /* And lex. */
 
1027
        lexresult = yylex();
 
1028
 
 
1029
        /* Update static vars back to the state struct */
 
1030
        state->start_state = YY_START;
 
1031
 
 
1032
        /*
 
1033
         * Check termination state and return appropriate result info.
 
1034
         */
 
1035
        switch (lexresult)
 
1036
        {
 
1037
                case LEXRES_EOL:                /* end of input */
 
1038
                        switch (state->start_state)
 
1039
                        {
 
1040
                                case INITIAL:
 
1041
                                        if (state->paren_depth > 0)
 
1042
                                        {
 
1043
                                                result = PSCAN_INCOMPLETE;
 
1044
                                                *prompt = PROMPT_PAREN;
 
1045
                                        }
 
1046
                                        else if (query_buf->len > 0)
 
1047
                                        {
 
1048
                                                result = PSCAN_EOL;
 
1049
                                                *prompt = PROMPT_CONTINUE;
 
1050
                                        }
 
1051
                                        else
 
1052
                                        {
 
1053
                                                /* never bother to send an empty buffer */
 
1054
                                                result = PSCAN_INCOMPLETE;
 
1055
                                                *prompt = PROMPT_READY;
 
1056
                                        }
 
1057
                                        break;
 
1058
                                case xb:
 
1059
                                        result = PSCAN_INCOMPLETE;
 
1060
                                        *prompt = PROMPT_SINGLEQUOTE;
 
1061
                                        break;
 
1062
                                case xc:
 
1063
                                        result = PSCAN_INCOMPLETE;
 
1064
                                        *prompt = PROMPT_COMMENT;
 
1065
                                        break;
 
1066
                                case xd:
 
1067
                                        result = PSCAN_INCOMPLETE;
 
1068
                                        *prompt = PROMPT_DOUBLEQUOTE;
 
1069
                                        break;
 
1070
                                case xh:
 
1071
                                        result = PSCAN_INCOMPLETE;
 
1072
                                        *prompt = PROMPT_SINGLEQUOTE;
 
1073
                                        break;
 
1074
                                case xq:
 
1075
                                        result = PSCAN_INCOMPLETE;
 
1076
                                        *prompt = PROMPT_SINGLEQUOTE;
 
1077
                                        break;
 
1078
                                case xdolq:
 
1079
                                        result = PSCAN_INCOMPLETE;
 
1080
                                        *prompt = PROMPT_DOLLARQUOTE;
 
1081
                                        break;
 
1082
                                default:
 
1083
                                        /* can't get here */
 
1084
                                        fprintf(stderr, "invalid YY_START\n");
 
1085
                                        exit(1);
 
1086
                        }
 
1087
                        break;
 
1088
                case LEXRES_SEMI:               /* semicolon */
 
1089
                        result = PSCAN_SEMICOLON;
 
1090
                        *prompt = PROMPT_READY;
 
1091
                        break;
 
1092
                case LEXRES_BACKSLASH:  /* backslash */
 
1093
                        result = PSCAN_BACKSLASH;
 
1094
                        *prompt = PROMPT_READY;
 
1095
                        break;
 
1096
                default:
 
1097
                        /* can't get here */
 
1098
                        fprintf(stderr, "invalid yylex result\n");
 
1099
                        exit(1);
 
1100
        }
 
1101
 
 
1102
        return result;
 
1103
}
 
1104
 
 
1105
/*
 
1106
 * Clean up after scanning a string.  This flushes any unread input and
 
1107
 * releases resources (but not the PsqlScanState itself).  Note however
 
1108
 * that this does not reset the lexer scan state; that can be done by
 
1109
 * psql_scan_reset(), which is an orthogonal operation.
 
1110
 *
 
1111
 * It is legal to call this when not scanning anything (makes it easier
 
1112
 * to deal with error recovery).
 
1113
 */
 
1114
void
 
1115
psql_scan_finish(PsqlScanState state)
 
1116
{
 
1117
        /* Drop any incomplete variable expansions. */
 
1118
        while (state->buffer_stack != NULL)
 
1119
        {
 
1120
                StackElem  *stackelem = state->buffer_stack;
 
1121
 
 
1122
                state->buffer_stack = stackelem->next;
 
1123
                yy_delete_buffer(stackelem->buf);
 
1124
                free(stackelem->bufstring);
 
1125
                if (stackelem->origstring)
 
1126
                        free(stackelem->origstring);
 
1127
                free(stackelem);
 
1128
        }
 
1129
 
 
1130
        /* Done with the outer scan buffer, too */
 
1131
        if (state->scanbufhandle)
 
1132
                yy_delete_buffer(state->scanbufhandle);
 
1133
        state->scanbufhandle = NULL;
 
1134
        if (state->scanbuf)
 
1135
                free(state->scanbuf);
 
1136
        state->scanbuf = NULL;
 
1137
}
 
1138
 
 
1139
/*
 
1140
 * Reset lexer scanning state to start conditions.  This is appropriate
 
1141
 * for executing \r psql commands (or any other time that we discard the
 
1142
 * prior contents of query_buf).  It is not, however, necessary to do this
 
1143
 * when we execute and clear the buffer after getting a PSCAN_SEMICOLON or
 
1144
 * PSCAN_EOL scan result, because the scan state must be INITIAL when those
 
1145
 * conditions are returned.
 
1146
 *
 
1147
 * Note that this is unrelated to flushing unread input; that task is
 
1148
 * done by psql_scan_finish().
 
1149
 */
 
1150
void
 
1151
psql_scan_reset(PsqlScanState state)
 
1152
{
 
1153
        state->start_state = INITIAL;
 
1154
        state->paren_depth = 0;
 
1155
        state->xcdepth = 0;                     /* not really necessary */
 
1156
        if (state->dolqstart)
 
1157
                free(state->dolqstart);
 
1158
        state->dolqstart = NULL;
 
1159
}
 
1160
 
 
1161
/*
 
1162
 * Return true if lexer is currently in an "inside quotes" state.
 
1163
 *
 
1164
 * This is pretty grotty but is needed to preserve the old behavior
 
1165
 * that mainloop.c drops blank lines not inside quotes without even
 
1166
 * echoing them.
 
1167
 */
 
1168
bool
 
1169
psql_scan_in_quote(PsqlScanState state)
 
1170
{
 
1171
        return state->start_state != INITIAL;
 
1172
}
 
1173
 
 
1174
/*
 
1175
 * Scan the command name of a psql backslash command.  This should be called
 
1176
 * after psql_scan() returns PSCAN_BACKSLASH.  It is assumed that the input
 
1177
 * has been consumed through the leading backslash.
 
1178
 *
 
1179
 * The return value is a malloc'd copy of the command name, as parsed off
 
1180
 * from the input.
 
1181
 */
 
1182
char *
 
1183
psql_scan_slash_command(PsqlScanState state)
 
1184
{
 
1185
        PQExpBufferData mybuf;
 
1186
        int                     lexresult;
 
1187
 
 
1188
        /* Must be scanning already */
 
1189
        psql_assert(state->scanbufhandle);
 
1190
 
 
1191
        /* Build a local buffer that we'll return the data of */
 
1192
        initPQExpBuffer(&mybuf);
 
1193
 
 
1194
        /* Set up static variables that will be used by yylex */
 
1195
        cur_state = state;
 
1196
        output_buf = &mybuf;
 
1197
 
 
1198
        if (state->buffer_stack != NULL)
 
1199
                yy_switch_to_buffer(state->buffer_stack->buf);
 
1200
        else
 
1201
                yy_switch_to_buffer(state->scanbufhandle);
 
1202
 
 
1203
        BEGIN(xslashcmd);
 
1204
 
 
1205
        /* And lex. */
 
1206
        lexresult = yylex();
 
1207
 
 
1208
        /* There are no possible errors in this lex state... */
 
1209
 
 
1210
        return mybuf.data;
 
1211
}
 
1212
 
 
1213
/*
 
1214
 * Parse off the next argument for a backslash command, and return it as a
 
1215
 * malloc'd string.  If there are no more arguments, returns NULL.
 
1216
 *
 
1217
 * type tells what processing, if any, to perform on the option string;
 
1218
 * for example, if it's a SQL identifier, we want to downcase any unquoted
 
1219
 * letters.
 
1220
 *
 
1221
 * if quote is not NULL, *quote is set to 0 if no quoting was found, else
 
1222
 * the quote symbol.
 
1223
 *
 
1224
 * if semicolon is true, unquoted trailing semicolon(s) that would otherwise
 
1225
 * be taken as part of the option string will be stripped.
 
1226
 *
 
1227
 * NOTE: the only possible syntax errors for backslash options are unmatched
 
1228
 * quotes, which are detected when we run out of input.  Therefore, on a
 
1229
 * syntax error we just throw away the string and return NULL; there is no
 
1230
 * need to worry about flushing remaining input.
 
1231
 */
 
1232
char *
 
1233
psql_scan_slash_option(PsqlScanState state,
 
1234
                                           enum slash_option_type type,
 
1235
                                           char *quote,
 
1236
                                           bool semicolon)
 
1237
{
 
1238
        PQExpBufferData mybuf;
 
1239
        int                     lexresult;
 
1240
        char            local_quote;
 
1241
        bool            badarg;
 
1242
 
 
1243
        /* Must be scanning already */
 
1244
        psql_assert(state->scanbufhandle);
 
1245
 
 
1246
        if (quote == NULL)
 
1247
                quote = &local_quote;
 
1248
        *quote = 0;
 
1249
 
 
1250
        /* Build a local buffer that we'll return the data of */
 
1251
        initPQExpBuffer(&mybuf);
 
1252
 
 
1253
        /* Set up static variables that will be used by yylex */
 
1254
        cur_state = state;
 
1255
        output_buf = &mybuf;
 
1256
        option_type = type;
 
1257
        option_quote = quote;
 
1258
 
 
1259
        if (state->buffer_stack != NULL)
 
1260
                yy_switch_to_buffer(state->buffer_stack->buf);
 
1261
        else
 
1262
                yy_switch_to_buffer(state->scanbufhandle);
 
1263
 
 
1264
        if (type == OT_WHOLE_LINE)
 
1265
                BEGIN(xslashwholeline);
 
1266
        else
 
1267
                BEGIN(xslasharg);
 
1268
 
 
1269
        /* And lex. */
 
1270
        lexresult = yylex();
 
1271
 
 
1272
        /*
 
1273
         * Check the lex result: we should have gotten back either LEXRES_OK
 
1274
         * or LEXRES_EOL (the latter indicating end of string).  If we were inside
 
1275
         * a quoted string, as indicated by YY_START, EOL is an error.
 
1276
         */
 
1277
        psql_assert(lexresult == LEXRES_EOL || lexresult == LEXRES_OK);
 
1278
        badarg = false;
 
1279
        switch (YY_START)
 
1280
        {
 
1281
                case xslasharg:
 
1282
                        /* empty arg, or possibly a psql variable substitution */
 
1283
                        break;
 
1284
                case xslashquote:
 
1285
                        if (lexresult != LEXRES_OK)
 
1286
                                badarg = true;          /* hit EOL not ending quote */
 
1287
                        break;
 
1288
                case xslashbackquote:
 
1289
                        if (lexresult != LEXRES_OK)
 
1290
                                badarg = true;          /* hit EOL not ending quote */
 
1291
                        else
 
1292
                        {
 
1293
                                /* Perform evaluation of backticked command */
 
1294
                                char       *cmd = mybuf.data;
 
1295
                                FILE       *fd;
 
1296
                                bool            error = false;
 
1297
                                PQExpBufferData output;
 
1298
                                char            buf[512];
 
1299
                                size_t          result;
 
1300
 
 
1301
                                fd = popen(cmd, PG_BINARY_R);
 
1302
                                if (!fd)
 
1303
                                {
 
1304
                                        psql_error("%s: %s\n", cmd, strerror(errno));
 
1305
                                        error = true;
 
1306
                                }
 
1307
 
 
1308
                                initPQExpBuffer(&output);
 
1309
 
 
1310
                                if (!error)
 
1311
                                {
 
1312
                                        do
 
1313
                                        {
 
1314
                                                result = fread(buf, 1, sizeof(buf), fd);
 
1315
                                                if (ferror(fd))
 
1316
                                                {
 
1317
                                                        psql_error("%s: %s\n", cmd, strerror(errno));
 
1318
                                                        error = true;
 
1319
                                                        break;
 
1320
                                                }
 
1321
                                                appendBinaryPQExpBuffer(&output, buf, result);
 
1322
                                        } while (!feof(fd));
 
1323
                                }
 
1324
 
 
1325
                                if (fd && pclose(fd) == -1)
 
1326
                                {
 
1327
                                        psql_error("%s: %s\n", cmd, strerror(errno));
 
1328
                                        error = true;
 
1329
                                }
 
1330
 
 
1331
                                /* Now done with cmd, transfer result to mybuf */
 
1332
                                resetPQExpBuffer(&mybuf);
 
1333
 
 
1334
                                if (!error)
 
1335
                                {
 
1336
                                        /* strip any trailing newline */
 
1337
                                        if (output.len > 0 &&
 
1338
                                                output.data[output.len - 1] == '\n')
 
1339
                                                output.len--;
 
1340
                                        appendBinaryPQExpBuffer(&mybuf, output.data, output.len);
 
1341
                                }
 
1342
 
 
1343
                                termPQExpBuffer(&output);
 
1344
                        }
 
1345
                        break;
 
1346
                case xslashdefaultarg:
 
1347
                        /* Strip any trailing semi-colons if requested */
 
1348
                        if (semicolon)
 
1349
                        {
 
1350
                                while (mybuf.len > 0 &&
 
1351
                                           mybuf.data[mybuf.len - 1] == ';')
 
1352
                                {
 
1353
                                        mybuf.data[--mybuf.len] = '\0';
 
1354
                                }
 
1355
                        }
 
1356
 
 
1357
                        /*
 
1358
                         * If SQL identifier processing was requested, then we strip out
 
1359
                         * excess double quotes and downcase unquoted letters.
 
1360
                         * Doubled double-quotes become output double-quotes, per spec.
 
1361
                         *
 
1362
                         * Note that a string like FOO"BAR"BAZ will be converted to
 
1363
                         * fooBARbaz; this is somewhat inconsistent with the SQL spec,
 
1364
                         * which would have us parse it as several identifiers.  But
 
1365
                         * for psql's purposes, we want a string like "foo"."bar" to
 
1366
                         * be treated as one option, so there's little choice.
 
1367
                         */
 
1368
                        if (type == OT_SQLID || type == OT_SQLIDHACK)
 
1369
                        {
 
1370
                                bool            inquotes = false;
 
1371
                                char       *cp = mybuf.data;
 
1372
 
 
1373
                                while (*cp)
 
1374
                                {
 
1375
                                        if (*cp == '"')
 
1376
                                        {
 
1377
                                                if (inquotes && cp[1] == '"')
 
1378
                                                {
 
1379
                                                        /* Keep the first quote, remove the second */
 
1380
                                                        cp++;
 
1381
                                                }
 
1382
                                                inquotes = !inquotes;
 
1383
                                                /* Collapse out quote at *cp */
 
1384
                                                memmove(cp, cp + 1, strlen(cp));
 
1385
                                                mybuf.len--;
 
1386
                                                /* do not advance cp */
 
1387
                                        }
 
1388
                                        else
 
1389
                                        {
 
1390
                                                if (!inquotes && type == OT_SQLID)
 
1391
                                                        *cp = pg_tolower((unsigned char) *cp);
 
1392
                                                cp += PQmblen(cp, pset.encoding);
 
1393
                                        }
 
1394
                                }
 
1395
                        }
 
1396
                        break;
 
1397
                case xslashquotedarg:
 
1398
                        /* must have hit EOL inside double quotes */
 
1399
                        badarg = true;
 
1400
                        break;
 
1401
                case xslashwholeline:
 
1402
                        /* always okay */
 
1403
                        break;
 
1404
                default:
 
1405
                        /* can't get here */
 
1406
                        fprintf(stderr, "invalid YY_START\n");
 
1407
                        exit(1);
 
1408
        }
 
1409
 
 
1410
        if (badarg)
 
1411
        {
 
1412
                psql_error("unterminated quoted string\n");
 
1413
                termPQExpBuffer(&mybuf);
 
1414
                return NULL;
 
1415
        }
 
1416
 
 
1417
        /*
 
1418
         * An unquoted empty argument isn't possible unless we are at end of
 
1419
         * command.  Return NULL instead.
 
1420
         */
 
1421
        if (mybuf.len == 0 && *quote == 0)
 
1422
        {
 
1423
                termPQExpBuffer(&mybuf);
 
1424
                return NULL;
 
1425
        }
 
1426
 
 
1427
        /* Else return the completed string. */
 
1428
        return mybuf.data;
 
1429
}
 
1430
 
 
1431
/*
 
1432
 * Eat up any unused \\ to complete a backslash command.
 
1433
 */
 
1434
void
 
1435
psql_scan_slash_command_end(PsqlScanState state)
 
1436
{
 
1437
        int                     lexresult;
 
1438
 
 
1439
        /* Must be scanning already */
 
1440
        psql_assert(state->scanbufhandle);
 
1441
 
 
1442
        /* Set up static variables that will be used by yylex */
 
1443
        cur_state = state;
 
1444
        output_buf = NULL;
 
1445
 
 
1446
        if (state->buffer_stack != NULL)
 
1447
                yy_switch_to_buffer(state->buffer_stack->buf);
 
1448
        else
 
1449
                yy_switch_to_buffer(state->scanbufhandle);
 
1450
 
 
1451
        BEGIN(xslashend);
 
1452
 
 
1453
        /* And lex. */
 
1454
        lexresult = yylex();
 
1455
 
 
1456
        /* There are no possible errors in this lex state... */
 
1457
}
 
1458
 
 
1459
/*
 
1460
 * "Push back" the passed string so that it will be rescanned by subsequent
 
1461
 * psql_scan_slash_option calls.  This is presently only used in the case
 
1462
 * where a single-letter command has been concatenated with its argument.
 
1463
 *
 
1464
 * We use the same buffer stack mechanism as for variable expansion.
 
1465
 */
 
1466
void
 
1467
psql_scan_slash_pushback(PsqlScanState state, const char *str)
 
1468
{
 
1469
        /* needed for push_new_buffer */
 
1470
        cur_state = state;
 
1471
 
 
1472
        push_new_buffer(str);
 
1473
}
 
1474
 
 
1475
 
 
1476
/*
 
1477
 * Push the given string onto the stack of stuff to scan.
 
1478
 *
 
1479
 * cur_state must point to the active PsqlScanState.
 
1480
 *
 
1481
 * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
 
1482
 */
 
1483
static void
 
1484
push_new_buffer(const char *newstr)
 
1485
{
 
1486
        StackElem  *stackelem;
 
1487
 
 
1488
        stackelem = (StackElem *) pg_malloc(sizeof(StackElem));
 
1489
        stackelem->buf = prepare_buffer(newstr, strlen(newstr),
 
1490
                                                                        &stackelem->bufstring);
 
1491
        cur_state->curline = stackelem->bufstring;
 
1492
        if (cur_state->safe_encoding)
 
1493
        {
 
1494
                stackelem->origstring = NULL;
 
1495
                cur_state->refline = stackelem->bufstring;
 
1496
        }
 
1497
        else
 
1498
        {
 
1499
                stackelem->origstring = pg_strdup(newstr);
 
1500
                cur_state->refline = stackelem->origstring;
 
1501
        }
 
1502
        stackelem->next = cur_state->buffer_stack;
 
1503
        cur_state->buffer_stack = stackelem;
 
1504
}
 
1505
 
 
1506
/*
 
1507
 * Set up a flex input buffer to scan the given data.  We always make a
 
1508
 * copy of the data.  If working in an unsafe encoding, the copy has
 
1509
 * multibyte sequences replaced by FFs to avoid fooling the lexer rules.
 
1510
 *
 
1511
 * cur_state must point to the active PsqlScanState.
 
1512
 *
 
1513
 * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
 
1514
 */
 
1515
static YY_BUFFER_STATE
 
1516
prepare_buffer(const char *txt, int len, char **txtcopy)
 
1517
{
 
1518
        char       *newtxt;
 
1519
 
 
1520
        /* Flex wants two \0 characters after the actual data */
 
1521
        newtxt = pg_malloc(len + 2);
 
1522
        *txtcopy = newtxt;
 
1523
        newtxt[len] = newtxt[len + 1] = YY_END_OF_BUFFER_CHAR;
 
1524
 
 
1525
        if (cur_state->safe_encoding)
 
1526
                memcpy(newtxt, txt, len);
 
1527
        else
 
1528
        {
 
1529
                /* Gotta do it the hard way */
 
1530
                int             i = 0;
 
1531
 
 
1532
                while (i < len)
 
1533
                {
 
1534
                        int             thislen = PQmblen(txt + i, cur_state->encoding);
 
1535
 
 
1536
                        /* first byte should always be okay... */
 
1537
                        newtxt[i] = txt[i];
 
1538
                        i++;
 
1539
                        while (--thislen > 0)
 
1540
                                newtxt[i++] = (char) 0xFF;
 
1541
                }
 
1542
        }
 
1543
 
 
1544
        return yy_scan_buffer(newtxt, len + 2);
 
1545
}
 
1546
 
 
1547
/*
 
1548
 * emit() --- body for ECHO macro
 
1549
 *
 
1550
 * NB: this must be used for ALL and ONLY the text copied from the flex
 
1551
 * input data.  If you pass it something that is not part of the yytext
 
1552
 * string, you are making a mistake.  Internally generated text can be
 
1553
 * appended directly to output_buf.
 
1554
 */
 
1555
static void
 
1556
emit(const char *txt, int len)
 
1557
{
 
1558
        if (cur_state->safe_encoding)
 
1559
                appendBinaryPQExpBuffer(output_buf, txt, len);
 
1560
        else
 
1561
        {
 
1562
                /* Gotta do it the hard way */
 
1563
                const char *reference = cur_state->refline;
 
1564
                int             i;
 
1565
 
 
1566
                reference += (txt - cur_state->curline);
 
1567
 
 
1568
                for (i = 0; i < len; i++)
 
1569
                {
 
1570
                        char    ch = txt[i];
 
1571
 
 
1572
                        if (ch == (char) 0xFF)
 
1573
                                ch = reference[i];
 
1574
                        appendPQExpBufferChar(output_buf, ch);
 
1575
                }
 
1576
        }
 
1577
}