2
/*-------------------------------------------------------------------------
5
* lexical scanner for psql
7
* This code is mainly needed to determine where the end of a SQL statement
8
* is: we are looking for semicolons that are not within quotes, comments,
9
* or parentheses. The most reliable way to handle this is to borrow the
10
* backend's flex lexer rules, lock, stock, and barrel. The rules below
11
* are (except for a few) the same as the backend's, but their actions are
12
* just ECHO whereas the backend's actions generally do other things.
14
* XXX The rules in this file must be kept in sync with the main parser!!!
16
* The most difficult aspect of this code is that we need to work in multibyte
17
* encodings that are not ASCII-safe. A "safe" encoding is one in which each
18
* byte of a multibyte character has the high bit set (it's >= 0x80). Since
19
* all our lexing rules treat all high-bit-set characters alike, we don't
20
* really need to care whether such a byte is part of a sequence or not.
21
* In an "unsafe" encoding, we still expect the first byte of a multibyte
22
* sequence to be >= 0x80, but later bytes might not be. If we scan such
23
* a sequence as-is, the lexing rules could easily be fooled into matching
24
* such bytes to ordinary ASCII characters. Our solution for this is to
25
* substitute 0xFF for each non-first byte within the data presented to flex.
26
* The flex rules will then pass the FF's through unmolested. The emit()
27
* subroutine is responsible for looking back to the original string and
28
* replacing FF's with the corresponding original bytes.
30
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
31
* Portions Copyright (c) 1994, Regents of the University of California
34
* $PostgreSQL: pgsql/src/bin/psql/psqlscan.l,v 1.9 2004-12-31 22:03:15 pgsql Exp $
36
*-------------------------------------------------------------------------
38
#include "postgres_fe.h"
44
#include "mb/pg_wchar.h"
48
#include "variables.h"
52
* We use a stack of flex buffers to handle substitution of psql variables.
53
* Each stacked buffer contains the as-yet-unread text from one psql variable.
54
* When we pop the stack all the way, we resume reading from the outer buffer
55
* identified by scanbufhandle.
57
typedef struct StackElem
59
YY_BUFFER_STATE buf; /* flex input control structure */
60
char *bufstring; /* data actually being scanned by flex */
61
char *origstring; /* copy of original data, if needed */
62
struct StackElem *next;
66
* All working state of the lexer must be stored in PsqlScanStateData
67
* between calls. This allows us to have multiple open lexer operations,
68
* which is needed for nested include files. The lexer itself is not
69
* recursive, but it must be re-entrant.
71
typedef struct PsqlScanStateData
73
StackElem *buffer_stack; /* stack of variable expansion buffers */
75
* These variables always refer to the outer buffer, never to any
76
* stacked variable-expansion buffer.
78
YY_BUFFER_STATE scanbufhandle;
79
char *scanbuf; /* start of outer-level input buffer */
80
const char *scanline; /* current input line at outer level */
82
/* safe_encoding, curline, refline are used by emit() to replace FFs */
83
int encoding; /* encoding being used now */
84
bool safe_encoding; /* is current encoding "safe"? */
85
const char *curline; /* actual flex input string for cur buf */
86
const char *refline; /* original data for cur buffer */
89
* All this state lives across successive input lines, until explicitly
90
* reset by psql_scan_reset.
92
int start_state; /* saved YY_START */
93
int paren_depth; /* depth of nesting in parentheses */
94
int xcdepth; /* depth of nesting in slash-star comments */
95
char *dolqstart; /* current $foo$ quote start string */
98
static PsqlScanState cur_state; /* current state while active */
100
static PQExpBuffer output_buf; /* current output buffer */
102
/* these variables do not need to be saved across calls */
103
static enum slash_option_type option_type;
104
static char *option_quote;
107
/* Return values from yylex() */
108
#define LEXRES_EOL 0 /* end of input */
109
#define LEXRES_SEMI 1 /* command-terminating semicolon found */
110
#define LEXRES_BACKSLASH 2 /* backslash command start */
111
#define LEXRES_OK 3 /* OK completion of backslash argument */
116
static void push_new_buffer(const char *newstr);
117
static YY_BUFFER_STATE prepare_buffer(const char *txt, int len,
119
static void emit(const char *txt, int len);
121
#define ECHO emit(yytext, yyleng)
126
%option never-interactive
132
* All of the following definitions and rules should exactly match
133
* src/backend/parser/scan.l so far as the flex patterns are concerned.
134
* The rule bodies are just ECHO as opposed to what the backend does,
135
* however. (But be sure to duplicate code that affects the lexing process,
136
* such as BEGIN().) Also, psqlscan uses a single <<EOF>> rule whereas
137
* scan.l has a separate one for each exclusive state.
141
* OK, here is a short description of lex/flex rules behavior.
142
* The longest pattern which matches an input string is always chosen.
143
* For equal-length patterns, the first occurring in the rules list is chosen.
144
* INITIAL is the starting state, to which all non-conditional rules apply.
145
* Exclusive states change parsing rules while the state is active. When in
146
* an exclusive state, only those rules defined for that state apply.
148
* We use exclusive states for quoted strings, extended comments,
149
* and to eliminate parsing troubles for numeric strings.
151
* <xb> bit string literal
152
* <xc> extended C-style comments
153
* <xd> delimited identifiers (double-quoted identifiers)
154
* <xh> hexadecimal numeric string
155
* <xq> quoted strings
156
* <xdolq> $foo$ quoted strings
165
/* Additional exclusive states for psql only: lex backslash commands */
176
* In order to make the world safe for Windows and Mac clients as well as
177
* Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n
178
* sequence will be seen as two successive newlines, but that doesn't cause
179
* any problems. Comments that start with -- and extend to the next
180
* newline are treated as equivalent to a single whitespace character.
182
* NOTE a fine point: if there is no newline following --, we will absorb
183
* everything to the end of the input as a comment. This is correct. Older
184
* versions of Postgres failed to recognize -- as a comment if the input
185
* did not end with a newline.
187
* XXX perhaps \f (formfeed) should be treated as a newline as well?
195
comment ("--"{non_newline}*)
197
whitespace ({space}+|{comment})
200
* SQL requires at least one newline in the whitespace separating
201
* string literals that are to be concatenated. Silly, but who are we
202
* to argue? Note that {whitespace_with_newline} should not have * after
203
* it, whereas {whitespace} should generally have a * after it...
206
special_whitespace ({space}+|{comment}{newline})
207
horiz_whitespace ({horiz_space}|{comment})
208
whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*)
211
* It is tempting to scan the string for only those characters
212
* which are allowed. However, this leads to silently swallowed
213
* characters if illegal characters are included in the string.
214
* For example, if xbinside is [01] then B'ABCD' is interpreted
215
* as a zero-length string, and the ABCD' is lost!
216
* Better to pass the string forward and let the input routines
217
* validate the contents.
222
xbcat {quote}{whitespace_with_newline}{quote}
224
/* Hexadecimal number
229
xhcat {quote}{whitespace_with_newline}{quote}
231
/* National character
236
* xqdouble implements embedded quote
237
* xqcat allows strings to cross input lines
242
xqdouble {quote}{quote}
245
xqoctesc [\\][0-7]{1,3}
246
xqcat {quote}{whitespace_with_newline}{quote}
248
/* $foo$ style quotes ("dollar quoting")
249
* The quoted string starts with $foo$ where "foo" is an optional string
250
* in the form of an identifier, except that it may not contain "$",
251
* and extends to the first occurrence of an identical string.
252
* There is *no* processing of the quoted text.
254
dolq_start [A-Za-z\200-\377_]
255
dolq_cont [A-Za-z\200-\377_0-9]
256
dolqdelim \$({dolq_start}{dolq_cont}*)?\$
260
* Allows embedded spaces and other special characters in identifiers.
265
xddouble {dquote}{dquote}
270
* The "extended comment" syntax closely resembles allowable operator syntax.
271
* The tricky part here is to get lex to recognize a string starting with
272
* slash-star as a comment, when interpreting it as an operator would produce
273
* a longer match --- remember lex will prefer a longer match! Also, if we
274
* have something like plus-slash-star, lex will think this is a 3-character
275
* operator whereas we want to see it as a + operator and a comment start.
276
* The solution is two-fold:
277
* 1. append {op_chars}* to xcstart so that it matches as much text as
278
* {operator} would. Then the tie-breaker (first matching rule of same
279
* length) ensures xcstart wins. We put back the extra stuff with yyless()
280
* in case it contains a star-slash that should terminate the comment.
281
* 2. In the operator rule, check for slash-star within the operator, and
282
* if found throw it back with yyless(). This handles the plus-slash-star problem.
284
* Dash-dash comments have similar interactions with the operator rule.
286
xcstart \/\*{op_chars}*
291
ident_start [A-Za-z\200-\377_]
292
ident_cont [A-Za-z\200-\377_0-9\$]
294
identifier {ident_start}{ident_cont}*
299
* "self" is the set of chars that should be returned as single-character
300
* tokens. "op_chars" is the set of chars that can make up "Op" tokens,
301
* which can be one or more characters long (but if a single-char token
302
* appears in the "self" set, it is not to be returned as an Op). Note
303
* that the sets overlap, but each has some chars that are not in the other.
305
* If you change either set, adjust the character lists appearing in the
306
* rule for "operator"!
308
self [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
309
op_chars [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
312
/* we no longer allow unary minus in numbers.
313
* instead we pass it separately to parser. there it gets
314
* coerced via doNegate() -- Leon aug 20 1999
318
decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*))
319
real ((({digit}*\.{digit}+)|({digit}+\.{digit}*)|({digit}+))([Ee][-+]?{digit}+))
326
* Dollar quoted strings are totally opaque, and no escaping is done on them.
327
* Other quoted strings must allow some special characters such as single-quote and newline.
329
* Embedded single-quotes are implemented both in the SQL standard
330
* style of two adjacent single quotes "''" and in the Postgres/Java style
331
* of escaped-quote "\'".
332
* Other embedded escaped characters are matched explicitly and the leading
333
* backslash is dropped from the string.
334
* Note that xcstart must appear before operator, as explained above!
335
* Also whitespace (comment) must appear before operator.
342
* Note that the whitespace rule includes both true
343
* whitespace and single-line ("--" style) comments.
344
* We suppress whitespace at the start of the query
345
* buffer. We also suppress all single-line comments,
346
* which is pretty dubious but is the historical behavior.
349
if (!(output_buf->len == 0 || yytext[0] == '-'))
354
cur_state->xcdepth = 0;
356
/* Put back any characters past slash-star; see above */
362
cur_state->xcdepth++;
363
/* Put back any characters past slash-star; see above */
369
if (cur_state->xcdepth <= 0)
374
cur_state->xcdepth--;
404
/* Hexadecimal bit type.
405
* At some point we should simply pass the string
406
* forward to the parser and label it there.
407
* In the meantime, place a leading "x" on the string
408
* to mark it for the input routine as a hex string.
447
/* This is only needed for \ just before EOF */
452
cur_state->dolqstart = pg_strdup(yytext);
457
if (strcmp(yytext, cur_state->dolqstart) == 0)
459
free(cur_state->dolqstart);
460
cur_state->dolqstart = NULL;
466
* When we fail to match $...$ to dolqstart, transfer
467
* the $... part to the output, but put back the final
468
* $ for rescanning. Consider $delim$...$junk$delim$
474
<xdolq>{dolqinside} {
478
/* This is only needed for $ inside the quoted text */
502
* These rules are specific to psql --- they implement parenthesis
503
* counting and detection of command-ending semicolon. These must
504
* appear before the {self} rule so that they take precedence over it.
508
cur_state->paren_depth++;
513
if (cur_state->paren_depth > 0)
514
cur_state->paren_depth--;
520
if (cur_state->paren_depth == 0)
522
/* Terminate lexing temporarily */
528
* psql-specific rules to handle backslash commands and variable
529
* substitution. We want these before {self}, also.
533
/* Force a semicolon or colon into the query buffer */
538
/* Terminate lexing temporarily */
539
return LEXRES_BACKSLASH;
543
/* Possible psql variable substitution */
546
value = GetVariable(pset.vars, yytext + 1);
550
/* It is a variable, perform substitution */
551
push_new_buffer(value);
552
/* yy_scan_buffer (called via prepare_buffer) already made the new buffer active */
557
* if the variable doesn't exist we'll copy the string as is
565
* Back to backend-compatible rules.
574
* Check for embedded slash-star or dash-dash; those
575
* are comment starts, so operator must stop there.
576
* Note that slash-star or dash-dash at the first
577
* character will match a prior rule, not this one.
580
char *slashstar = strstr(yytext, "/*");
581
char *dashdash = strstr(yytext, "--");
583
if (slashstar && dashdash)
585
/* if both appear, take the first one */
586
if (slashstar > dashdash)
587
slashstar = dashdash;
590
slashstar = dashdash;
592
nchars = slashstar - yytext;
595
* For SQL compatibility, '+' and '-' cannot be the
596
* last char of a multi-char operator unless the operator
597
* contains chars that are not in SQL operators.
598
* The idea is to lex '=-' as two operators, but not
599
* to forbid operator names like '?-' that could not be
600
* sequences of SQL operators.
603
(yytext[nchars-1] == '+' ||
604
yytext[nchars-1] == '-'))
608
for (ic = nchars-2; ic >= 0; ic--)
610
if (strchr("~!@#^&|`?%", yytext[ic]))
614
break; /* found a char that makes it OK */
615
nchars--; /* else remove the +/-, and check again */
620
/* Strip the unwanted chars from the token */
651
* Everything from here down is psql-specific.
655
StackElem *stackelem = cur_state->buffer_stack;
657
if (stackelem == NULL)
658
return LEXRES_EOL; /* end of input reached */
661
* We were expanding a variable, so pop the inclusion
662
* stack and keep lexing
664
cur_state->buffer_stack = stackelem->next;
665
yy_delete_buffer(stackelem->buf);
666
free(stackelem->bufstring);
667
if (stackelem->origstring)
668
free(stackelem->origstring);
671
stackelem = cur_state->buffer_stack;
672
if (stackelem != NULL)
674
yy_switch_to_buffer(stackelem->buf);
675
cur_state->curline = stackelem->bufstring;
676
cur_state->refline = stackelem->origstring ? stackelem->origstring : stackelem->bufstring;
680
yy_switch_to_buffer(cur_state->scanbufhandle);
681
cur_state->curline = cur_state->scanbuf;
682
cur_state->refline = cur_state->scanline;
687
* Exclusive lexer states to handle backslash command lexing
691
/* command name ends at whitespace or backslash; eat all else */
703
/* eat any whitespace, then decide what to do at first nonblank */
709
* backslash is end of command or next command, do not eat
711
* XXX this means we can't conveniently accept options
712
* that start with a backslash; therefore, option
713
* processing that encourages use of backslashes is rather broken.
721
*option_quote = '\'';
726
if (option_type == OT_VERBATIM)
728
/* in verbatim mode, backquote is not special */
730
BEGIN(xslashdefaultarg);
735
BEGIN(xslashbackquote);
740
/* Possible psql variable substitution */
741
if (option_type == OT_VERBATIM)
747
value = GetVariable(pset.vars, yytext + 1);
750
* The variable value is just emitted without any
751
* further examination. This is consistent with the
752
* pre-8.0 code behavior, if not with the way that
753
* variables are handled outside backslash commands.
756
appendPQExpBufferStr(output_buf, value);
766
if (option_type == OT_FILEPIPE)
768
/* treat like whole-string case */
769
BEGIN(xslashwholeline);
773
/* treat like default case */
774
BEGIN(xslashdefaultarg);
781
BEGIN(xslashquotedarg);
786
BEGIN(xslashdefaultarg);
792
/* single-quoted text: copy literally except for backslash sequences */
794
{quote} { return LEXRES_OK; }
796
"\\n" { appendPQExpBufferChar(output_buf, '\n'); }
797
"\\t" { appendPQExpBufferChar(output_buf, '\t'); }
798
"\\b" { appendPQExpBufferChar(output_buf, '\b'); }
799
"\\r" { appendPQExpBufferChar(output_buf, '\r'); }
800
"\\f" { appendPQExpBufferChar(output_buf, '\f'); }
804
appendPQExpBufferChar(output_buf,
805
(char) strtol(yytext + 1, NULL, 0));
810
appendPQExpBufferChar(output_buf,
811
(char) strtol(yytext + 1, NULL, 0));
814
"\\"0[xX][0-9A-Fa-f]+ {
816
appendPQExpBufferChar(output_buf,
817
(char) strtol(yytext + 1, NULL, 0));
820
"\\". { emit(yytext + 1, 1); }
828
* backticked text: copy everything until next backquote or end of line.
829
* Invocation of the command will happen in psql_scan_slash_option.
832
"`" { return LEXRES_OK; }
840
* Copy everything until unquoted whitespace or end of line. Quotes
841
* do not get stripped yet.
851
* unquoted backslash is end of command or next command, do not eat
854
* (this was not the behavior pre-8.0, but it seems consistent)
864
BEGIN(xslashquotedarg);
872
/* double-quoted text within a default-type argument: copy */
876
BEGIN(xslashdefaultarg);
884
/* copy everything until end of input line */
885
/* but suppress leading whitespace */
888
if (output_buf->len > 0)
897
/* at end of command, eat a double backslash, but not anything else */
899
"\\\\" { return LEXRES_OK; }
911
* Create a lexer working state struct.
914
psql_scan_create(void)
918
state = (PsqlScanStateData *) pg_malloc_zero(sizeof(PsqlScanStateData));
920
psql_scan_reset(state);
926
* Destroy a lexer working state struct, releasing all resources.
929
psql_scan_destroy(PsqlScanState state)
931
psql_scan_finish(state);
933
psql_scan_reset(state);
939
* Set up to perform lexing of the given input line.
941
* The text at *line, extending for line_len bytes, will be scanned by
942
* subsequent calls to the psql_scan routines. psql_scan_finish should
943
* be called when scanning is complete. Note that the lexer retains
944
* a pointer to the storage at *line --- this string must not be altered
945
* or freed until after psql_scan_finish is called.
948
psql_scan_setup(PsqlScanState state,
949
const char *line, int line_len)
951
/* Mustn't be scanning already */
952
psql_assert(state->scanbufhandle == NULL);
953
psql_assert(state->buffer_stack == NULL);
955
/* Do we need to hack the character set encoding? */
956
state->encoding = pset.encoding;
957
state->safe_encoding = PG_VALID_BE_ENCODING(state->encoding);
959
/* needed for prepare_buffer */
962
/* Set up flex input buffer with appropriate translation and padding */
963
state->scanbufhandle = prepare_buffer(line, line_len,
965
state->scanline = line;
967
/* Set lookaside data in case we have to map unsafe encoding */
968
state->curline = state->scanbuf;
969
state->refline = state->scanline;
973
* Do lexical analysis of SQL command text.
975
* The text previously passed to psql_scan_setup is scanned, and appended
976
* (possibly with transformation) to query_buf.
978
* The return value indicates the condition that stopped scanning:
980
* PSCAN_SEMICOLON: found a command-ending semicolon. (The semicolon is
981
* transferred to query_buf.) The command accumulated in query_buf should
982
* be executed, then clear query_buf and call again to scan the remainder of the line.
985
* PSCAN_BACKSLASH: found a backslash that starts a psql special command.
986
* Any previous data on the line has been transferred to query_buf.
987
* The caller will typically next call psql_scan_slash_command(),
988
* perhaps psql_scan_slash_option(), and psql_scan_slash_command_end().
990
* PSCAN_INCOMPLETE: the end of the line was reached, but we have an
991
* incomplete SQL command. *prompt is set to the appropriate prompt type.
993
* PSCAN_EOL: the end of the line was reached, and there is no lexical
994
* reason to consider the command incomplete. The caller may or may not
995
* choose to send it. *prompt is set to the appropriate prompt type if
996
* the caller chooses to collect more input.
998
* In the PSCAN_INCOMPLETE and PSCAN_EOL cases, psql_scan_finish() should
999
* be called next, then the cycle may be repeated with a fresh input line.
1001
* In all cases, *prompt is set to an appropriate prompt type code for the
1002
* next line-input operation.
1005
psql_scan(PsqlScanState state,
1006
PQExpBuffer query_buf,
1007
promptStatus_t *prompt)
1009
PsqlScanResult result;
1012
/* Must be scanning already */
1013
psql_assert(state->scanbufhandle);
1015
/* Set up static variables that will be used by yylex */
1017
output_buf = query_buf;
1019
if (state->buffer_stack != NULL)
1020
yy_switch_to_buffer(state->buffer_stack->buf);
1022
yy_switch_to_buffer(state->scanbufhandle);
1024
BEGIN(state->start_state);
1027
lexresult = yylex();
1029
/* Update static vars back to the state struct */
1030
state->start_state = YY_START;
1033
* Check termination state and return appropriate result info.
1037
case LEXRES_EOL: /* end of input */
1038
switch (state->start_state)
1041
if (state->paren_depth > 0)
1043
result = PSCAN_INCOMPLETE;
1044
*prompt = PROMPT_PAREN;
1046
else if (query_buf->len > 0)
1049
*prompt = PROMPT_CONTINUE;
1053
/* never bother to send an empty buffer */
1054
result = PSCAN_INCOMPLETE;
1055
*prompt = PROMPT_READY;
1059
result = PSCAN_INCOMPLETE;
1060
*prompt = PROMPT_SINGLEQUOTE;
1063
result = PSCAN_INCOMPLETE;
1064
*prompt = PROMPT_COMMENT;
1067
result = PSCAN_INCOMPLETE;
1068
*prompt = PROMPT_DOUBLEQUOTE;
1071
result = PSCAN_INCOMPLETE;
1072
*prompt = PROMPT_SINGLEQUOTE;
1075
result = PSCAN_INCOMPLETE;
1076
*prompt = PROMPT_SINGLEQUOTE;
1079
result = PSCAN_INCOMPLETE;
1080
*prompt = PROMPT_DOLLARQUOTE;
1083
/* can't get here */
1084
fprintf(stderr, "invalid YY_START\n");
1088
case LEXRES_SEMI: /* semicolon */
1089
result = PSCAN_SEMICOLON;
1090
*prompt = PROMPT_READY;
1092
case LEXRES_BACKSLASH: /* backslash */
1093
result = PSCAN_BACKSLASH;
1094
*prompt = PROMPT_READY;
1097
/* can't get here */
1098
fprintf(stderr, "invalid yylex result\n");
1106
* Clean up after scanning a string. This flushes any unread input and
1107
* releases resources (but not the PsqlScanState itself). Note however
1108
* that this does not reset the lexer scan state; that can be done by
1109
* psql_scan_reset(), which is an orthogonal operation.
1111
* It is legal to call this when not scanning anything (makes it easier
1112
* to deal with error recovery).
1115
psql_scan_finish(PsqlScanState state)
1117
/* Drop any incomplete variable expansions. */
1118
while (state->buffer_stack != NULL)
1120
StackElem *stackelem = state->buffer_stack;
1122
state->buffer_stack = stackelem->next;
1123
yy_delete_buffer(stackelem->buf);
1124
free(stackelem->bufstring);
1125
if (stackelem->origstring)
1126
free(stackelem->origstring);
1130
/* Done with the outer scan buffer, too */
1131
if (state->scanbufhandle)
1132
yy_delete_buffer(state->scanbufhandle);
1133
state->scanbufhandle = NULL;
1135
free(state->scanbuf);
1136
state->scanbuf = NULL;
1140
* Reset lexer scanning state to start conditions. This is appropriate
1141
* for executing \r psql commands (or any other time that we discard the
1142
* prior contents of query_buf). It is not, however, necessary to do this
1143
* when we execute and clear the buffer after getting a PSCAN_SEMICOLON or
1144
* PSCAN_EOL scan result, because the scan state must be INITIAL when those
1145
* conditions are returned.
1147
* Note that this is unrelated to flushing unread input; that task is
1148
* done by psql_scan_finish().
1151
psql_scan_reset(PsqlScanState state)
1153
state->start_state = INITIAL;
1154
state->paren_depth = 0;
1155
state->xcdepth = 0; /* not really necessary */
1156
if (state->dolqstart)
1157
free(state->dolqstart);
1158
state->dolqstart = NULL;
1162
* Return true if lexer is currently in an "inside quotes" state.
1164
* This is pretty grotty but is needed to preserve the old behavior
1165
* that mainloop.c drops blank lines not inside quotes without even echoing them.
1169
psql_scan_in_quote(PsqlScanState state)
1171
return state->start_state != INITIAL;
1175
* Scan the command name of a psql backslash command. This should be called
1176
* after psql_scan() returns PSCAN_BACKSLASH. It is assumed that the input
1177
* has been consumed through the leading backslash.
1179
* The return value is a malloc'd copy of the command name, as parsed off from the input.
1183
psql_scan_slash_command(PsqlScanState state)
1185
PQExpBufferData mybuf;
1188
/* Must be scanning already */
1189
psql_assert(state->scanbufhandle);
1191
/* Build a local buffer that we'll return the data of */
1192
initPQExpBuffer(&mybuf);
1194
/* Set up static variables that will be used by yylex */
1196
output_buf = &mybuf;
1198
if (state->buffer_stack != NULL)
1199
yy_switch_to_buffer(state->buffer_stack->buf);
1201
yy_switch_to_buffer(state->scanbufhandle);
1206
lexresult = yylex();
1208
/* There are no possible errors in this lex state... */
1214
* Parse off the next argument for a backslash command, and return it as a
1215
* malloc'd string. If there are no more arguments, returns NULL.
1217
* type tells what processing, if any, to perform on the option string;
1218
* for example, if it's a SQL identifier, we want to downcase any unquoted letters.
1221
* if quote is not NULL, *quote is set to 0 if no quoting was found, else the quote symbol.
1224
* if semicolon is true, unquoted trailing semicolon(s) that would otherwise
1225
* be taken as part of the option string will be stripped.
1227
* NOTE: the only possible syntax errors for backslash options are unmatched
1228
* quotes, which are detected when we run out of input. Therefore, on a
1229
* syntax error we just throw away the string and return NULL; there is no
1230
* need to worry about flushing remaining input.
1233
psql_scan_slash_option(PsqlScanState state,
1234
enum slash_option_type type,
1238
PQExpBufferData mybuf;
1243
/* Must be scanning already */
1244
psql_assert(state->scanbufhandle);
1247
quote = &local_quote;
1250
/* Build a local buffer that we'll return the data of */
1251
initPQExpBuffer(&mybuf);
1253
/* Set up static variables that will be used by yylex */
1255
output_buf = &mybuf;
1257
option_quote = quote;
1259
if (state->buffer_stack != NULL)
1260
yy_switch_to_buffer(state->buffer_stack->buf);
1262
yy_switch_to_buffer(state->scanbufhandle);
1264
if (type == OT_WHOLE_LINE)
1265
BEGIN(xslashwholeline);
1270
lexresult = yylex();
1273
* Check the lex result: we should have gotten back either LEXRES_OK
1274
* or LEXRES_EOL (the latter indicating end of string). If we were inside
1275
* a quoted string, as indicated by YY_START, EOL is an error.
1277
psql_assert(lexresult == LEXRES_EOL || lexresult == LEXRES_OK);
1282
/* empty arg, or possibly a psql variable substitution */
1285
if (lexresult != LEXRES_OK)
1286
badarg = true; /* hit EOL not ending quote */
1288
case xslashbackquote:
1289
if (lexresult != LEXRES_OK)
1290
badarg = true; /* hit EOL not ending quote */
1293
/* Perform evaluation of backticked command */
1294
char *cmd = mybuf.data;
1297
PQExpBufferData output;
1301
fd = popen(cmd, PG_BINARY_R);
1304
psql_error("%s: %s\n", cmd, strerror(errno));
1308
initPQExpBuffer(&output);
1314
result = fread(buf, 1, sizeof(buf), fd);
1317
psql_error("%s: %s\n", cmd, strerror(errno));
1321
appendBinaryPQExpBuffer(&output, buf, result);
1322
} while (!feof(fd));
1325
if (fd && pclose(fd) == -1)
1327
psql_error("%s: %s\n", cmd, strerror(errno));
1331
/* Now done with cmd, transfer result to mybuf */
1332
resetPQExpBuffer(&mybuf);
1336
/* strip any trailing newline */
1337
if (output.len > 0 &&
1338
output.data[output.len - 1] == '\n')
1340
appendBinaryPQExpBuffer(&mybuf, output.data, output.len);
1343
termPQExpBuffer(&output);
1346
case xslashdefaultarg:
1347
/* Strip any trailing semi-colons if requested */
1350
while (mybuf.len > 0 &&
1351
mybuf.data[mybuf.len - 1] == ';')
1353
mybuf.data[--mybuf.len] = '\0';
1358
* If SQL identifier processing was requested, then we strip out
1359
* excess double quotes and downcase unquoted letters.
1360
* Doubled double-quotes become output double-quotes, per spec.
1362
* Note that a string like FOO"BAR"BAZ will be converted to
1363
* fooBARbaz; this is somewhat inconsistent with the SQL spec,
1364
* which would have us parse it as several identifiers. But
1365
* for psql's purposes, we want a string like "foo"."bar" to
1366
* be treated as one option, so there's little choice.
1368
if (type == OT_SQLID || type == OT_SQLIDHACK)
1370
bool inquotes = false;
1371
char *cp = mybuf.data;
1377
if (inquotes && cp[1] == '"')
1379
/* Keep the first quote, remove the second */
1382
inquotes = !inquotes;
1383
/* Collapse out quote at *cp */
1384
memmove(cp, cp + 1, strlen(cp));
1386
/* do not advance cp */
1390
if (!inquotes && type == OT_SQLID)
1391
*cp = pg_tolower((unsigned char) *cp);
1392
cp += PQmblen(cp, pset.encoding);
1397
case xslashquotedarg:
1398
/* must have hit EOL inside double quotes */
1401
case xslashwholeline:
1405
/* can't get here */
1406
fprintf(stderr, "invalid YY_START\n");
1412
psql_error("unterminated quoted string\n");
1413
termPQExpBuffer(&mybuf);
1418
* An unquoted empty argument isn't possible unless we are at end of
1419
* command. Return NULL instead.
1421
if (mybuf.len == 0 && *quote == 0)
1423
termPQExpBuffer(&mybuf);
1427
/* Else return the completed string. */
1432
* Eat up any unused \\ to complete a backslash command.
1435
psql_scan_slash_command_end(PsqlScanState state)
1439
/* Must be scanning already */
1440
psql_assert(state->scanbufhandle);
1442
/* Set up static variables that will be used by yylex */
1446
if (state->buffer_stack != NULL)
1447
yy_switch_to_buffer(state->buffer_stack->buf);
1449
yy_switch_to_buffer(state->scanbufhandle);
1454
lexresult = yylex();
1456
/* There are no possible errors in this lex state... */
1460
* "Push back" the passed string so that it will be rescanned by subsequent
1461
* psql_scan_slash_option calls. This is presently only used in the case
1462
* where a single-letter command has been concatenated with its argument.
1464
* We use the same buffer stack mechanism as for variable expansion.
1467
psql_scan_slash_pushback(PsqlScanState state, const char *str)
1469
/* needed for push_new_buffer */
1472
push_new_buffer(str);
1477
* Push the given string onto the stack of stuff to scan.
1479
* cur_state must point to the active PsqlScanState.
1481
* NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
1484
push_new_buffer(const char *newstr)
1486
StackElem *stackelem;
1488
stackelem = (StackElem *) pg_malloc(sizeof(StackElem));
1489
stackelem->buf = prepare_buffer(newstr, strlen(newstr),
1490
&stackelem->bufstring);
1491
cur_state->curline = stackelem->bufstring;
1492
if (cur_state->safe_encoding)
1494
stackelem->origstring = NULL;
1495
cur_state->refline = stackelem->bufstring;
1499
stackelem->origstring = pg_strdup(newstr);
1500
cur_state->refline = stackelem->origstring;
1502
stackelem->next = cur_state->buffer_stack;
1503
cur_state->buffer_stack = stackelem;
1507
* Set up a flex input buffer to scan the given data. We always make a
1508
* copy of the data. If working in an unsafe encoding, the copy has
1509
* the non-first bytes of multibyte sequences replaced by FFs to avoid fooling the lexer rules.
1511
* cur_state must point to the active PsqlScanState.
1513
* NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
1515
static YY_BUFFER_STATE
1516
prepare_buffer(const char *txt, int len, char **txtcopy)
1520
/* Flex wants two \0 characters after the actual data */
1521
newtxt = pg_malloc(len + 2);
1523
newtxt[len] = newtxt[len + 1] = YY_END_OF_BUFFER_CHAR;
1525
if (cur_state->safe_encoding)
1526
memcpy(newtxt, txt, len);
1529
/* Gotta do it the hard way */
1534
int thislen = PQmblen(txt + i, cur_state->encoding);
1536
/* first byte should always be okay... */
1539
while (--thislen > 0)
1540
newtxt[i++] = (char) 0xFF;
1544
return yy_scan_buffer(newtxt, len + 2);
1548
* emit() --- body for ECHO macro
1550
* NB: this must be used for ALL and ONLY the text copied from the flex
1551
* input data. If you pass it something that is not part of the yytext
1552
* string, you are making a mistake. Internally generated text can be
1553
* appended directly to output_buf.
1556
emit(const char *txt, int len)
1558
if (cur_state->safe_encoding)
1559
appendBinaryPQExpBuffer(output_buf, txt, len);
1562
/* Gotta do it the hard way */
1563
const char *reference = cur_state->refline;
1566
reference += (txt - cur_state->curline);
1568
for (i = 0; i < len; i++)
1572
if (ch == (char) 0xFF)
1574
appendPQExpBufferChar(output_buf, ch);