/*****************************************************************************
 * Copyright (C) 2008 EnterpriseDB Corporation.
 * Copyright (C) 2011 Stado Global Development Group.
 *
 * This file is part of Stado.
 *
 * Stado is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Stado is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Stado. If not, see <http://www.gnu.org/licenses/>.
 *
 * You can find Stado at http://www.stado.us
 *
 ****************************************************************************/
package org.postgresql.driver.core;
/**
 * Basic query parser infrastructure.
 *
 * @author Michael Paesold (mpaesold@gmx.at)
 */
33
* Find the end of the single-quoted string starting at the given offset.
35
* Note: for <tt>'single '' quote in string'</tt>, this method currently
36
* returns the offset of first <tt>'</tt> character after the initial
37
* one. The caller must call the method a second time for the second
38
* part of the quoted string.
40
public static int parseSingleQuotes(final char[] query, int offset,
41
boolean standardConformingStrings) {
42
// check for escape string syntax (E'')
43
if (standardConformingStrings
45
&& (query[offset-1] == 'e' || query[offset-1] == 'E')
46
&& charTerminatesIdentifier(query[offset-2]))
48
standardConformingStrings = false;
51
if (standardConformingStrings)
53
// do NOT treat backslashes as escape characters
54
while (++offset < query.length)
56
switch (query[offset])
67
// treat backslashes as escape characters
68
while (++offset < query.length)
70
switch (query[offset])
87
* Find the end of the double-quoted string starting at the given offset.
89
* Note: for <tt>"double "" quote in string"</tt>,
90
* this method currently returns the offset of first <tt>"</tt>
91
* character after the initial one. The caller must call the method a
92
* second time for the second part of the quoted string.
94
public static int parseDoubleQuotes(final char[] query, int offset) {
95
while (++offset < query.length && query[offset] != '"') ;
100
* Test if the dollar character (<tt>$</tt>) at the given offset starts
101
* a dollar-quoted string and return the offset of the ending dollar
104
public static int parseDollarQuotes(final char[] query, int offset) {
105
if (offset + 1 < query.length
106
&& (offset == 0 || !isIdentifierContChar(query[offset-1])))
109
if (query[offset + 1] == '$')
111
else if (isDollarQuoteStartChar(query[offset + 1]))
113
for (int d = offset + 2; d < query.length; ++d)
120
else if (!isDollarQuoteContChar(query[d]))
126
// found; note: tag includes start and end $ character
127
int tagIdx = offset, tagLen = endIdx - offset + 1;
128
offset = endIdx; // loop continues at endIdx + 1
129
for (++offset; offset < query.length; ++offset)
131
if (query[offset] == '$' &&
132
subArraysEqual(query, tagIdx, offset, tagLen))
134
offset += tagLen - 1;
144
* Test if the <tt>-</tt> character at <tt>offset</tt> starts a
145
* <tt>--</tt> style line comment, and return the position of the first
146
* <tt>\r</tt> or <tt>\n</tt> character.
148
public static int parseLineComment(final char[] query, int offset) {
149
if (offset + 1 < query.length && query[offset + 1] == '-')
151
while (++offset < query.length)
153
if (query[offset] == '\r' || query[offset] == '\n')
161
* Test if the <tt>/</tt> character at <tt>offset</tt> starts a block
162
* comment, and return the position of the last <tt>/</tt> character.
164
public static int parseBlockComment(final char[] query, int offset) {
165
if (offset + 1 < query.length && query[offset + 1] == '*')
167
// /* /* */ */ nest, according to SQL spec
169
for (offset += 2; offset < query.length; ++offset)
171
switch (query[offset-1])
174
if (query[offset] == '/')
177
++offset; // don't parse / in */* twice
181
if (query[offset] == '*')
184
++offset; // don't parse * in /*/ twice
193
--offset; // reset position to last '/' char
202
* @return true if the character is a whitespace character as defined
203
* in the backend's parser
205
public static boolean isSpace(char c) {
206
return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f';
210
* @return true if the given character is a valid character for an
211
* operator in the backend's parser
213
public static boolean isOperatorChar(char c) {
215
* Extracted from operators defined by {self} and {op_chars}
216
* in pgsql/src/backend/parser/scan.l.
218
return ",()[].;:+-*/%^<>=~!@#&|`?".indexOf(c) != -1;
222
* Checks if a character is valid as the start of an identifier.
224
* @param c the character to check
225
* @return true if valid as first character of an identifier; false if not
227
public static boolean isIdentifierStartChar(char c) {
229
* Extracted from {ident_start} and {ident_cont} in
230
* pgsql/src/backend/parser/scan.l:
231
* ident_start [A-Za-z\200-\377_]
232
* ident_cont [A-Za-z\200-\377_0-9\$]
234
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
235
|| c == '_' || c > 127 ;
239
* Checks if a character is valid as the second or later character of an
242
* @param c the character to check
243
* @return true if valid as second or later character of an identifier; false if not
245
public static boolean isIdentifierContChar(char c) {
246
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
247
|| c == '_' || c > 127
248
|| (c >= '0' && c <= '9')
253
* @return true if the character terminates an identifier
255
public static boolean charTerminatesIdentifier(char c) {
256
return c == '"' || isSpace(c) || isOperatorChar(c);
260
* Checks if a character is valid as the start of a dollar quoting tag.
262
* @param c the character to check
263
* @return true if valid as first character of a dollar quoting tag; false if not
265
public static boolean isDollarQuoteStartChar(char c) {
267
* The allowed dollar quote start and continuation characters
268
* must stay in sync with what the backend defines in
269
* pgsql/src/backend/parser/scan.l
271
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
272
|| c == '_' || c > 127;
276
* Checks if a character is valid as the second or later character of a
277
* dollar quoting tag.
279
* @param c the character to check
280
* @return true if valid as second or later character of a dollar quoting tag;
283
public static boolean isDollarQuoteContChar(char c) {
284
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
285
|| c == '_' || c > 127
286
|| (c >= '0' && c <= '9');
/**
 * Compares two sub-arrays of the given character array for equality.
 * If the length is zero, the result is true if and only if the offsets
 * are within the bounds of the array.
 *
 * @param arr a char array
 * @param offA first sub-array start offset
 * @param offB second sub-array start offset
 * @param len length of the sub arrays to compare
 * @return true if the sub-arrays are equal; false if not
 */
300
private static boolean subArraysEqual(final char[] arr,
301
final int offA, final int offB,
303
if (offA < 0 || offB < 0
304
|| offA >= arr.length || offB >= arr.length
305
|| offA + len > arr.length || offB + len > arr.length)
308
for (int i = 0; i < len; ++i)
310
if (arr[offA + i] != arr[offB + i])