2
* Copyright 2006 StartNet s.r.o.
4
* Distributed under MIT license
6
package cz.startnet.utils.pgdiff.parsers;
8
import cz.startnet.utils.pgdiff.Resources;
9
import java.text.MessageFormat;
10
import java.util.Locale;
13
* Class for parsing strings.
17
// Parser over a single input string. The methods of this class read the text
// through the `string` field and track progress with a `position` cursor
// (the declaration of `position` is not visible in this extract).
public final class Parser {
20
* String to be parsed.
22
// The text being parsed; every method below indexes into it.
private String string;
29
* Creates new instance of Parser.
31
* @param string {@link #string}
33
// Constructor: receives the text to parse.
// NOTE(review): the constructor body is not visible in this extract, so how
// `string` is stored and how the cursor is initialised cannot be confirmed here.
public Parser(final String string) {
39
* Checks whether the string contains given word on current position. If not
40
* then throws an exception.
42
* @param words list of words to check
44
// Varargs form: visits every entry of `words` in the order given.
// NOTE(review): the loop body is not visible in this extract; presumably each
// word is checked as a mandatory word via expect(word, false) -- confirm.
public void expect(final String... words) {
45
for (final String word : words) {
51
* Checks whether the string contains given word on current position. If not
52
* and expectation is optional then position is not changed and method
53
* returns false. If expectation is not optional, an exception with an error
54
* description is thrown. If word is found, position is moved at first
55
* non-whitespace character following the word.
57
* @param word word to expect
58
* @param optional true if word is optional, otherwise false
60
* @return true if word was found, otherwise false
62
// Tests whether `word` occurs at the cursor (case-insensitively) and is
// properly delimited; on failure of a mandatory word a ParserException is thrown.
public boolean expect(final String word, final boolean optional) {
63
// Index just past the word, assuming it starts at the cursor.
final int wordEnd = position + word.length();
65
// Match requires all of:
//  - the word fits into the remaining text,
//  - the text at the cursor equals the word ignoring case,
//  - the word is delimited: it ends the string, is followed by a non-letter,
//    or the word itself is "(" or ",".
// NOTE(review): any non-letter counts as a delimiter, so a following digit or
// '_' also passes (the word "int" matches at the start of "int4") -- confirm
// this is intended.
if (wordEnd <= string.length()
66
&& string.substring(position, wordEnd).equalsIgnoreCase(word)
67
&& (wordEnd == string.length()
68
|| !Character.isLetter(string.charAt(wordEnd))
69
|| "(".equals(word) || ",".equals(word))) {
80
// (The success branch and the handling of `optional` are not visible in this
// extract.)
// Error path: the message receives the full string, the expected word, the
// cursor as position + 1, and an excerpt of the text at the cursor.
// NOTE(review): position + 20 can exceed string.length(); String.substring
// then throws StringIndexOutOfBoundsException instead of this ParserException.
throw new ParserException(MessageFormat.format(
81
Resources.getString("CannotParseStringExpectedWord"), string,
82
word, position + 1, string.substring(position, position + 20)));
86
* Checks whether string contains at current position sequence of the words.
88
* @param words array of words
90
* @return true if whole sequence was found, otherwise false
92
// Probes for a sequence of words starting at the cursor.
public boolean expectOptional(final String... words) {
93
// The first word is probed in optional mode (second argument true).
// NOTE(review): words[0] is read unconditionally, so a call with no arguments
// throws ArrayIndexOutOfBoundsException.
final boolean found = expect(words[0], true);
99
// The remaining words are visited in order from index 1; the loop body and
// the use of `found` are not visible in this extract.
for (int i = 1; i < words.length; i++) {
108
* Moves position in the string to the next non-whitespace character.
110
// Advances the cursor over whitespace.
public void skipWhitespace() {
111
// Step one character at a time; the loop bound keeps the cursor from moving
// past string.length().
for (; position < string.length(); position++) {
112
// Reached a non-whitespace character (the statement inside this branch is not
// visible in this extract -- presumably it ends the scan).
if (!Character.isWhitespace(string.charAt(position))) {
119
* Parses identifier from current position. If identifier is quoted, it is
120
* returned quoted. If the identifier is not quoted, it is converted to
121
* lowercase. If identifier does not start with letter then exception is
122
* thrown. Position is placed at next first non-whitespace character.
124
* @return parsed identifier
126
// Parses an identifier made of one part or two dot-separated parts.
public String parseIdentifier() {
127
// First (or only) part.
String identifier = parseIdentifierInternal();
129
// A '.' at the cursor means a second part follows.
// NOTE(review): charAt(position) throws StringIndexOutOfBoundsException when
// the first part ended exactly at the end of the string -- confirm callers
// never parse an identifier that is the last token of the input.
if (string.charAt(position) == '.') {
131
// Join both parts with a '.' (the statement that steps over the '.' in the
// input is not visible in this extract).
identifier += '.' + parseIdentifierInternal();
140
* Parses single part of the identifier.
142
* @return parsed identifier
144
// Parses one identifier part, either double-quoted or bare.
private String parseIdentifierInternal() {
145
// A double quote at the cursor marks a quoted identifier.
final boolean quoted = string.charAt(position) == '"';
148
final int endPos = string.indexOf('"', position + 1);
149
// Quoted form: the first double quote after the opening one ends the name, so
// a doubled quote inside a name is not treated as an escape. The result keeps
// both surrounding quotes.
// NOTE(review): with no closing quote indexOf returns -1, making endPos + 1
// equal to 0; substring(position, 0) then throws for position > 0.
final String result = string.substring(position, endPos + 1);
150
// Move the cursor just past the closing quote.
position = endPos + 1;
154
// Bare form: scan forward from the cursor to the first delimiter.
int endPos = position;
156
for (; endPos < string.length(); endPos++) {
157
final char chr = string.charAt(endPos);
159
// Whitespace, ',', ')', '(', ';' and '.' end a bare identifier.
if (Character.isWhitespace(chr) || chr == ',' || chr == ')'
160
|| chr == '(' || chr == ';' || chr == '.') {
165
// The bare text is converted to lower case (the argument passed to
// toLowerCase is not visible in this extract).
final String result =
166
string.substring(position, endPos).toLowerCase(
176
* Returns rest of the string. If the string ends with ';' then it is
177
* removed from the string before returned. If there is nothing more in the
178
* string, null is returned.
180
* @return rest of the string, without trailing ';' if present, or null if
181
* there is nothing more in the string
183
// Returns the unparsed remainder of the string, dropping a trailing ';'.
public String getRest() {
186
// Does the whole input end with ';'?
// NOTE(review): on an empty string this charAt(-1) throws
// StringIndexOutOfBoundsException.
if (string.charAt(string.length() - 1) == ';') {
187
// Cursor sits on that final ';', i.e. only the terminator is left (branch
// body not visible in this extract).
if (position == string.length() - 1) {
190
// Otherwise: everything from the cursor up to, but excluding, the final ';'.
result = string.substring(position, string.length() - 1);
193
// No trailing ';': everything from the cursor to the end of the string.
result = string.substring(position);
196
// The remainder is consumed: place the cursor at the end of the string.
position = string.length();
202
* Parses integer from the string. If next word is not integer then
203
* exception is thrown.
205
* @return parsed integer value
207
// Parses an int from the token at the cursor; a token that is not a valid int
// is reported as a ParserException.
public int parseInteger() {
208
int endPos = position;
210
// The token is the run of letters and digits starting at the cursor. Letters
// are part of the token, so text such as 12abc is handed to parseInt whole
// and rejected rather than read as 12. A leading '-' is neither letter nor
// digit, so it yields an empty token.
for (; endPos < string.length(); endPos++) {
211
if (!Character.isLetterOrDigit(string.charAt(endPos))) {
218
// Conversion of the token (the start of this statement is not visible in this
// extract).
Integer.parseInt(string.substring(position, endPos));
224
// Not a valid int: rethrow as ParserException, keeping the original cause.
// NOTE(review): position + 20 can exceed string.length(); substring then
// throws StringIndexOutOfBoundsException and the ParserException is lost.
} catch (final NumberFormatException ex) {
225
throw new ParserException(MessageFormat.format(
226
Resources.getString("CannotParseStringExpectedInteger"),
227
string, position + 1,
228
string.substring(position, position + 20)), ex);
233
* Parses string from the string. String can be either quoted or unquoted.
234
* Quoted string is parsed till next unescaped quote. Unquoted string is
235
* parsed till whitespace, ',' ')' or ';' is found. If string should be
236
* empty, exception is thrown.
238
* @return parsed string, if quoted then including quotes
240
// Parses a string token at the cursor, either single-quoted or bare.
public String parseString() {
241
// A single quote at the cursor marks a quoted string.
final boolean quoted = string.charAt(position) == '\'';
244
// Quoted form: scan from the character after the opening quote. `escape`
// tracks whether the current character is escaped (the code that sets it is
// not visible in this extract).
boolean escape = false;
245
int endPos = position + 1;
247
for (; endPos < string.length(); endPos++) {
248
final char chr = string.charAt(endPos);
252
// An unescaped single quote: the code looks one character ahead for a second
// single quote, i.e. a doubled quote (what happens in either case is not
// visible in this extract).
} else if (!escape && chr == '\'') {
253
if (endPos + 1 < string.length()
254
&& string.charAt(endPos + 1) == '\'') {
262
// The quoted result runs from the cursor through endPos inclusive.
final String result = string.substring(position, endPos + 1);
264
// Cursor moves just past endPos.
position = endPos + 1;
269
// Bare form: scan forward from the cursor to the first delimiter.
int endPos = position;
271
for (; endPos < string.length(); endPos++) {
272
final char chr = string.charAt(endPos);
274
// Whitespace, ',' and ')' are delimiters; the rest of this condition is not
// visible in this extract.
if (Character.isWhitespace(chr) || chr == ',' || chr == ')'
280
// Nothing was consumed: an empty bare string is an error. This message gets
// only the full string and the cursor as position + 1, no excerpt.
if (position == endPos) {
281
throw new ParserException(MessageFormat.format(
282
Resources.getString("CannotParseStringExpectedString"),
283
string, position + 1));
286
// The bare result excludes the delimiter at endPos.
final String result = string.substring(position, endPos);
296
* Returns expression that is ended either with ',', ')' or with end of the
297
* string. If expression is empty then exception is thrown.
299
* @return expression string
301
// Extracts the expression that starts at the cursor.
public String getExpression() {
302
// End index of the expression, as computed by getExpressionEnd().
final int endPos = getExpressionEnd();
304
// An empty expression is an error.
// NOTE(review): position + 20 can exceed string.length(); substring then
// throws StringIndexOutOfBoundsException instead of this ParserException.
if (position == endPos) {
305
throw new ParserException(MessageFormat.format(
306
Resources.getString("CannotParseStringExpectedExpression"),
307
string, position + 1,
308
string.substring(position, position + 20)));
311
// The text between cursor and endPos with surrounding whitespace trimmed (the
// cursor update and the return are not visible in this extract).
final String result = string.substring(position, endPos).trim();
319
* Returns position of last character of single command within
320
* statement (like CREATE TABLE). Last character is either ',' or
321
* ')'. If no such character is found and method reaches the end of the
322
* command then position after the last character in the command is
325
* @return end position of the command
327
// Scans forward from the cursor to find where the current expression ends.
private int getExpressionEnd() {
329
// True while the scan is inside a single-quoted literal.
boolean singleQuoteOn = false;
330
int charPos = position;
332
// Character-by-character scan. `bracesCount` is used below but its
// declaration and the first branch of this chain are not visible in this
// extract.
for (; charPos < string.length(); charPos++) {
333
final char chr = string.charAt(charPos);
337
// A ')' seen while bracesCount is 0 gets special handling (body not visible).
// NOTE(review): this branch, as visible, does not test singleQuoteOn; unless
// an earlier branch filters quoted text, a ')' inside a string literal is
// treated as a real parenthesis -- confirm.
} else if (chr == ')') {
338
if (bracesCount == 0) {
343
// Each single quote toggles the in-literal state.
} else if (chr == '\'') {
344
singleQuoteOn = !singleQuoteOn;
345
// A ',' outside quotes and at nesting depth 0 (branch body not visible).
} else if ((chr == ',') && !singleQuoteOn && (bracesCount == 0)) {
347
// A ';' outside quotes and at nesting depth 0 (branch body not visible).
} else if (chr == ';' && bracesCount == 0 && !singleQuoteOn) {
356
* Returns current position in the string.
358
* @return current position in the string
360
// Accessor for the cursor (body not visible in this extract).
public int getPosition() {
365
* Returns parsed string.
367
* @return parsed string
369
// Accessor for the string being parsed (body not visible in this extract).
public String getString() {
374
* Throws exception about unsupported command in statement.
376
// Always throws a ParserException built from the "unsupported command"
// message; the message receives the full string, the cursor as position + 1,
// and an excerpt of the text at the cursor.
// NOTE(review): position + 20 can exceed string.length(); substring then
// throws StringIndexOutOfBoundsException instead of the ParserException.
public void throwUnsupportedCommand() {
377
throw new ParserException(MessageFormat.format(
378
Resources.getString("CannotParseStringUnsupportedCommand"),
379
string, position + 1,
380
string.substring(position, position + 20)));
384
* Checks whether one of the words is present at current position. If the
385
* word is present then the word is returned and position is updated.
387
* @param words words to check
389
* @return found word or null if none of the words has been found
391
* @see #expectOptional(java.lang.String[])
393
// Tries each candidate word, in the order given, at the cursor.
public String expectOptionalOneOf(final String... words) {
394
for (final String word : words) {
395
// Each candidate is probed with the single-word form of expectOptional (what
// happens on a hit, and the fall-through result, are not visible in this
// extract).
if (expectOptional(word)) {
404
* Returns substring from the string.
406
* @param startPos start position
407
* @param endPos end position exclusive
411
// Returns the part of the parsed string from startPos (inclusive) to endPos
// (exclusive). Indices are passed straight to String.substring, so
// out-of-range values surface as StringIndexOutOfBoundsException.
public String getSubString(final int startPos, final int endPos) {
412
return string.substring(startPos, endPos);
416
* Changes current position in the string.
418
* @param position new position
420
// Sets the cursor to the given index; the value is stored as-is, with no
// range check against the string length.
public void setPosition(final int position) {
421
this.position = position;
425
* Parses data type from the string. Position is updated. If data type
426
* definition is not found then exception is thrown.
428
* @return data type string
430
// Parses a data type starting at the cursor, including the multi-word forms
// "character varying", "double precision" and the time zone suffixes.
public String parseDataType() {
431
int endPos = position;
433
// The first word of the type ends at whitespace, '(', ')' or ','.
// NOTE(review): ';' is not among these terminators, so a type directly
// followed by ';' keeps the ';' in its name -- confirm this is intended.
while (endPos < string.length()
434
&& !Character.isWhitespace(string.charAt(endPos))
435
&& string.charAt(endPos) != '('
436
&& string.charAt(endPos) != ')'
437
&& string.charAt(endPos) != ',') {
441
// Nothing was consumed: no data type at the cursor.
// NOTE(review): position + 20 can exceed string.length(); substring then
// throws StringIndexOutOfBoundsException instead of this ParserException.
if (endPos == position) {
442
throw new ParserException(MessageFormat.format(
443
Resources.getString("CannotParseStringExpectedDataType"),
444
string, position + 1,
445
string.substring(position, position + 20)));
448
String dataType = string.substring(position, endPos);
453
// Two-word type names are normalised to fixed lower-case spellings.
if ("character".equalsIgnoreCase(dataType)
454
&& expectOptional("varying")) {
455
dataType = "character varying";
456
} else if ("double".equalsIgnoreCase(dataType)
457
&& expectOptional("precision")) {
458
dataType = "double precision";
461
// Remember whether the base type is timestamp or time (where this flag is
// used is not visible in this extract).
final boolean timestamp = "timestamp".equalsIgnoreCase(dataType)
462
|| "time".equalsIgnoreCase(dataType);
464
// A '(' at the cursor starts the type modifier, which is appended to the type
// via getExpression().
// NOTE(review): charAt(position) throws StringIndexOutOfBoundsException when
// the type name is the last token of the string.
if (string.charAt(position) == '(') {
465
dataType += getExpression();
469
// Optional time zone clause, appended in a fixed lower-case spelling.
if (expectOptional("with", "time", "zone")) {
470
dataType += " with time zone";
471
} else if (expectOptional("without", "time", "zone")) {
472
dataType += " without time zone";
476
// An optional "[" follows (its handling is not visible in this extract).
if (expectOptional("[")) {
485
* Checks whether the whole string has been consumed.
487
* @return true if there is nothing left to parse, otherwise false
489
// True when the cursor is at the end of the string, or when exactly one
// character remains and it is ';'. Since && binds tighter than ||, the
// expression reads as: atEnd || (oneCharLeft && thatCharIsSemicolon).
public boolean isConsumed() {
490
return position == string.length()
491
|| position + 1 == string.length()
492
&& string.charAt(position) == ';';