2
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
4
* This program is free software; you can redistribute it and/or
5
* modify it under the terms of the GNU General Public License as
6
* published by the Free Software Foundation; version 2 of the
9
* This program is distributed in the hope that it will be useful,
10
* but WITHOUT ANY WARRANTY; without even the implied warranty of
11
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
* GNU General Public License for more details.
14
* You should have received a copy of the GNU General Public License
15
* along with this program; if not, write to the Free Software
16
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
23
#include <glib/gunicode.h>
24
#include "grt/common.h"
25
#include "grt/grt_manager.h"
26
#include "grtdb/charset_utils.h"
27
#include "base/string_utilities.h"
29
#include "mysql_sql_parser_fe.h"
31
#include <boost/scoped_array.hpp>
32
#include "myx_statement_parser.h"
37
// Stub that ignores its argument and always yields 0.
// NOTE(review): presumably only provides a symbol expected by the MySQL headers pulled in below - confirm.
extern int pthread_dummy(int)
{
  const int always_zero= 0;
  return always_zero;
}
39
// The parser's semantic value type is an untyped pointer.
typedef void * YYSTYPE;
40
// NOTE(review): presumably tells the generated parser header that YYSTYPE is already defined above - confirm.
#define YYSTYPE_IS_DECLARED
42
// to stop complaint from compiler about a certain ATTRIBUTE_FORMAT in my_dbug.h
45
#include "myx_lex_helpers.h"
48
#include "myx_sql_parser.tab.hh"
50
// Name handed to get_charset_by_name() wherever this file needs a CHARSET_INFO.
const char *MYSQL_DEFAULT_CHARSET= "utf8_bin";
53
namespace mysql_parser
55
// Forwarding wrapper around get_charset_by_name().
// cs_name: charset name to look up; flags: passed through unchanged.
// Returns whatever get_charset_by_name() returns for those arguments.
CHARSET_INFO * get_sql_charset_by_name(const char *cs_name, int/*myf*/ flags)
57
return get_charset_by_name(cs_name, flags);
64
// Front end driving the current parse run; process_sql_statement_cb() reads its
// max_err_count and calls stop() on it once the error limit is reached.
Mysql_sql_parser_fe *sql_parser_fe;
65
// Callback invoked by process_sql_statement_cb() for each processed statement or reported error.
Mysql_sql_parser_fe::fe_process_sql_statement_callback cb;
69
// Passed to Lex_helper for every statement; also a precondition of the
// oversized-INSERT check in is_statement_relevant().
bool is_ast_generation_enabled;
70
// Limit compared against strlen() of an INSERT statement; 0 disables that check.
size_t max_insert_statement_size;
71
// When false, a statement whose first token is CREATE matches the skip list in is_statement_relevant().
bool processing_create_statements;
72
// When false, a statement whose first token is ALTER matches the skip list in is_statement_relevant().
bool processing_alter_statements;
73
// When false, a statement whose first token is DROP matches the skip list in is_statement_relevant().
bool processing_drop_statements;
74
// SQL mode flags copied into the lexer state for every statement.
Mysql_sql_parser_fe::SqlMode sql_mode;
81
// Looks up the CHARSET_INFO registered under MYSQL_DEFAULT_CHARSET.
static CHARSET_INFO * charset()
{
  CHARSET_INFO *default_cs= get_charset_by_name(MYSQL_DEFAULT_CHARSET, MYF(0));
  return default_cs;
}
82
// Prepares the lexer and the shared AST statics for parsing one statement.
// statement: NUL-terminated SQL text (its length is taken with strlen()).
// sql_mode: mode flags copied one by one into the lexer state.
// is_ast_generation_enabled: stored in SqlAstStatics for the duration of the parse.
Lex_helper(const char *statement, const Mysql_sql_parser_fe::SqlMode &sql_mode, bool is_ast_generation_enabled)
84
lex_start(&_lex, reinterpret_cast<const unsigned char *>(statement), (unsigned int)strlen(statement));
85
_lex.first_item= NULL;
87
_lex.charset= charset();
88
lex_args.arg1= &_yystype;
90
// paired with the myx_free_parser_source() call elsewhere in this helper
myx_set_parser_source(statement);
92
_lex.sql_mode.MODE_ANSI_QUOTES= sql_mode.MODE_ANSI_QUOTES;
93
_lex.sql_mode.MODE_HIGH_NOT_PRECEDENCE= sql_mode.MODE_HIGH_NOT_PRECEDENCE;
94
_lex.sql_mode.MODE_PIPES_AS_CONCAT= sql_mode.MODE_PIPES_AS_CONCAT;
95
_lex.sql_mode.MODE_NO_BACKSLASH_ESCAPES= sql_mode.MODE_NO_BACKSLASH_ESCAPES;
96
_lex.sql_mode.MODE_IGNORE_SPACE= sql_mode.MODE_IGNORE_SPACE;
97
// the lexer keeps its own ignore_space flag, mirrored from the mode just copied
_lex.ignore_space= _lex.sql_mode.MODE_IGNORE_SPACE;
99
SqlAstStatics::sql_statement(statement);
100
SqlAstStatics::is_ast_generation_enabled= is_ast_generation_enabled != 0;
101
// reset both terminal-node markers to the same placeholder value
SqlAstStatics::last_terminal_node= SqlAstTerminalNode(NULL, 0, -1, -1, -1);
102
SqlAstStatics::first_terminal_node= SqlAstTerminalNode(NULL, 0, -1, -1, -1);
106
// Counterpart of the myx_set_parser_source() call made in the constructor.
// NOTE(review): presumably this is the destructor body - confirm.
myx_free_parser_source();
108
// Gives callers access to the lexer state held by this helper.
LEX * lex()
{
  LEX *lex_state= &_lex;
  return lex_state;
}
115
// Declares a local Lex_helper named _lex_helper for the given statement.
// The expansion already ends with a semicolon, so call sites in this file omit it.
#define LEX_HELPER(statement, sql_mode, is_ast_generation_enabled) Lex_helper _lex_helper(statement, sql_mode, is_ast_generation_enabled);
118
// Extracts the first token of a statement.
// statement: SQL text handed to a temporary Lex_helper.
// sql_mode: lexer mode flags (taken by value).
// first_token_pos: out parameter; receives the token's stmt_boffset(), or -1 when
// no token with a non-zero value length was obtained.
// Returns the token text upper-cased when such a token exists.
std::string get_first_sql_token(const char *statement, Mysql_sql_parser_fe::SqlMode sql_mode, int *first_token_pos)
120
LEX_HELPER(statement, sql_mode, true)
126
const SqlAstNode *item= static_cast<const SqlAstNode *>(token);
127
if (item && item->value_length())
129
*first_token_pos= item->stmt_boffset();
130
return base::toupper(item->value());
134
*first_token_pos= -1;
139
// Decides from the first token of a statement whether it should be processed.
// statement: SQL text to inspect.
// context: supplies the sql mode and the filtering switches tested below.
// Returns (relevant || !irrelevant).
bool is_statement_relevant(const char *statement, const Context *context)
142
std::string token= get_first_sql_token(statement, context->sql_mode, &first_token_pos);
150
// statement kinds the context asks to skip: disabled CREATE/ALTER/DROP processing,
// DML when ignore_dml is set, and INSERT statements longer than the configured limit
(!context->processing_create_statements && ("CREATE" == token)) ||
151
(!context->processing_alter_statements && ("ALTER" == token)) ||
152
(!context->processing_drop_statements && ("DROP" == token)) ||
153
(context->ignore_dml && ("SELECT" == token)) ||
154
(context->ignore_dml && ("INSERT" == token)) ||
155
// the size limit only applies while AST generation is enabled and the limit is non-zero
(context->is_ast_generation_enabled && ("INSERT" == token) &&
156
(context->max_insert_statement_size != 0) && (strlen(statement) > context->max_insert_statement_size)) ||
157
(context->ignore_dml && ("DELETE" == token)) ||
158
(context->ignore_dml && ("UPDATE" == token))
160
return (relevant || !irrelevant);
164
// this function removes comment braces of form /*!NUMBER */
165
// making their contents a part of the query itself
167
// sql: original statement text (not modified).
// effective_sql: receives the rewritten text; callers in this file treat an empty
// value as "nothing was rewritten" and fall back to sql.
// cs: charset used to step over multi-byte characters and to classify digits.
// ignore_statement: optional out flag, set when the text right after the version
// number is " CREATE TABLE".
// first_versioning_comment_pos: out parameter; starts at -1 and receives the offset
// of the first versioning comment found.
void remove_versioning_comments(const std::string &sql, std::string &effective_sql, CHARSET_INFO *cs, bool *ignore_statement, int *first_versioning_comment_pos)
169
*first_versioning_comment_pos= -1;
171
const char *begin= sql.c_str();
172
const char *ptr= begin;
173
const char *endptr= ptr + sql.length();
177
// skip ahead to the next '/', always advancing by at least one byte
for(; (ptr < endptr) && (*ptr != '/'); ptr += max(my_mbcharlen(cs, *ptr),1))
180
if ((ptr + 3) >= endptr)
183
// a versioning comment opener is '/' followed by '*' and '!'
if ((ptr[1] != '*') || (ptr[2] != '!'))
185
// NOTE(review): unlike the other advances in this function this one is not clamped
// with max(..., 1); if my_mbcharlen() can return 0 the scan would not move forward - confirm.
ptr += my_mbcharlen(cs, *ptr);
189
const char *start_start= ptr;
192
// step over the three opener characters, then count the digits of the version number
for(ptr= ptr + 3; (ptr < endptr) && (my_isdigit(cs, *ptr)); ptr += max(my_mbcharlen(cs, *ptr),1), digit_count++)
195
if (digit_count == 0)
198
// start_start..start_end spans the opener including its version number
const char *start_end= ptr;
200
// in case of parsing mysqldump files, there are 'create table' statements embraced with comments.
201
// they are to be ignored, because they relate to views, not tables.
202
if (ignore_statement)
203
// case-sensitive comparison of the 13 characters that follow the version number
*ignore_statement= (0 == strncmp(ptr, " CREATE TABLE", 13));
206
// NOTE(review): the 'register' storage class was deprecated in C++11 and removed in C++17.
register bool quoted= false;
207
register bool escaped= false;
208
register bool commented= false;
209
int nested_comments_count= 1;
212
// stops one character early because the body looks ahead at ptr[1]
for(; ptr < endptr-1; ++ptr)
214
// a backslash inside a quoted string toggles the escape state; anything else clears it
escaped= (!commented && quoted && ('\\' == ptr[0])) ? !escaped : false;
219
if ((1 == nested_comments_count) && !quoted)
229
if (!commented && !quoted && ('*' == ptr[1]))
230
++nested_comments_count;
234
if (!commented && !quoted && ('/' == ptr[1]))
236
if (0 == --nested_comments_count)
243
if (!escaped && !commented)
247
if (ptr[0] == quot_sym)
268
if (effective_sql.empty())
270
*first_versioning_comment_pos= start_start - begin;
274
// replace comments in-place with spaces
275
// same-length replacement, so character offsets in effective_sql match those in sql
effective_sql.replace(start_start - begin, start_end - start_start, start_end - start_start, ' ');
276
// blank out the two-character sequence at ptr
effective_sql.replace(ptr - begin, 2, 2, ' ');
283
// Default constructor.
// NOTE(review): presumably leaves every mode flag cleared, as reset() does - confirm.
Mysql_sql_parser_fe::SqlMode::SqlMode()
289
// Clears all five mode flags.
void Mysql_sql_parser_fe::SqlMode::reset()
291
MODE_ANSI_QUOTES= false;
292
MODE_HIGH_NOT_PRECEDENCE= false;
293
MODE_PIPES_AS_CONCAT= false;
294
MODE_NO_BACKSLASH_ESCAPES= false;
295
MODE_IGNORE_SPACE= false;
299
// Sets mode flags from a comma-separated list of SQL mode names.
// text_value: mode list; it is upper-cased first, so matching is case-insensitive.
// Entries are compared verbatim after splitting on ',', so surrounding whitespace
// in an entry prevents a match.
// NOTE(review): no branch here sets MODE_HIGH_NOT_PRECEDENCE - confirm that this is intended.
void Mysql_sql_parser_fe::SqlMode::parse(const std::string &text_value)
303
std::string sql_mode_string= base::toupper(text_value);
304
std::istringstream iss(sql_mode_string);
306
while (std::getline(iss, mode, ','))
308
// combination modes: each of these turns on three of the individual flags
if (mode == "ANSI" || mode == "DB2" || mode == "MSSQL" || mode == "ORACLE" || mode == "POSTGRESQL")
310
MODE_ANSI_QUOTES= true;
311
MODE_PIPES_AS_CONCAT= true;
312
MODE_IGNORE_SPACE= true;
314
else if (mode == "ANSI_QUOTES")
315
MODE_ANSI_QUOTES= true;
316
else if (mode == "PIPES_AS_CONCAT")
317
MODE_PIPES_AS_CONCAT= true;
318
else if (mode == "NO_BACKSLASH_ESCAPES")
319
MODE_NO_BACKSLASH_ESCAPES= true;
320
else if (mode == "IGNORE_SPACE")
321
MODE_IGNORE_SPACE= true;
326
// Constructs the front end with CREATE/ALTER/DROP processing and AST generation
// enabled and no INSERT size limit, then loads the SQL mode from the "SqlMode"
// application option when that option holds a string.
Mysql_sql_parser_fe::Mysql_sql_parser_fe(grt::GRT *grt)
330
max_insert_statement_size(0),
331
processing_create_statements(true),
332
processing_alter_statements(true),
333
processing_drop_statements(true),
334
is_ast_generation_enabled(true),
337
bec::GRTManager *grtm= bec::GRTManager::get_instance_for(_grt);
338
grt::ValueRef sql_mode_string;
340
sql_mode_string= grtm->get_app_option("SqlMode");
341
// only parse the option when it is set and can be viewed as a string
if (sql_mode_string.is_valid() && grt::StringRef::can_wrap(sql_mode_string))
342
sql_mode.parse(grt::StringRef::cast_from(sql_mode_string));
346
// Mutex locked by parse_sql_script(), parse_sql_script_file() and the member get_first_sql_token().
GStaticMutex _parser_fe_critical_section= G_STATIC_MUTEX_INIT;
349
// Clears the shared AST tree and the global stop flag.
void Mysql_sql_parser_fe::reset()
351
SqlAstStatics::tree(NULL);
352
::parser_is_stopped= false;
354
// NOTE(review): presumably guards work that must run only once - confirm.
static bool initialized= false;
363
// Raises the global stop flag, which process_sql_statement_cb() checks on entry.
// Returns the value just assigned (true, converted to int).
int Mysql_sql_parser_fe::stop()
365
return ::parser_is_stopped= true;
369
// Updates this front end's sql_mode from a comma-separated mode list (see SqlMode::parse()).
void Mysql_sql_parser_fe::parse_sql_mode(const std::string &sql_mode_string)
371
sql_mode.parse(sql_mode_string);
375
// Splits an in-memory SQL script into statements and routes each one through
// process_sql_statement_cb().
// sql: script text; cb: per-statement callback; user_data: opaque pointer stored in the context.
// Returns the context's err_count once processing has finished.
int Mysql_sql_parser_fe::parse_sql_script(const std::string &sql, fe_process_sql_statement_callback cb, void *user_data)
377
bec::GStaticMutexLock parser_fe_critical_section(_parser_fe_critical_section);
379
// snapshot of this object's settings for the duration of the run
Context context= {this, cb, user_data, 0, ignore_dml, is_ast_generation_enabled, max_insert_statement_size, processing_create_statements, processing_alter_statements, processing_drop_statements, sql_mode};
380
myx_process_sql_statements(sql.c_str(), Lex_helper::charset(), &process_sql_statement_cb, &context, MYX_SPM_NORMAL_MODE);
381
return context.err_count;
385
// File-based counterpart of parse_sql_script(): statements are read from the named
// file and routed through process_sql_statement_cb().
// filename: path of the script; cb: per-statement callback; user_data: opaque pointer stored in the context.
// Returns the context's err_count once processing has finished.
int Mysql_sql_parser_fe::parse_sql_script_file(const std::string &filename, fe_process_sql_statement_callback cb, void *user_data)
387
bec::GStaticMutexLock parser_fe_critical_section(_parser_fe_critical_section);
389
// snapshot of this object's settings for the duration of the run
Context context= {this, cb, user_data, 0, ignore_dml, is_ast_generation_enabled, max_insert_statement_size, processing_create_statements, processing_alter_statements, processing_drop_statements, sql_mode};
390
myx_process_sql_statements_from_file(filename.c_str(), Lex_helper::charset(), &process_sql_statement_cb, &context, MYX_SPM_NORMAL_MODE/*MYX_SPM_DELIMS_REQUIRED*/);
391
return context.err_count;
395
// std::string convenience overload of escape_string().
// in_text: text to escape; out_text: destination for the escaped text.
int Mysql_sql_parser_fe::escape_string(const std::string &in_text, std::string &out_text)
397
// room for two output bytes per input byte plus a terminator
boost::scoped_array<char> out(new char[in_text.size()*2+1]);
398
// NOTE(review): out_size is passed as 0 although the buffer is larger - confirm how
// escape_string_for_mysql() interprets a zero length.
int res= escape_string(out.get(), 0, in_text.c_str(), in_text.size());
404
// Raw-buffer overload: forwards to mysql_parser::escape_string_for_mysql() with the
// default charset and returns its result.
int Mysql_sql_parser_fe::escape_string(char *out, unsigned long out_size, const char *in, unsigned long in_size)
406
// looked up once and reused on later calls
static CHARSET_INFO *cs= get_charset_by_name(MYSQL_DEFAULT_CHARSET, MYF(0));
407
return mysql_parser::escape_string_for_mysql(cs, out, out_size, in, in_size);
411
// Per-statement handler passed to the statement splitter by parse_sql_script() and
// parse_sql_script_file().
// splitter: the splitter that produced the statement; used for position translation.
// statement: NUL-terminated text of one statement.
// context_ptr: pointer to the Context built by the caller.
// Validates the encoding, unwraps versioning comments, filters out irrelevant
// statements, parses the rest and reports the outcome through context->cb.
int Mysql_sql_parser_fe::process_sql_statement_cb(const MyxStatementParser *splitter, const char *statement, void *context_ptr)
413
// possible values for result:
414
// -1 - statement was ignored
415
// 0 - statement was successfully processed
416
// 1 - error occurred during statement processing
419
if (::parser_is_stopped)
422
Context *context= reinterpret_cast <Context *> (context_ptr);
424
if (!context || !context->cb)
427
// check if statement is in utf8 encoding
428
if (!g_utf8_validate(statement, -1, NULL))
432
// starts one before the statement because the scan below pre-increments
const char *c= statement - 1;
435
if (base::EolHelpers::is_eol(++c))
441
std::string err_msg= "SQL statement starting from pointed line contains non UTF8 characters";
442
// no AST is available here, so the callback gets NULL and only the line count as position
context->cb(context->data, splitter, statement, NULL, 0, 0, stmt_lc, 0, stmt_lc, 0, 0, err_msg);
443
context->err_count++;
447
// strip comments before further statement processing because
448
// mysqldump puts the whole DDL in comments e.g. for triggers
449
std::string orig_sql(statement);
450
std::string effective_sql;
451
bool ignore_statement= false;
452
int first_versioning_comment_pos;
453
remove_versioning_comments(orig_sql, effective_sql, Lex_helper::charset(), &ignore_statement, &first_versioning_comment_pos);
454
// an empty effective_sql means nothing was rewritten, so parse the original text
const std::string &sql= effective_sql.empty() ? orig_sql : effective_sql;
456
// filter inappropriate statements
457
if (ignore_statement || !is_statement_relevant(sql.c_str(), context))
458
return -1; // ignored
460
// parse/generate AST
461
LEX_HELPER(sql.c_str(), context->sql_mode, context->is_ast_generation_enabled)
463
const SqlAstNode *tree= SqlAstStatics::tree();
465
// in case of syntax error extend err message with context
466
std::string err_msg= myx_get_err_msg();
467
int err_tok_line_pos= 0;
469
int err_tok_lineno= _lex_helper.lex()->yylineno;
474
// NOTE(review): first_item is dereferenced after only last_item was null-checked - confirm
// that first_item cannot be NULL while last_item is set.
if (!_lex_helper.lex()->last_item || (_lex_helper.lex()->first_item->value_length() == -1))
480
else if ("syntax error" == err_msg)
482
if (const SqlAstNode *item= _lex_helper.lex()->last_item)
484
static const size_t MAX_SQL_CONTEXT_SIZE= 80;
485
// the excerpt is cut from the original statement text, starting at the last
// token's offset and capped at MAX_SQL_CONTEXT_SIZE characters
std::string statement_= statement;
486
size_t boffset= item->stmt_boffset();
487
std::string err_context= statement_.substr(boffset,
488
std::min<size_t>(statement_.size() - boffset, MAX_SQL_CONTEXT_SIZE));
491
.append("SQL syntax error near '")
495
determine_token_position(item, splitter, statement, err_tok_lineno, err_tok_line_pos, err_tok_len);
500
// statement start position; stays at -1/-1 when the lexer recorded no first item
int stmt_begin_lineno= -1;
501
int stmt_begin_line_pos= -1;
502
if (const SqlAstNode *first_item= _lex_helper.lex()->first_item)
504
stmt_begin_lineno= first_item->stmt_lineno();
505
stmt_begin_line_pos= 0;
507
determine_token_position(first_item, splitter, statement, stmt_begin_lineno, stmt_begin_line_pos, tok_len);
510
// statement end position; stays at -1/-1 when the lexer recorded no last item
int stmt_end_lineno= -1;
511
int stmt_end_line_pos= -1;
512
if (const SqlAstNode *last_item= _lex_helper.lex()->last_item)
514
stmt_end_lineno= last_item->stmt_lineno();
515
stmt_end_line_pos= 0;
517
bool is_tok_multiline= false;
518
int alt_stmt_end_line_pos= 0;
520
determine_token_position(last_item, splitter, statement, stmt_end_lineno, stmt_end_line_pos, tok_len);
522
// walk the last token's characters looking for line breaks inside it
for (const char *c= (statement + last_item->stmt_boffset()), *end= (statement + last_item->stmt_boffset() + tok_len); c < end; ++c)
524
if (base::EolHelpers::is_eol(c))
527
// in case of multi-line token the line position of its end needs adjustment
528
is_tok_multiline= true;
529
alt_stmt_end_line_pos= 0;
533
++alt_stmt_end_line_pos;
536
if (is_tok_multiline)
537
stmt_end_line_pos= alt_stmt_end_line_pos;
539
stmt_end_line_pos += tok_len;
541
// closing quote possible which must be included into the range
542
int statement_non_token_ending_len= 0;
543
// inspects the character immediately after the last token
switch (*(statement + last_item->stmt_boffset() + tok_len))
548
++statement_non_token_ending_len;
551
stmt_end_line_pos += statement_non_token_ending_len;
554
// call callback function to process generated AST or syntax error
555
result= context->cb(context->data, splitter,
558
stmt_begin_lineno, stmt_begin_line_pos, stmt_end_lineno, stmt_end_line_pos,
559
err_tok_lineno, err_tok_line_pos, err_tok_len, err_msg);
561
context->err_count++;
563
// a max_err_count of 0 or less means "no limit"; otherwise stop once the limit is reached
if ((context->sql_parser_fe->max_err_count > 0) && (context->sql_parser_fe->max_err_count <= context->err_count))
564
context->sql_parser_fe->stop();
570
// Translates a token's byte offsets within a statement into a line number, a
// position within that line and a length.
// item: AST node supplying stmt_lineno(), stmt_boffset() and stmt_eoffset().
// splitter: supplies the column offset applied at the end of this function.
// statement: text the offsets refer to.
// lineno, token_line_pos, token_len: in/out results updated by this function.
void Mysql_sql_parser_fe::determine_token_position(const SqlAstNode *item, const MyxStatementParser *splitter, const char *statement, int &lineno, int &token_line_pos, int &token_len)
572
lineno= item->stmt_lineno();
573
const char *tokenbeg= statement + item->stmt_boffset();
574
const char *tokenend= statement + item->stmt_eoffset();
578
bool initial_token_end_was_eol= false;
580
// NOTE(review): reads the byte before tokenend; assumes stmt_eoffset() is at least 1 - confirm.
if (std::isspace((unsigned char)*(tokenend-1)))
582
// this is a case when token end is set to the beginning of the subsequent token
585
else if (base::EolHelpers::is_eol(tokenend))
587
// if initial token end is set to EOL then this EOL doesn't change effective token lineno
589
initial_token_end_was_eol= true;
592
if (std::isspace((unsigned char)*tokenend) && (tokenend-1 > tokenbeg))
594
// there can be multiple trailing token delimiters
595
while (std::isspace((unsigned char)*tokenend) && (tokenend > tokenbeg))
599
else if (initial_token_end_was_eol)
603
// find beginning of the line
604
const char *linebeg= tokenbeg;
605
// scan backwards until a line break or the start of the statement is reached
for (; (linebeg > statement) && ('\n' != *linebeg) && ('\r' != *linebeg); --linebeg) {}
606
if (('\n' == *linebeg) || ('\r' == *linebeg))
609
// looked up once and reused on later calls
static CHARSET_INFO *cs= get_charset_by_name(MYSQL_DEFAULT_CHARSET, MYF(0));
611
// translate boffset/eoffset into position within the line
612
// taking into account active encoding
613
const char *ptr= linebeg;
614
while (ptr < tokenbeg)
616
// advance one character at a time, never by less than one byte
ptr+= max(my_mbcharlen(cs, *ptr), 1);
621
// taking into account active encoding
622
while (ptr < tokenend)
625
ptr+= max(my_mbcharlen(cs, *ptr), 1);
628
// first line is special because it may contain part of previous statement ending with statements delimiter
629
// while token position is specified relative to current statement
631
token_line_pos+= splitter->statement_first_line_first_symbol_pos();
635
// Returns the first token of a statement after unwrapping versioning comments.
// sql: statement text.
// versioning_comment_subst_token: returned in place of the real token when it is
// non-empty and a versioning comment starts before the first token.
std::string Mysql_sql_parser_fe::get_first_sql_token(const std::string &sql, const std::string &versioning_comment_subst_token)
637
bec::GStaticMutexLock parser_fe_critical_section(_parser_fe_critical_section);
639
// a function-local SqlMode object is used here instead of the member sql_mode
static Mysql_sql_parser_fe::SqlMode sql_mode;
641
std::string effective_sql;
642
bool ignore_statement= false;
643
int first_versioning_comment_pos;
644
remove_versioning_comments(sql, effective_sql, Lex_helper::charset(), &ignore_statement, &first_versioning_comment_pos);
645
// an empty effective_sql means nothing was rewritten, so use the original text
const std::string &sql_= effective_sql.empty() ? sql : effective_sql;
648
std::string token= ::get_first_sql_token(sql_.c_str(), sql_mode, &first_token_pos);
650
// both positions must be known (> -1) and the comment must precede the token
if ((first_versioning_comment_pos > -1) && (first_token_pos > -1) && (first_versioning_comment_pos < first_token_pos) && !versioning_comment_subst_token.empty())
651
return versioning_comment_subst_token;
b'\\ No newline at end of file'