1
# This program is copyright 2009-2011 Percona Inc.
2
# Feedback and improvements are welcome.
4
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
5
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
6
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
8
# This program is free software; you can redistribute it and/or modify it under
9
# the terms of the GNU General Public License as published by the Free Software
10
# Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
11
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
14
# You should have received a copy of the GNU General Public License along with
15
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
16
# Place, Suite 330, Boston, MA 02111-1307 USA.
17
# ###########################################################################
18
# SchemaIterator package $Revision: 7547 $
19
# ###########################################################################
22
# SchemaIterator iterates schema objects.
24
package SchemaIterator;
27
use warnings FATAL => 'all';
28
use English qw(-no_match_vars);
29
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
32
$Data::Dumper::Indent = 1;
33
$Data::Dumper::Sortkeys = 1;
34
$Data::Dumper::Quotekeys = 0;
36
my $open_comment = qr{/\*!\d{5} };
47
# Create a new SchemaIterator object with either a dbh or a file_itr.
53
# dbh - dbh to iterate. Mutually exclusive with file_itr.
54
# file_itr - <FileIterator::get_file_itr()> iterator for dump file.
55
# Mutually exclusive with dbh.
56
# OptionParser - <OptionParser> object. All filters are gotten from this
57
# obj: --databases, --tables, etc.
58
# Quoter - <Quoter> object.
61
# Schema - <Schema> object to initialize while iterating.
62
# MySQLDump - <MySQLDump> object to get CREATE TABLE when iterating dbh.
63
# TableParser - <TableParser> object to parse CREATE TABLE for tbl_struct.
64
# keep_ddl - Keep CREATE TABLE (default false)
67
# SchemaIterator object
69
my ( $class, %args ) = @_;
70
my @required_args = qw(OptionParser Quoter);
71
foreach my $arg ( @required_args ) {
72
die "I need a $arg argument" unless $args{$arg};
75
# Either a dbh or a file_itr is required, but not both.
76
my ($file_itr, $dbh) = @args{qw(file_itr dbh)};
77
die "I need either a dbh or file_itr argument"
78
if (!$dbh && !$file_itr) || ($dbh && $file_itr);
82
filters => _make_filters(%args),
85
return bless $self, $class;
89
# Create schema object filters from <OptionParser> options. The OptionParser
90
# object passed to <new()> is checked for filter options like --database,
91
# --tables, --ignore-tables, etc. For all such options, a hash is built
92
# keyed off the same option name. So $filter{tables} represents --tables,
93
# etc. Regex filters are pre-compiled. It is very important to avoid
94
# auto-vivifying certain key-values; see below. The filter hash is used
95
# in sub like <database_is_allowed()>.
97
# This sub is called from <new()>. That's the only place and time it
98
# needs to be called because options shouldn't change between runs.
103
# Required Arguments:
104
# OptionParser - <OptionParser> object. All filters are gotten from this
105
# obj: --databases, --tables, etc.
106
# Quoter - <Quoter> object.
109
# Hashref of filters keyed on corresponding option names.
112
my @required_args = qw(OptionParser Quoter);
113
foreach my $arg ( @required_args ) {
114
die "I need a $arg argument" unless $args{$arg};
116
my ($o, $q) = @args{@required_args};
120
# Do not auto-vivify things like $filters{database} else a check like
121
# if ( !$filters{databases}->{foo} ) will be TRUE when it should be FALSE
122
# if no --databases where given. When in doubt: SchemaIterator.t and
123
# check test coverage. These filters must be accurate or else we may
124
# access something the user doesn't want us to.
126
my @simple_filters = qw(
127
databases tables engines
128
ignore-databases ignore-tables ignore-engines);
130
foreach my $filter ( @simple_filters ) {
131
if ( $o->has($filter) ) {
132
my $objs = $o->get($filter);
133
next FILTER unless $objs && scalar keys %$objs;
134
my $is_table = $filter =~ m/table/ ? 1 : 0;
135
foreach my $obj ( keys %$objs ) {
136
die "Undefined value for --$filter" unless $obj;
139
my ($db, $tbl) = $q->split_unquote($obj);
140
# Database-qualified tables require special handling.
141
# See table_is_allowed().
143
MKDEBUG && _d('Filter', $filter, 'value:', $db, $tbl);
144
$filters{$filter}->{$tbl} = $db;
147
MKDEBUG && _d('Filter', $filter, 'value:', $obj);
148
$filters{$filter}->{$obj} = 1;
154
my @regex_filters = qw(
155
databases-regex tables-regex
156
ignore-databases-regex ignore-tables-regex);
158
foreach my $filter ( @regex_filters ) {
159
if ( $o->has($filter) ) {
160
my $pat = $o->get($filter);
161
next REGEX_FILTER unless $pat;
162
$filters{$filter} = qr/$pat/;
163
MKDEBUG && _d('Filter', $filter, 'value:', $filters{$filter});
167
MKDEBUG && _d('Schema object filters:', Dumper(\%filters));
171
# Sub: next_schema_object
172
# Return the next schema object or undef when no more schema objects.
173
# Only filtered schema objects are returned. If iterating dump files
174
# (i.e. the obj was created with a file_itr arg), then the returned
175
# schema object will always have a ddl (see below). But if iterating
176
# a dbh, then you must create the obj with a MySQLDump obj to get a ddl.
177
# If this object was created with a TableParser, then the ddl, if present,
181
# Hashref of schema object with at least a db and tbl keys, like
186
# ddl => 'CREATE TABLE `a` ( ...', # if keep_ddl
187
# tbl_struct => <TableParser::parse()> hashref of parsed ddl,
190
# The ddl is suitable for <TableParser::parse()>.
191
sub next_schema_object {
195
if ( $self->{file_itr} ) {
196
$schema_obj= $self->_iterate_files();
199
$schema_obj= $self->_iterate_dbh();
203
if ( $schema_obj->{ddl} && $self->{TableParser} ) {
204
$schema_obj->{tbl_struct}
205
= $self->{TableParser}->parse($schema_obj->{ddl});
208
delete $schema_obj->{ddl} unless $self->{keep_ddl};
210
if ( my $schema = $self->{Schema} ) {
211
$schema->add_schema_object($schema_obj);
215
MKDEBUG && _d('Next schema object:', $schema_obj->{db}, $schema_obj->{tbl});
222
if ( !$self->{fh} ) {
223
my ($fh, $file) = $self->{file_itr}->();
225
MKDEBUG && _d('No more files to iterate');
229
$self->{file} = $file;
231
my $fh = $self->{fh};
232
MKDEBUG && _d('Getting next schema object from', $self->{file});
234
local $INPUT_RECORD_SEPARATOR = '';
236
while (defined(my $chunk = <$fh>)) {
237
if ($chunk =~ m/Database: (\S+)/) {
238
# If the file is a dump of one db, then the only indication of that
239
# db is in a comment at the start of the file like,
240
# -- Host: localhost Database: sakila
241
# If the dump is of multiple dbs, then there are both these same
242
# comments and USE statements. We look for the comment which is
243
# unique to both single and multi-db dumps.
245
$db =~ s/^`//; # strip leading `
246
$db =~ s/`$//; # and trailing `
247
if ( $self->database_is_allowed($db) ) {
251
elsif ($self->{db} && $chunk =~ m/CREATE TABLE/) {
252
if ($chunk =~ m/DROP VIEW IF EXISTS/) {
253
# Tables that are actually views have this DROP statment in the
254
# chunk just before the CREATE TABLE. We don't want views.
255
MKDEBUG && _d('Table is a VIEW, skipping');
259
# The open comment is usually present for a view table, which we
260
# probably already detected and skipped above, but this is left her
261
# just in case mysqldump wraps other CREATE TABLE statements in a
262
# a version comment that I don't know about yet.
263
my ($tbl) = $chunk =~ m/$tbl_name/;
266
if ( $self->table_is_allowed($self->{db}, $tbl) ) {
267
my ($ddl) = $chunk =~ m/^(?:$open_comment)?(CREATE TABLE.+?;)$/ms;
269
warn "Failed to parse CREATE TABLE from\n" . $chunk;
272
$ddl =~ s/ \*\/;\Z/;/; # remove end of version comment
274
my ($engine) = $ddl =~ m/\).*?(?:ENGINE|TYPE)=(\w+)/;
276
if ( !$engine || $self->engine_is_allowed($engine) ) {
287
MKDEBUG && _d('No more schema objects in', $self->{file});
291
# Recurse to get next file and begin iterating it. If there's no next
292
# file, then the call will return undef and we'll return undef, too
293
return $self->_iterate_files();
298
my $q = $self->{Quoter};
299
my $dbh = $self->{dbh};
300
MKDEBUG && _d('Getting next schema object from dbh', $dbh);
302
if ( !defined $self->{dbs} ) {
303
# This happens once, the first time we're called.
304
my $sql = 'SHOW DATABASES';
306
my @dbs = grep { $self->database_is_allowed($_) }
307
@{$dbh->selectcol_arrayref($sql)};
308
MKDEBUG && _d('Found', scalar @dbs, 'databases');
309
$self->{dbs} = \@dbs;
312
if ( !$self->{db} ) {
313
$self->{db} = shift @{$self->{dbs}};
314
MKDEBUG && _d('Next database:', $self->{db});
315
return unless $self->{db};
318
if ( !defined $self->{tbls} ) {
319
my $sql = 'SHOW /*!50002 FULL*/ TABLES FROM ' . $q->quote($self->{db});
322
$_->[0]; # (tbl, type)
325
my ($tbl, $type) = @$_;
326
$self->table_is_allowed($self->{db}, $tbl)
327
&& (!$type || ($type ne 'VIEW'));
329
@{$dbh->selectall_arrayref($sql)};
330
MKDEBUG && _d('Found', scalar @tbls, 'tables in database', $self->{db});
331
$self->{tbls} = \@tbls;
334
while ( my $tbl = shift @{$self->{tbls}} ) {
336
if ( $self->{filters}->{'engines'}
337
|| $self->{filters}->{'ignore-engines'} ) {
338
my $sql = "SHOW TABLE STATUS FROM " . $q->quote($self->{db})
341
$engine = $dbh->selectrow_hashref($sql)->{engine};
342
MKDEBUG && _d($tbl, 'uses', $engine, 'engine');
346
if ( !$engine || $self->engine_is_allowed($engine) ) {
348
if ( my $du = $self->{MySQLDump} ) {
349
$ddl = $du->get_create_table($dbh, $q, $self->{db}, $tbl)->[1];
360
MKDEBUG && _d('No more tables in database', $self->{db});
362
$self->{tbls} = undef;
364
# Recurse to get the next database. If there's no next db, then the
365
# call will return undef and we'll return undef, too.
366
return $self->_iterate_dbh();
369
sub database_is_allowed {
370
my ( $self, $db ) = @_;
371
die "I need a db argument" unless $db;
375
my $filter = $self->{filters};
377
if ( $db =~ m/information_schema|performance_schema|lost\+found/ ) {
378
MKDEBUG && _d('Database', $db, 'is a system database, ignoring');
382
if ( $self->{filters}->{'ignore-databases'}->{$db} ) {
383
MKDEBUG && _d('Database', $db, 'is in --ignore-databases list');
387
if ( $filter->{'ignore-databases-regex'}
388
&& $db =~ $filter->{'ignore-databases-regex'} ) {
389
MKDEBUG && _d('Database', $db, 'matches --ignore-databases-regex');
393
if ( $filter->{'databases'}
394
&& !$filter->{'databases'}->{$db} ) {
395
MKDEBUG && _d('Database', $db, 'is not in --databases list, ignoring');
399
if ( $filter->{'databases-regex'}
400
&& $db !~ $filter->{'databases-regex'} ) {
401
MKDEBUG && _d('Database', $db, 'does not match --databases-regex, ignoring');
405
# MKDEBUG && _d('Database', $db, 'is allowed');
409
sub table_is_allowed {
410
my ( $self, $db, $tbl ) = @_;
411
die "I need a db argument" unless $db;
412
die "I need a tbl argument" unless $tbl;
417
my $filter = $self->{filters};
419
if ( $filter->{'ignore-tables'}->{$tbl}
420
&& ($filter->{'ignore-tables'}->{$tbl} eq '*'
421
|| $filter->{'ignore-tables'}->{$tbl} eq $db) ) {
422
MKDEBUG && _d('Table', $tbl, 'is in --ignore-tables list');
426
if ( $filter->{'ignore-tables-regex'}
427
&& $tbl =~ $filter->{'ignore-tables-regex'} ) {
428
MKDEBUG && _d('Table', $tbl, 'matches --ignore-tables-regex');
432
if ( $filter->{'tables'}
433
&& !$filter->{'tables'}->{$tbl} ) {
434
MKDEBUG && _d('Table', $tbl, 'is not in --tables list, ignoring');
438
if ( $filter->{'tables-regex'}
439
&& $tbl !~ $filter->{'tables-regex'} ) {
440
MKDEBUG && _d('Table', $tbl, 'does not match --tables-regex, ignoring');
444
# This handles a special case like "-d d2 -t d1.t1" where the user probably
445
# wants "all tables from database d1 plus table t1 from database d1." In
446
# _make_filters() we cannot add d1 to the allowed databases filter because
447
# then we'll get d1 tables when the user only wants d2 tables. So when
448
# a table passes allow filters, reaching this point, meaning it is allowed,
449
# we make this final to check to see if it's allowed in any database (*)
450
# or allowed in the specific database that the user qualifed the table with.
451
# The first two checks are to prevent auto-vivifying the filters which will
452
# cause bad results (see a similar comment in _make_filters()).
453
if ( $filter->{'tables'}
454
&& $filter->{'tables'}->{$tbl}
455
&& $filter->{'tables'}->{$tbl} ne '*'
456
&& $filter->{'tables'}->{$tbl} ne $db ) {
457
MKDEBUG && _d('Table', $tbl, 'is only allowed in database',
458
$filter->{'tables'}->{$tbl});
462
# MKDEBUG && _d('Table', $tbl, 'is allowed');
466
sub engine_is_allowed {
467
my ( $self, $engine ) = @_;
468
die "I need an engine argument" unless $engine;
470
$engine = lc $engine;
472
my $filter = $self->{filters};
474
if ( $filter->{'ignore-engines'}->{$engine} ) {
475
MKDEBUG && _d('Engine', $engine, 'is in --ignore-databases list');
479
if ( $filter->{'engines'}
480
&& !$filter->{'engines'}->{$engine} ) {
481
MKDEBUG && _d('Engine', $engine, 'is not in --engines list, ignoring');
485
# MKDEBUG && _d('Engine', $engine, 'is allowed');
490
my ($package, undef, $line) = caller 0;
491
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
492
map { defined $_ ? $_ : 'undef' }
494
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
499
# ###########################################################################
500
# End SchemaIterator package
501
# ###########################################################################