~percona-toolkit-dev/percona-toolkit/fix-log-parser-writer-bug-963225

« back to all changes in this revision

Viewing changes to lib/TableChunker.pm

Committer: Daniel Nichter
Date: 2011-06-24 17:22:06 UTC
Revision ID: daniel@percona.com-20110624172206-c7q4s4ad6r260zz6

Add lib/, t/lib/, and sandbox/. All modules are updated and passing on MySQL 5.1.

files added:
lib/Advisor.pm

lib/AdvisorRules.pm

lib/BinaryLogParser.pm

lib/ChangeHandler.pm

lib/CompareQueryTimes.pm

lib/CompareResults.pm

lib/CompareTableStructs.pm

lib/CompareWarnings.pm

lib/CopyRowsInsertSelect.pm

lib/DSNParser.pm

lib/Daemon.pm

lib/DuplicateKeyFinder.pm

lib/EventAggregator.pm

lib/EventTimeline.pm

lib/ExecutionThrottler.pm

lib/ExplainAnalyzer.pm

lib/FileIterator.pm

lib/ForeignKeyIterator.pm

lib/GeneralLogParser.pm

lib/HTTPProtocolParser.pm

lib/IndexUsage.pm

lib/InnoDBStatusParser.pm

lib/KeySize.pm

lib/LogSplitter.pm

lib/MaatkitTest.pm

lib/MasterSlave.pm

lib/MemcachedEvent.pm

lib/MemcachedProtocolParser.pm

lib/MockSth.pm

lib/MockSync.pm

lib/MockSyncStream.pm

lib/MySQLConfig.pm

lib/MySQLConfigComparer.pm

lib/MySQLDump.pm

lib/MySQLProtocolParser.pm

lib/OSCCaptureSync.pm

lib/OptionParser.pm

lib/Outfile.pm

lib/PgLogParser.pm

lib/Pipeline.pm

lib/PodParser.pm

lib/Processlist.pm

lib/ProcesslistAggregator.pm

lib/Progress.pm

lib/ProtocolParser.pm

lib/QueryAdvisorRules.pm

lib/QueryParser.pm

lib/QueryReportFormatter.pm

lib/QueryReview.pm

lib/QueryRewriter.pm

lib/Quoter.pm

lib/ReportFormatter.pm

lib/Retry.pm

lib/RowDiff.pm

lib/Runtime.pm

lib/SQLParser.pm

lib/Sandbox.pm

lib/Schema.pm

lib/SchemaIterator.pm

lib/SimpleTCPDumpParser.pm

lib/SlowLogParser.pm

lib/SlowLogWriter.pm

lib/SysLogParser.pm

lib/TCPRequestAggregator.pm

lib/TableChecksum.pm

lib/TableChunker.pm

lib/TableNibbler.pm

lib/TableParser.pm

lib/TableSyncChunk.pm

lib/TableSyncGroupBy.pm

lib/TableSyncNibble.pm

lib/TableSyncStream.pm

lib/TableSyncer.pm

lib/TableUsage.pm

lib/TcpdumpParser.pm

lib/TextResultSetParser.pm

lib/TimeSeriesTrender.pm

lib/Transformers.pm

lib/UpgradeReportFormatter.pm

lib/VariableAdvisorRules.pm

lib/VersionParser.pm

sandbox

sandbox/load-sakila-db

sandbox/sakila-db

sandbox/sakila-db/sakila-data.sql

sandbox/sakila-db/sakila-schema.sql

sandbox/servers

sandbox/servers/4.1

sandbox/servers/4.1/data.tar.gz

sandbox/servers/4.1/my.sandbox.cnf

sandbox/servers/5.0

sandbox/servers/5.0/data.tar.gz

sandbox/servers/5.0/my.sandbox.cnf

sandbox/servers/5.1

sandbox/servers/5.1/data.tar.gz

sandbox/servers/5.1/my.sandbox.cnf

sandbox/servers/5.5

sandbox/servers/5.5/data.tar.gz

sandbox/servers/5.5/my.sandbox.cnf

sandbox/servers/start

sandbox/servers/stop

sandbox/servers/use

sandbox/start-sandbox

sandbox/stop-sandbox

sandbox/test-env

t/lib

t/lib/Advisor.t

t/lib/BinaryLogParser.t

t/lib/ChangeHandler.t

t/lib/CompareQueryTimes.t

t/lib/CompareResults.t

t/lib/CompareWarnings.t

t/lib/CopyRowsInsertSelect.t

t/lib/DSNParser.t

t/lib/Daemon.t

t/lib/DuplicateKeyFinder.t

t/lib/EventAggregator.t

t/lib/EventTimeline.t

t/lib/ExecutionThrottler.t

t/lib/ExplainAnalyzer.t

t/lib/FileIterator.t

t/lib/ForeignKeyIterator.t

t/lib/GeneralLogParser.t

t/lib/HTTPProtocolParser.t

t/lib/IndexUsage.t

t/lib/InnoDBStatusParser.t

t/lib/KeySize.t

t/lib/LogSplitter.t

t/lib/MasterSlave.t

t/lib/MemcachedEvent.t

t/lib/MemcachedProtocolParser.t

t/lib/MockSth.t

t/lib/MockSyncStream.t

t/lib/MySQLConfig.t

t/lib/MySQLConfigComparer.t

t/lib/MySQLDump.t

t/lib/MySQLProtocolParser.t

t/lib/OSCCaptureSync.t

t/lib/OptionParser.t

t/lib/Outfile.t

t/lib/PgLogParser.t

t/lib/Pipeline.t

t/lib/PodParser.t

t/lib/Processlist.t

t/lib/ProcesslistAggregator.t

t/lib/Progress.t

t/lib/ProtocolParser.t

t/lib/QueryAdvisorRules.t

t/lib/QueryParser.t

t/lib/QueryReportFormatter.t

t/lib/QueryReview.t

t/lib/QueryRewriter.t

t/lib/Quoter.t

t/lib/ReportFormatter.t

t/lib/Retry.t

t/lib/RowDiff-custom.t

t/lib/RowDiff.t

t/lib/Runtime.t

t/lib/SQLParser.t

t/lib/Schema.t

t/lib/SchemaIterator.t

t/lib/SimpleTCPDumpParser.t

t/lib/SlowLogParser.t

t/lib/SlowLogWriter.t

t/lib/SysLogParser.t

t/lib/TCPRequestAggregator.t

t/lib/TableChecksum.t

t/lib/TableChunker.t

t/lib/TableNibbler.t

t/lib/TableParser.pl

t/lib/TableParser.t

t/lib/TableSyncChunk.t

t/lib/TableSyncGroupBy.t

t/lib/TableSyncNibble.t

t/lib/TableSyncStream.t

t/lib/TableSyncer.t

t/lib/TableUsage.t

t/lib/TcpdumpParser.t

t/lib/TextResultSetParser.t

t/lib/TimeSeriesTrender.t

t/lib/Transformers.t

t/lib/UpgradeReportFormatter.t

t/lib/VariableAdvisorRules.t

t/lib/VersionParser.t

t/lib/foo.t

t/lib/samples

t/lib/samples/00_geodb_coordinates.sql

t/lib/samples/CopyRowsNormalized

t/lib/samples/CopyRowsNormalized/raw-data.sql

t/lib/samples/CopyRowsNormalized/tbls001.sql

t/lib/samples/CopyRowsNormalized/tbls002.sql

t/lib/samples/CopyRowsNormalized/tbls003.sql

t/lib/samples/ForeignKeyIterator

t/lib/samples/ForeignKeyIterator/fktbls001.sql

t/lib/samples/ForeignKeyIterator/fktbls002.sql

t/lib/samples/LogSplitter

t/lib/samples/LogSplitter/binlog010.txt

t/lib/samples/LogSplitter/slow006-random-1.txt

t/lib/samples/LogSplitter/slow006-random-2.txt

t/lib/samples/OptionParser

t/lib/samples/OptionParser/help001.txt

t/lib/samples/OptionParser/help002.txt

t/lib/samples/OptionParser/help003.txt

t/lib/samples/OptionParser/help004.txt

t/lib/samples/OptionParser/help005.txt

t/lib/samples/OptionParser/help006.txt

t/lib/samples/OptionParser/help007.txt

t/lib/samples/OptionParser/help008.txt

t/lib/samples/OptionParser/help009.txt

t/lib/samples/OptionParser/help010.txt

t/lib/samples/QueryReportFormatter

t/lib/samples/QueryReportFormatter/report001.txt

t/lib/samples/QueryReportFormatter/report002.txt

t/lib/samples/QueryReportFormatter/report003.txt

t/lib/samples/QueryReportFormatter/report004.txt

t/lib/samples/QueryReportFormatter/report005.txt

t/lib/samples/QueryReportFormatter/report006.txt

t/lib/samples/QueryReportFormatter/report007.txt

t/lib/samples/QueryReportFormatter/report008.txt

t/lib/samples/QueryReportFormatter/report009.txt

t/lib/samples/QueryReportFormatter/report010.txt

t/lib/samples/QueryReportFormatter/report011.txt

t/lib/samples/QueryReportFormatter/report012.txt

t/lib/samples/QueryReportFormatter/report013.txt

t/lib/samples/QueryReportFormatter/report014.txt

t/lib/samples/QueryReportFormatter/report015.txt

t/lib/samples/QueryReportFormatter/report016.txt

t/lib/samples/QueryReportFormatter/report017.txt

t/lib/samples/QueryReportFormatter/report018.txt

t/lib/samples/QueryReportFormatter/report019.txt

t/lib/samples/QueryReportFormatter/report020.txt

t/lib/samples/QueryReportFormatter/report021.txt

t/lib/samples/QueryReportFormatter/report022.txt

t/lib/samples/QueryReportFormatter/report023.txt

t/lib/samples/QueryReportFormatter/report024.txt

t/lib/samples/QueryReportFormatter/report025.txt

t/lib/samples/QueryReportFormatter/report026.txt

t/lib/samples/QueryReportFormatter/report027.txt

t/lib/samples/QueryReportFormatter/report028.txt

t/lib/samples/QueryReportFormatter/report029.txt

t/lib/samples/QueryReportFormatter/table.sql

t/lib/samples/ReportFormatter

t/lib/samples/ReportFormatter/report001.txt

t/lib/samples/SchemaIterator

t/lib/samples/SchemaIterator.sql

t/lib/samples/SchemaIterator/all-dbs-tbls.txt

t/lib/samples/SchemaIterator/all-dbs.txt

t/lib/samples/SchemaIterator/dump001-twice.txt

t/lib/samples/SchemaIterator/mysql-user-ddl.txt

t/lib/samples/TableParser-prefix_idx.sql

t/lib/samples/after-TableSyncChunk.sql

t/lib/samples/ansi_quotes.sql

t/lib/samples/auto-increment-actor.sql

t/lib/samples/before-TableSyncChunk.sql

t/lib/samples/before-TableSyncGroupBy.sql

t/lib/samples/before-TableSyncNibble.sql

t/lib/samples/binlogs

t/lib/samples/binlogs/binlog001.txt

t/lib/samples/binlogs/binlog002.txt

t/lib/samples/binlogs/binlog003.txt

t/lib/samples/binlogs/binlog004.txt

t/lib/samples/binlogs/binlog005.txt

t/lib/samples/binlogs/binlog006.txt

t/lib/samples/binlogs/binlog007.txt

t/lib/samples/binlogs/binlog008.txt

t/lib/samples/binlogs/binlog009.txt

t/lib/samples/binlogs/binlog010.txt

t/lib/samples/broken_tbl.frm

t/lib/samples/char-chunking

t/lib/samples/char-chunking/ascii.sql

t/lib/samples/char-chunking/latin1_chars.sql

t/lib/samples/char-chunking/world-city.sql

t/lib/samples/check_table.sql

t/lib/samples/col_types.sql

t/lib/samples/compare-results.sql

t/lib/samples/compare-warnings.sql

t/lib/samples/config_file_1.conf

t/lib/samples/config_file_2.conf

t/lib/samples/configs

t/lib/samples/configs/mycnf-baron-001.txt

t/lib/samples/configs/mycnf-baron-002.txt

t/lib/samples/configs/mycnf001.txt

t/lib/samples/configs/mycnf002.txt

t/lib/samples/configs/myprintdef001.txt

t/lib/samples/configs/mysqldhelp001.txt

t/lib/samples/configs/mysqldhelp002.txt

t/lib/samples/configs/mysqldhelp003.txt

t/lib/samples/cpuinfo01.txt

t/lib/samples/daemonizes.pl

t/lib/samples/date.sql

t/lib/samples/datetime.sql

t/lib/samples/daycol.sql

t/lib/samples/decimalcol.sql

t/lib/samples/doublecol.sql

t/lib/samples/dupe_fk_one.sql

t/lib/samples/dupe_fulltext_exact.sql

t/lib/samples/dupe_fulltext_reverse_order.sql

t/lib/samples/dupe_if_it_were_innodb.sql

t/lib/samples/dupe_key.sql

t/lib/samples/dupe_key_reversed.sql

t/lib/samples/dupe_key_unordered.sql

t/lib/samples/dupe_keys_different_type.sql

t/lib/samples/dupe_keys_thrice.sql

t/lib/samples/dupekeys

t/lib/samples/dupekeys/issue-1004.txt

t/lib/samples/dupekeys/issue-1192.sql

t/lib/samples/dupekeys/issue-904-1.txt

t/lib/samples/dupekeys/issue-904-2.txt

t/lib/samples/empty

t/lib/samples/empty_tables.sql

t/lib/samples/errlogs

t/lib/samples/errlogs/errlog001.txt

t/lib/samples/errlogs/errlog002.txt

t/lib/samples/errlogs/errlog003.txt

t/lib/samples/errlogs/errlog004.txt

t/lib/samples/errlogs/errlog005.txt

t/lib/samples/errlogs/errlog006.txt

t/lib/samples/errlogs/errlog007.txt

t/lib/samples/errlogs/errlog008.txt

t/lib/samples/errlogs/errlog009.txt

t/lib/samples/errlogs/errlog010.txt

t/lib/samples/errlogs/patterns.txt

t/lib/samples/floatcol.sql

t/lib/samples/genlogs

t/lib/samples/genlogs/genlog001.txt

t/lib/samples/genlogs/genlog002.txt

t/lib/samples/genlogs/genlog003.txt

t/lib/samples/http

t/lib/samples/http/http_tcpdump001.txt

t/lib/samples/http/http_tcpdump002.txt

t/lib/samples/http/http_tcpdump003.txt

t/lib/samples/http/http_tcpdump004.txt

t/lib/samples/http/http_tcpdump005.txt

t/lib/samples/http/http_tcpdump006.txt

t/lib/samples/http/http_tcpdump007.txt

t/lib/samples/http/http_tcpdump008.txt

t/lib/samples/http/http_tcpdump009.txt

t/lib/samples/huge_explicit_cols_values.txt

t/lib/samples/huge_insert_ignore_into_values.txt

t/lib/samples/huge_replace_into_values.txt

t/lib/samples/hyphentest.sql

t/lib/samples/innodb_dupe.sql

t/lib/samples/is001.txt

t/lib/samples/issue_109-01-v50.sql

t/lib/samples/issue_109-01-v51.sql

t/lib/samples/issue_11.sql

t/lib/samples/issue_131_ins.sql

t/lib/samples/issue_131_sel.sql

t/lib/samples/issue_132.sql

t/lib/samples/issue_262.sql

t/lib/samples/issue_269-1.sql

t/lib/samples/issue_295-1.sql

t/lib/samples/issue_330_backtick_pair_in_col_comments.sql

t/lib/samples/issue_331-parent.sql

t/lib/samples/issue_331.sql

t/lib/samples/issue_364-data.sql

t/lib/samples/issue_364.sql

t/lib/samples/issue_47.sql

t/lib/samples/issue_616.sql

t/lib/samples/issue_641.sql

t/lib/samples/issue_729.sql

t/lib/samples/issue_8.sql

t/lib/samples/issue_804.sql

t/lib/samples/issue_9-1.sql

t/lib/samples/issue_9-2.sql

t/lib/samples/issue_9-3.sql

t/lib/samples/issue_9-4.sql

t/lib/samples/issue_9-5.sql

t/lib/samples/issue_9-6.sql

t/lib/samples/issue_9-7.sql

t/lib/samples/issue_94.sql

t/lib/samples/issue_941.sql

t/lib/samples/issue_956.sql

t/lib/samples/issue_96.sql

t/lib/samples/log001.txt

t/lib/samples/log_splitter.pl

t/lib/samples/maxsessionfiles_01

t/lib/samples/maxsessionfiles_02

t/lib/samples/memcached

t/lib/samples/memcached/memc_tcpdump001.txt

t/lib/samples/memcached/memc_tcpdump002.txt

t/lib/samples/memcached/memc_tcpdump003.txt

t/lib/samples/memcached/memc_tcpdump004.txt

t/lib/samples/memcached/memc_tcpdump005.txt

t/lib/samples/memcached/memc_tcpdump006.txt

t/lib/samples/memcached/memc_tcpdump007.txt

t/lib/samples/memcached/memc_tcpdump008.txt

t/lib/samples/memcached/memc_tcpdump009.txt

t/lib/samples/memcached/memc_tcpdump010.txt

t/lib/samples/memcached/memc_tcpdump011.txt

t/lib/samples/memcached/memc_tcpdump013.txt

t/lib/samples/memcached/memc_tcpdump014.txt

t/lib/samples/memcached/memc_tcpdump015.txt

t/lib/samples/memcached/memc_tcpdump016.txt

t/lib/samples/mixed-case.sql

t/lib/samples/mysql_db.sql

t/lib/samples/mysql_proto_001.txt

t/lib/samples/mysql_proto_002.txt

t/lib/samples/mysql_proto_003.txt

t/lib/samples/mysqld_01_issue_58.txt

t/lib/samples/mysqldump-no-data

t/lib/samples/mysqldump-no-data/all-dbs.txt

t/lib/samples/mysqldump-no-data/dump001.txt

t/lib/samples/mysqldump-no-data/dump002.txt

t/lib/samples/mysqldump-no-data/one-db.txt

t/lib/samples/mysqldump-no-data/two-dbs.txt

t/lib/samples/ndb_table.sql

t/lib/samples/no-auto-increment-actor.sql

t/lib/samples/no_keys.sql

t/lib/samples/non_pk_ck.sql

t/lib/samples/nondupe_fulltext.sql

t/lib/samples/nondupe_fulltext_not_exact.sql

t/lib/samples/noquotes.sql

t/lib/samples/one_fk.sql

t/lib/samples/one_key.sql

t/lib/samples/osc

t/lib/samples/osc/capsync001.txt

t/lib/samples/osc/copyins001.txt

t/lib/samples/osc/tbl001.sql

t/lib/samples/outfile001.txt

t/lib/samples/pg

t/lib/samples/pg/pg-log-001.txt

t/lib/samples/pg/pg-log-002.txt

t/lib/samples/pg/pg-log-003.txt

t/lib/samples/pg/pg-log-004.txt

t/lib/samples/pg/pg-log-005.txt

t/lib/samples/pg/pg-log-006.txt

t/lib/samples/pg/pg-log-007.txt

t/lib/samples/pg/pg-log-008.txt

t/lib/samples/pg/pg-log-009.txt

t/lib/samples/pg/pg-log-010.txt

t/lib/samples/pg/pg-syslog-001.txt

t/lib/samples/pg/pg-syslog-002.txt

t/lib/samples/pg/pg-syslog-003.txt

t/lib/samples/pg/pg-syslog-004.txt

t/lib/samples/pg/pg-syslog-005.txt

t/lib/samples/pg/pg-syslog-006.txt

t/lib/samples/pg/pg-syslog-007.txt

t/lib/samples/pg/pg-syslog-008.txt

t/lib/samples/pg/pg-syslog-009.txt

t/lib/samples/pg/pg-syslog-010.txt

t/lib/samples/pk_not_first.sql

t/lib/samples/pl

t/lib/samples/pl/recset001.txt

t/lib/samples/pl/recset002.txt

t/lib/samples/pl/recset003.txt

t/lib/samples/pl/recset004.txt

t/lib/samples/pl/recset005.txt

t/lib/samples/pl/recset006.txt

t/lib/samples/pl/recset007.txt

t/lib/samples/pl/recset008.txt

t/lib/samples/pl/recset009.txt

t/lib/samples/pl/recset010.txt

t/lib/samples/pod

t/lib/samples/pod/mqa-rule-LIT.001.pod

t/lib/samples/pod/pod_sample_01.txt

t/lib/samples/pod/pod_sample_02.txt

t/lib/samples/pod/pod_sample_03.txt

t/lib/samples/pod/pod_sample_04.txt

t/lib/samples/pod/pod_sample_05.txt

t/lib/samples/pod/pod_sample_06.txt

t/lib/samples/pod/pod_sample_dsn.txt

t/lib/samples/pod/pod_sample_issue_140.txt

t/lib/samples/pod/pod_sample_issue_92.txt

t/lib/samples/pod/pod_sample_miu.txt

t/lib/samples/pod/pod_sample_mqa.txt

t/lib/samples/podsample.txt

t/lib/samples/query_review.sql

t/lib/samples/relay-binlog001

t/lib/samples/relay-binlog001-at-1708.txt

t/lib/samples/sakila.film.sql

t/lib/samples/sakila.rental.float.sql

t/lib/samples/sakila.rental.null.sql

t/lib/samples/sakila.rental.null2.sql

t/lib/samples/sakila.rental.remix.sql

t/lib/samples/sakila.rental.sql

t/lib/samples/sakila_film.sql

t/lib/samples/schemas

t/lib/samples/schemas/schema-dump.sql

t/lib/samples/show-variables

t/lib/samples/show-variables/vars-baron-001.txt

t/lib/samples/show-variables/vars-baron-002.txt

t/lib/samples/show-variables/vars001.txt

t/lib/samples/show-variables/vars002.txt

t/lib/samples/show-variables/vars003.txt

t/lib/samples/show-variables/vars004.txt

t/lib/samples/simple-tcpdump

t/lib/samples/simple-tcpdump/simpletcp001.txt

t/lib/samples/simple-tcprequests

t/lib/samples/simple-tcprequests/simpletcp-requests001.txt

t/lib/samples/simple-tcprequests/simpletcp-requests002.txt

t/lib/samples/slowlogs

t/lib/samples/slowlogs/microslow001.txt

t/lib/samples/slowlogs/slow001-rewritten.txt

t/lib/samples/slowlogs/slow001.txt

t/lib/samples/slowlogs/slow002.txt

t/lib/samples/slowlogs/slow003.txt

t/lib/samples/slowlogs/slow004.txt

t/lib/samples/slowlogs/slow005.txt

t/lib/samples/slowlogs/slow006-session-1.txt

t/lib/samples/slowlogs/slow006-session-2.txt

t/lib/samples/slowlogs/slow006-session-3.txt

t/lib/samples/slowlogs/slow006.txt

t/lib/samples/slowlogs/slow007.txt

t/lib/samples/slowlogs/slow008.txt

t/lib/samples/slowlogs/slow009.txt

t/lib/samples/slowlogs/slow010.txt

t/lib/samples/slowlogs/slow011.txt

t/lib/samples/slowlogs/slow012.txt

t/lib/samples/slowlogs/slow013.txt

t/lib/samples/slowlogs/slow014.txt

t/lib/samples/slowlogs/slow015.txt

t/lib/samples/slowlogs/slow016.txt

t/lib/samples/slowlogs/slow017.txt

t/lib/samples/slowlogs/slow018.txt

t/lib/samples/slowlogs/slow019.txt

t/lib/samples/slowlogs/slow020.txt

t/lib/samples/slowlogs/slow021.txt

t/lib/samples/slowlogs/slow022.txt

t/lib/samples/slowlogs/slow023.txt

t/lib/samples/slowlogs/slow024.txt

t/lib/samples/slowlogs/slow025.txt

t/lib/samples/slowlogs/slow026.txt

t/lib/samples/slowlogs/slow027.txt

t/lib/samples/slowlogs/slow028.txt

t/lib/samples/slowlogs/slow029.txt

t/lib/samples/slowlogs/slow030.txt

t/lib/samples/slowlogs/slow031.txt

t/lib/samples/slowlogs/slow032-rewritten.txt

t/lib/samples/slowlogs/slow032.txt

t/lib/samples/slowlogs/slow033.txt

t/lib/samples/slowlogs/slow034.txt

t/lib/samples/slowlogs/slow035.txt

t/lib/samples/slowlogs/slow036.txt

t/lib/samples/slowlogs/slow037.txt

t/lib/samples/slowlogs/slow038.txt

t/lib/samples/slowlogs/slow039.txt.gz

t/lib/samples/slowlogs/slow040.txt.gz

t/lib/samples/slowlogs/slow041.txt

t/lib/samples/slowlogs/slow042.txt

t/lib/samples/slowlogs/slow043.txt

t/lib/samples/slowlogs/slow044.txt

t/lib/samples/slowlogs/slow045.txt

t/lib/samples/slowlogs/slow046.txt

t/lib/samples/slowlogs/slow047.txt

t/lib/samples/slowlogs/slow048.txt

t/lib/samples/slowlogs/slow049.txt

t/lib/samples/slowlogs/slow050.txt

t/lib/samples/slowlogs/slow051.txt

t/lib/samples/slowlogs/slow052.txt

t/lib/samples/slowlogs/slow053.txt

t/lib/samples/slowlogs/slowlogwriter001.txt

t/lib/samples/slowlogs/slowlogwriter002.txt

t/lib/samples/split_slow020.txt

t/lib/samples/t1.sql

t/lib/samples/tables

t/lib/samples/tables/issue-388.sql

t/lib/samples/tcpdump

t/lib/samples/tcpdump/tcpdump000.txt

t/lib/samples/tcpdump/tcpdump001.txt

t/lib/samples/tcpdump/tcpdump002.txt

t/lib/samples/tcpdump/tcpdump003.txt

t/lib/samples/tcpdump/tcpdump004.txt

t/lib/samples/tcpdump/tcpdump005.txt

t/lib/samples/tcpdump/tcpdump006.txt

t/lib/samples/tcpdump/tcpdump007.txt

t/lib/samples/tcpdump/tcpdump008.txt

t/lib/samples/tcpdump/tcpdump009.txt

t/lib/samples/tcpdump/tcpdump010.txt

t/lib/samples/tcpdump/tcpdump011.txt

t/lib/samples/tcpdump/tcpdump012.txt

t/lib/samples/tcpdump/tcpdump013.txt

t/lib/samples/tcpdump/tcpdump014.txt

t/lib/samples/tcpdump/tcpdump015.txt

t/lib/samples/tcpdump/tcpdump016.txt

t/lib/samples/tcpdump/tcpdump017.txt

t/lib/samples/tcpdump/tcpdump018.txt

t/lib/samples/tcpdump/tcpdump019-arg.txt

t/lib/samples/tcpdump/tcpdump019.txt

t/lib/samples/tcpdump/tcpdump020.txt

t/lib/samples/tcpdump/tcpdump021.txt

t/lib/samples/tcpdump/tcpdump022.txt

t/lib/samples/tcpdump/tcpdump023.txt

t/lib/samples/tcpdump/tcpdump024.txt

t/lib/samples/tcpdump/tcpdump025.txt

t/lib/samples/tcpdump/tcpdump026.txt

t/lib/samples/tcpdump/tcpdump027.txt

t/lib/samples/tcpdump/tcpdump028.txt

t/lib/samples/tcpdump/tcpdump029.txt

t/lib/samples/tcpdump/tcpdump030.txt

t/lib/samples/tcpdump/tcpdump031.txt

t/lib/samples/tcpdump/tcpdump032.txt

t/lib/samples/tcpdump/tcpdump033.txt

t/lib/samples/tcpdump/tcpdump034.txt

t/lib/samples/tcpdump/tcpdump035.txt

t/lib/samples/tcpdump/tcpdump036.txt

t/lib/samples/tcpdump/tcpdump037.txt

t/lib/samples/tcpdump/tcpdump038.txt

t/lib/samples/tcpdump/tcpdump039.txt

t/lib/samples/tcpdump/tcpdump040.txt

t/lib/samples/temporary_table.sql

t/lib/samples/timecol.sql

t/lib/samples/uppercase_names.sql

util

util/package

files removed:
util

util/checking

util/packaging

util/testing

Show diffs side-by-side

added added

removed removed

lib/TableChunker.pm

# Feedback and improvements are welcome.

# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED

# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF

# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.

# This program is free software; you can redistribute it and/or modify it under

# the terms of the GNU General Public License as published by the Free Software

# Foundation, version 2; OR the Perl Artistic License. On UNIX and similar

# systems, you can issue `man perlgpl' or `man perlartistic' to read these

# licenses.

# You should have received a copy of the GNU General Public License along with

# this program; if not, write to the Free Software Foundation, Inc., 59 Temple

# Place, Suite 330, Boston, MA 02111-1307 USA.

# ###########################################################################

# TableChunker package $Revision: 7169 $

# ###########################################################################

# Package: TableChunker

# TableChunker helps determine how to "chunk" a table. Chunk are

# pre-determined ranges of rows defined by boundary values (sometimes also

# called endpoints) on numeric or numeric-like columns, including date/time

# types. Any numeric column type that MySQL can do positional comparisons

# (<, <=, >, >=) on works. Chunking on character data is not supported yet

# (but see <issue 568 at http://code.google.com/p/maatkit/issues/detail?id=568>).

# Usually chunks range over all rows in a table but sometimes they only

# range over a subset of rows if an optional where arg is passed to various

# subs. In either case a chunk is like "`col` >= 5 AND `col` < 10". If

# col is of type int and is unique, then that chunk ranges over up to 5 rows.

# Chunks are included in WHERE clauses by various tools to do work on discrete

# chunks of the table instead of trying to work on the entire table at once.

# Chunks do not overlap and their size is configurable via the chunk_size arg

# passed to several subs. The chunk_size can be a number of rows or a size

# like 1M, in which case it's in estimated bytes of data. Real chunk sizes

# are usually close to the requested chunk_size but unless the optional exact

# arg is assed the real chunk sizes are approximate. Sometimes the

# distribution of values on the chunk colun can skew chunking. If, for

# example, col has values 0, 100, 101, ... then the zero value skews chunking.

# The zero_chunk arg handles this.

{

package TableChunker;

use strict;

use warnings FATAL => 'all';

use English qw(-no_match_vars);

use constant MKDEBUG => $ENV{MKDEBUG} || 0;

use POSIX qw(floor ceil);

use List::Util qw(min max);

use Data::Dumper;

$Data::Dumper::Indent = 1;

$Data::Dumper::Sortkeys = 1;

$Data::Dumper::Quotekeys = 0;

# Sub: new

# Parameters:

# $class - TableChunker (automatic)

# %args - Arguments

# Required Arguments:

# Quoter - <Quoter> object

# MySQLDump - <MySQLDump> object

sub new {

my ( $class, %args ) = @_;

foreach my $arg ( qw(Quoter MySQLDump) ) {

die "I need a $arg argument" unless $args{$arg};

}

my %int_types = map { $_ => 1 } qw(bigint date datetime int mediumint smallint time timestamp tinyint year);

my %real_types = map { $_ => 1 } qw(decimal double float);

my $self = {

%args,

int_types => \%int_types,

real_types => \%real_types,

EPOCH => '1970-01-01',

};

return bless $self, $class;

}

# Sub: find_chunk_columns

# Find chunkable columns.

# Parameters:

# %args - Arguments

# Required Arguments:

# table_struct - Hashref returned from <TableParser::parse()>

# Optional Arguments:

# exact - bool: Try to support exact chunk sizes (may still chunk fuzzily)

100

# Returns:

101

# Array: whether the table can be chunked exactly if requested (zero

102

# otherwise), arrayref of columns that support chunking. Example:

103

# (start code)

104

# 1,

105

# [

106

# { column => 'id', index => 'PRIMARY' },

107

# { column => 'i', index => 'i_idx' },

108

# ]

109

# (end code)

110

sub find_chunk_columns {

111

my ( $self, %args ) = @_;

112

foreach my $arg ( qw(tbl_struct) ) {

113

die "I need a $arg argument" unless $args{$arg};

114

}

115

my $tbl_struct = $args{tbl_struct};

116

117

# See if there's an index that will support chunking.

118

my @possible_indexes;

119

foreach my $index ( values %{ $tbl_struct->{keys} } ) {

120

121

# Accept only BTREE indexes.

122

next unless $index->{type} eq 'BTREE';

123

124

# Reject indexes with prefixed columns.

125

next if grep { defined } @{$index->{col_prefixes}};

126

127

# If exact, accept only unique, single-column indexes.

128

if ( $args{exact} ) {

129

next unless $index->{is_unique} && @{$index->{cols}} == 1;

130

}

131

132

push @possible_indexes, $index;

133

}

134

MKDEBUG && _d('Possible chunk indexes in order:',

135

join(', ', map { $_->{name} } @possible_indexes));

136

137

# Build list of candidate chunk columns.

138

my $can_chunk_exact = 0;

139

my @candidate_cols;

140

foreach my $index ( @possible_indexes ) {

141

my $col = $index->{cols}->[0];

142

143

# Accept only integer or real number type columns or character columns.

144

my $col_type = $tbl_struct->{type_for}->{$col};

145

next unless $self->{int_types}->{$col_type}

146

|| $self->{real_types}->{$col_type}

147

|| $col_type =~ m/char/;

148

149

# Save the candidate column and its index.

150

push @candidate_cols, { column => $col, index => $index->{name} };

151

}

152

153

$can_chunk_exact = 1 if $args{exact} && scalar @candidate_cols;

154

155

if ( MKDEBUG ) {

156

my $chunk_type = $args{exact} ? 'Exact' : 'Inexact';

157

_d($chunk_type, 'chunkable:',

158

join(', ', map { "$_->{column} on $_->{index}" } @candidate_cols));

159

}

160

161

# Order the candidates by their original column order.

162

# Put the PK's first column first, if it's a candidate.

163

my @result;

164

MKDEBUG && _d('Ordering columns by order in tbl, PK first');

165

if ( $tbl_struct->{keys}->{PRIMARY} ) {

166

my $pk_first_col = $tbl_struct->{keys}->{PRIMARY}->{cols}->[0];

167

@result = grep { $_->{column} eq $pk_first_col } @candidate_cols;

168

@candidate_cols = grep { $_->{column} ne $pk_first_col } @candidate_cols;

169

}

170

my $i = 0;

171

my %col_pos = map { $_ => $i++ } @{$tbl_struct->{cols}};

172

push @result, sort { $col_pos{$a->{column}} <=> $col_pos{$b->{column}} }

173

@candidate_cols;

174

175

if ( MKDEBUG ) {

176

_d('Chunkable columns:',

177

join(', ', map { "$_->{column} on $_->{index}" } @result));

178

_d('Can chunk exactly:', $can_chunk_exact);

179

}

180

181

return ($can_chunk_exact, @result);

182

}

183

184

# Sub: calculate_chunks

185

# Calculate chunks for the given range statistics. Args min, max and

186

# rows_in_range are returned from get_range_statistics() which is usually

187

# called before this sub. Min and max are expected to be valid values

188

# (NULL is valid).

189

190

# Parameters:

191

# %args - Arguments

192

193

# Required Arguments:

194

# dbh - dbh

195

# db - database name

196

# tbl - table name

197

# tbl_struct - retval of <TableParser::parse()>

198

# chunk_col - column name to chunk on

199

# min - min col value, from <TableChunker::get_range_statistics()>

200

# max - max col value, from <TableChunker::get_range_statistics()>

201

# rows_in_range - number of rows to chunk, from

202

# <TableChunker::get_range_statistics()>

203

# chunk_size - requested size of each chunk

204

205

# Optional Arguments:

206

# exact - Use exact chunk_size? Use approximates is not.

207

# tries - Fetch up to this many rows to find a non-zero value

208

# chunk_range - Make chunk range open (default) or openclosed

209

210

# Returns:

211

# Array of WHERE predicates like "`col` >= '10' AND `col` < '20'",

212

# one for each chunk. All values are single-quoted due to <issue 1002 at

213

# http://code.google.com/p/maatkit/issues/detail?id=1002>. Example:

214

# (start code)

215

# `film_id` < '30',

216

# `film_id` >= '30' AND `film_id` < '60',

217

# `film_id` >= '60' AND `film_id` < '90',

218

# `film_id` >= '90',

219

# (end code)

220

sub calculate_chunks {

221

my ( $self, %args ) = @_;

222

my @required_args = qw(dbh db tbl tbl_struct chunk_col rows_in_range chunk_size);

223

foreach my $arg ( @required_args ) {

224

die "I need a $arg argument" unless defined $args{$arg};

225

}

226

MKDEBUG && _d('Calculate chunks for',

227

join(", ", map {"$_=".(defined $args{$_} ? $args{$_} : "undef")}

228

qw(db tbl chunk_col min max rows_in_range chunk_size zero_chunk exact)

229

));

230

231

if ( !$args{rows_in_range} ) {

232

MKDEBUG && _d("Empty table");

233

return '1=1';

234

}

235

236

# http://code.google.com/p/maatkit/issues/detail?id=1084

237

if ( $args{rows_in_range} < $args{chunk_size} ) {

238

MKDEBUG && _d("Chunk size larger than rows in range");

239

return '1=1';

240

}

241

242

my $q = $self->{Quoter};

243

my $dbh = $args{dbh};

244

my $chunk_col = $args{chunk_col};

245

my $tbl_struct = $args{tbl_struct};

246

my $col_type = $tbl_struct->{type_for}->{$chunk_col};

247

MKDEBUG && _d('chunk col type:', $col_type);

248

249

# Get chunker info for the column type. Numeric cols are chunked

250

# differently than char cols.

251

my %chunker;

252

if ( $tbl_struct->{is_numeric}->{$chunk_col} || $col_type =~ /date|time/ ) {

253

%chunker = $self->_chunk_numeric(%args);

254

}

255

elsif ( $col_type =~ m/char/ ) {

256

%chunker = $self->_chunk_char(%args);

257

}

258

else {

259

die "Cannot chunk $col_type columns";

260

}

261

MKDEBUG && _d("Chunker:", Dumper(\%chunker));

262

my ($col, $start_point, $end_point, $interval, $range_func)

263

= @chunker{qw(col start_point end_point interval range_func)};

264

265

# Generate a list of chunk boundaries. The first and last chunks are

266

# inclusive, and will catch any rows before or after the end of the

267

# supposed range. So 1-100 divided into chunks of 30 should actually end

268

# up with chunks like this:

269

# < 30

270

# >= 30 AND < 60

271

# >= 60 AND < 90

272

# >= 90

273

# If zero_chunk was specified and zero chunking was possible, the first

274

# chunk will be = 0 to catch any zero or zero-equivalent (e.g. 00:00:00)

275

# rows.

276

my @chunks;

277

if ( $start_point < $end_point ) {

278

279

# The zero chunk, if there is one. It doesn't have to be the first

280

# chunk. The 0 cannot be quoted because if d='0000-00-00' then

281

# d=0 will work but d='0' will cause warning 1292: Incorrect date

282

# value: '0' for column 'd'. This might have to column-specific in

283

# future when we chunk on more exotic column types.

284

push @chunks, "$col = 0" if $chunker{have_zero_chunk};

285

286

my ($beg, $end);

287

my $iter = 0;

288

for ( my $i = $start_point; $i < $end_point; $i += $interval ) {

289

($beg, $end) = $self->$range_func($dbh, $i, $interval, $end_point);

290

291

# The first chunk.

292

if ( $iter++ == 0 ) {

293

push @chunks,

294

($chunker{have_zero_chunk} ? "$col > 0 AND " : "")

295

."$col < " . $q->quote_val($end);

296

}

297

else {

298

# The normal case is a chunk in the middle of the range somewhere.

299

push @chunks, "$col >= " . $q->quote_val($beg) . " AND $col < " . $q->quote_val($end);

300

}

301

}

302

303

# Remove the last chunk and replace it with one that matches everything

304

# from the beginning of the last chunk to infinity, or to the max col

305

# value if chunk_range is openclosed. If the chunk column is nullable,

306

# do NULL separately.

307

my $chunk_range = lc $args{chunk_range} || 'open';

308

my $nullable = $args{tbl_struct}->{is_nullable}->{$args{chunk_col}};

309

pop @chunks;

310

if ( @chunks ) {

311

push @chunks, "$col >= " . $q->quote_val($beg)

312

. ($chunk_range eq 'openclosed'

313

? " AND $col <= " . $q->quote_val($args{max}) : "");

314

}

315

else {

316

push @chunks, $nullable ? "$col IS NOT NULL" : '1=1';

317

}

318

if ( $nullable ) {

319

push @chunks, "$col IS NULL";

320

}

321

}

322

else {

323

# There are no chunks; just do the whole table in one chunk.

324

MKDEBUG && _d('No chunks; using single chunk 1=1');

325

push @chunks, '1=1';

326

}

327

328

return @chunks;

329

}

330

331

# Sub: _chunk_numeric

332

# Determine how to chunk a numeric column.

333

334

# Parameters:

335

# %args - Arguments

336

337

# Required Arguments:

338

# dbh - dbh

339

# db - database name

340

# tbl - table name

341

# tbl_struct - retval of <TableParser::parse()>

342

# chunk_col - column name to chunk on

343

# min - min col value, from <TableChunker::get_range_statistics()>

344

# max - max col value, from <TableChunker::get_range_statistics()>

345

# rows_in_range - number of rows to chunk, from

346

# <TableChunker::get_range_statistics()>

347

# chunk_size - requested size of each chunk

348

349

# Optional Arguments:

350

# exact - Use exact chunk_size? Use approximates is not.

351

# tries - Fetch up to this many rows to find a non-zero value

352

# zero_chunk - Add an extra chunk for zero values? (0, 00:00, etc.)

353

354

# Returns:

355

# Array of chunker info that <calculate_chunks()> uses to create

356

# chunks, like:

357

# (start code)

358

# col => quoted chunk column name

359

# start_point => start value (a Perl number)

360

# end_point => end value (a Perl number)

361

# interval => interval to walk from start_ to end_point (a Perl number)

362

# range_func => coderef to return a value while walking that ^ range

363

# have_zero_chunk => whether to include a zero chunk (col=0)

364

# (end code)

365

sub _chunk_numeric {

366

my ( $self, %args ) = @_;

367

my @required_args = qw(dbh db tbl tbl_struct chunk_col rows_in_range chunk_size);

368

foreach my $arg ( @required_args ) {

369

die "I need a $arg argument" unless defined $args{$arg};

370

}

371

my $q = $self->{Quoter};

372

my $db_tbl = $q->quote($args{db}, $args{tbl});

373

my $col_type = $args{tbl_struct}->{type_for}->{$args{chunk_col}};

374

375

# Convert the given MySQL values to (Perl) numbers using some MySQL function.

376

# E.g.: SELECT TIME_TO_SEC('12:34') == 45240.

377

my $range_func;

378

if ( $col_type =~ m/(?:int|year|float|double|decimal)$/ ) {

379

$range_func = 'range_num';

380

}

381

elsif ( $col_type =~ m/^(?:timestamp|date|time)$/ ) {

382

$range_func = "range_$col_type";

383

}

384

elsif ( $col_type eq 'datetime' ) {

385

$range_func = 'range_datetime';

386

}

387

388

my ($start_point, $end_point);

389

eval {

390

$start_point = $self->value_to_number(

391

value => $args{min},

392

column_type => $col_type,

393

dbh => $args{dbh},

394

);

395

$end_point = $self->value_to_number(

396

value => $args{max},

397

column_type => $col_type,

398

dbh => $args{dbh},

399

);

400

};

401

if ( $EVAL_ERROR ) {

402

if ( $EVAL_ERROR =~ m/don't know how to chunk/ ) {

403

# Special kind of error doesn't make sense with the more verbose

404

# description below.

405

die $EVAL_ERROR;

406

}

407

else {

408

die "Error calculating chunk start and end points for table "

409

. "`$args{tbl_struct}->{name}` on column `$args{chunk_col}` "

410

. "with min/max values "

411

. join('/',

412

map { defined $args{$_} ? $args{$_} : 'undef' } qw(min max))

413

. ":\n\n"

414

. $EVAL_ERROR

415

. "\nVerify that the min and max values are valid for the column. "

416

. "If they are valid, this error could be caused by a bug in the "

417

. "tool.";

418

}

419

}

420

421

# The end points might be NULL in the pathological case that the table

422

# has nothing but NULL values. If there's at least one non-NULL value

423

# then MIN() and MAX() will return it. Otherwise, the only thing to do

424

# is make NULL end points zero to make the code below work and any NULL

425

# values will be handled by the special "IS NULL" chunk.

426

if ( !defined $start_point ) {

427

MKDEBUG && _d('Start point is undefined');

428

$start_point = 0;

429

}

430

if ( !defined $end_point || $end_point < $start_point ) {

431

MKDEBUG && _d('End point is undefined or before start point');

432

$end_point = 0;

433

}

434

MKDEBUG && _d("Actual chunk range:", $start_point, "to", $end_point);

435

436

# Determine if we can include a zero chunk (col = 0). If yes, then

437

# make sure the start point is non-zero. $start_point and $end_point

438

# should be numbers (converted from MySQL values earlier). The purpose

439

# of the zero chunk is to capture a potentially large number of zero

440

# values that might imbalance the size of the first chunk. E.g. if a

441

# lot of invalid times were inserted and stored as 00:00:00, these

442

# zero (equivalent) values are captured by the zero chunk instead of

443

# the first chunk + all the non-zero values in the first chunk.

444

my $have_zero_chunk = 0;

445

if ( $args{zero_chunk} ) {

446

if ( $start_point != $end_point && $start_point >= 0 ) {

447

MKDEBUG && _d('Zero chunking');

448

my $nonzero_val = $self->get_nonzero_value(

449

%args,

450

db_tbl => $db_tbl,

451

col => $args{chunk_col},

452

col_type => $col_type,

453

val => $args{min}

454

);

455

# Since we called value_to_number() before with this column type

456

# we shouldn't have to worry about it dying here--it would have

457

# died earlier if we can't chunk the column type.

458

$start_point = $self->value_to_number(

459

value => $nonzero_val,

460

column_type => $col_type,

461

dbh => $args{dbh},

462

);

463

$have_zero_chunk = 1;

464

}

465

else {

466

MKDEBUG && _d("Cannot zero chunk");

467

}

468

}

469

MKDEBUG && _d("Using chunk range:", $start_point, "to", $end_point);

470

471

# Calculate the chunk size in terms of "distance between endpoints"

472

# that will give approximately the right number of rows between the

473

# endpoints. If possible and requested, forbid chunks from being any

474

# bigger than specified.

475

my $interval = $args{chunk_size}

476

* ($end_point - $start_point)

477

/ $args{rows_in_range};

478

if ( $self->{int_types}->{$col_type} ) {

479

$interval = ceil($interval);

480

}

481

$interval ||= $args{chunk_size};

482

if ( $args{exact} ) {

483

$interval = $args{chunk_size};

484

}

485

MKDEBUG && _d('Chunk interval:', $interval, 'units');

486

487

return (

488

col => $q->quote($args{chunk_col}),

489

start_point => $start_point,

490

end_point => $end_point,

491

interval => $interval,

492

range_func => $range_func,

493

have_zero_chunk => $have_zero_chunk,

494

);

495

}

496

497

# Sub: _chunk_numeric

498

# Determine how to chunk a character column.

499

500

# Parameters:

501

# %args - Arguments

502

503

# Required Arguments:

504

# dbh - dbh

505

# db - database name

506

# tbl - table name

507

# tbl_struct - retval of <TableParser::parse()>

508

# chunk_col - column name to chunk on

509

# min - min col value, from <TableChunker::get_range_statistics()>

510

# max - max col value, from <TableChunker::get_range_statistics()>

511

# rows_in_range - number of rows to chunk, from

512

# <TableChunker::get_range_statistics()>

513

# chunk_size - requested size of each chunk

514

515

# Returns:

516

# Array of chunker info that <calculate_chunks()> uses to create

517

# chunks, like:

518

# (start code)

519

# col => quoted chunk column name

520

# start_point => start value (a Perl number)

521

# end_point => end value (a Perl number)

522

# interval => interval to walk from start_ to end_point (a Perl number)

523

# range_func => coderef to return a value while walking that ^ range

524

# (end code)

525

sub _chunk_char {

526

my ( $self, %args ) = @_;

527

my @required_args = qw(dbh db tbl tbl_struct chunk_col rows_in_range chunk_size);

528

foreach my $arg ( @required_args ) {

529

die "I need a $arg argument" unless defined $args{$arg};

530

}

531

my $q = $self->{Quoter};

532

my $db_tbl = $q->quote($args{db}, $args{tbl});

533

my $dbh = $args{dbh};

534

my $chunk_col = $args{chunk_col};

535

my $row;

536

my $sql;

537

538

# Get what MySQL says are the min and max column values.

539

# For example, is 'a' or 'A' the min according to MySQL?

540

$sql = "SELECT MIN($chunk_col), MAX($chunk_col) FROM $db_tbl "

541

. "ORDER BY `$chunk_col`";

542

MKDEBUG && _d($dbh, $sql);

543

$row = $dbh->selectrow_arrayref($sql);

544

my ($min_col, $max_col) = ($row->[0], $row->[1]);

545

546

# Get the character codes between the min and max column values.

547

$sql = "SELECT ORD(?) AS min_col_ord, ORD(?) AS max_col_ord";

548

MKDEBUG && _d($dbh, $sql);

549

my $ord_sth = $dbh->prepare($sql); # avoid quoting issues

550

$ord_sth->execute($min_col, $max_col);

551

$row = $ord_sth->fetchrow_arrayref();

552

my ($min_col_ord, $max_col_ord) = ($row->[0], $row->[1]);

553

MKDEBUG && _d("Min/max col char code:", $min_col_ord, $max_col_ord);

554

555

# Create a sorted chacater-to-number map of the unique characters in

556

# the column ranging from the min character code to the max.

557

my $base;

558

my @chars;

559

MKDEBUG && _d("Table charset:", $args{tbl_struct}->{charset});

560

if ( ($args{tbl_struct}->{charset} || "") eq "latin1" ) {

561

# These are the unique, sorted latin1 character codes according to

562

# MySQL. You'll notice that many are missing. That's because MySQL

563

# treats many characters as the same, for example "e" and "é".

564

my @sorted_latin1_chars = (

565

32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,

566

46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,

567

60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73,

568

74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87,

569

88, 89, 90, 91, 92, 93, 94, 95, 96, 123, 124, 125, 126, 161,

570

162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,

571

176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189,

572

190, 191, 215, 216, 222, 223, 247, 255);

573

574

my ($first_char, $last_char);

575

for my $i ( 0..$#sorted_latin1_chars ) {

576

$first_char = $i and last if $sorted_latin1_chars[$i] >= $min_col_ord;

577

}

578

for my $i ( $first_char..$#sorted_latin1_chars ) {

579

$last_char = $i and last if $sorted_latin1_chars[$i] >= $max_col_ord;

580

};

581

582

@chars = map { chr $_; } @sorted_latin1_chars[$first_char..$last_char];

583

$base = scalar @chars;

584

}

585

else {

586

# If the table's charset isn't latin1, who knows what charset is being

587

# used, what characters it contains, and how those characters are sorted.

588

# So we create a character map and let MySQL tell us these things.

589

590

# Create a temp table with the same char col def as the original table.

591

my $tmp_tbl = '__maatkit_char_chunking_map';

592

my $tmp_db_tbl = $q->quote($args{db}, $tmp_tbl);

593

$sql = "DROP TABLE IF EXISTS $tmp_db_tbl";

594

MKDEBUG && _d($dbh, $sql);

595

$dbh->do($sql);

596

my $col_def = $args{tbl_struct}->{defs}->{$chunk_col};

597

$sql = "CREATE TEMPORARY TABLE $tmp_db_tbl ($col_def) "

598

. "ENGINE=MEMORY";

599

MKDEBUG && _d($dbh, $sql);

600

$dbh->do($sql);

601

602

# Populate the temp table with all the characters between the min and max

603

# max character codes. This is our character-to-number map.

604

$sql = "INSERT INTO $tmp_db_tbl VALUE (CHAR(?))";

605

MKDEBUG && _d($dbh, $sql);

606

my $ins_char_sth = $dbh->prepare($sql); # avoid quoting issues

607

for my $char_code ( $min_col_ord..$max_col_ord ) {

608

$ins_char_sth->execute($char_code);

609

}

610

611

# Select from the char-to-number map all characters between the

612

# min and max col values, letting MySQL order them. The first

613

# character returned becomes "zero" in a new base system of counting,

614

# the second character becomes "one", etc. So if 42 chars are

615

# returned like [a, B, c, d, é, ..., ü] then we have a base 42

616

# system where 0=a, 1=B, 2=c, 3=d, 4=é, ... 41=ü. count_base()

617

# helps us count in arbitrary systems.

618

$sql = "SELECT `$chunk_col` FROM $tmp_db_tbl "

619

. "WHERE `$chunk_col` BETWEEN ? AND ? "

620

. "ORDER BY `$chunk_col`";

621

MKDEBUG && _d($dbh, $sql);

622

my $sel_char_sth = $dbh->prepare($sql);

623

$sel_char_sth->execute($min_col, $max_col);

624

625

@chars = map { $_->[0] } @{ $sel_char_sth->fetchall_arrayref() };

626

$base = scalar @chars;

627

628

$sql = "DROP TABLE $tmp_db_tbl";

629

MKDEBUG && _d($dbh, $sql);

630

$dbh->do($sql);

631

}

632

MKDEBUG && _d("Base", $base, "chars:", @chars);

633

634

# Now we begin calculating how to chunk the char column. This is

635

# completely different from _chunk_numeric because we're not dealing

636

# with the values to chunk directly (the characters) but rather a map.

637

638

# In our base system, how many values can 1, 2, etc. characters express?

639

# E.g. in a base 26 system (a-z), 1 char expresses 26^1=26 chars (a-z),

640

# 2 chars expresses 26^2=676 chars. If the requested chunk size is 100,

641

# then 1 char might not express enough values, but 2 surely can. This

642

# is imperefect because we don't know if we have data like: [apple, boy,

643

# car] (i.e. values evenly distributed across the range of chars), or

644

# [ant, apple, azur, boy]. We assume data is more evenly distributed

645

# than not so we use the minimum number of characters to express a chunk

646

# size.

647

$sql = "SELECT MAX(LENGTH($chunk_col)) FROM $db_tbl ORDER BY `$chunk_col`";

648

MKDEBUG && _d($dbh, $sql);

649

$row = $dbh->selectrow_arrayref($sql);

650

my $max_col_len = $row->[0];

651

MKDEBUG && _d("Max column value:", $max_col, $max_col_len);

652

my $n_values;

653

for my $n_chars ( 1..$max_col_len ) {

654

$n_values = $base**$n_chars;

655

if ( $n_values >= $args{chunk_size} ) {

656

MKDEBUG && _d($n_chars, "chars in base", $base, "expresses",

657

$n_values, "values");

658

last;

659

}

660

}

661

662

# Our interval is not like a _chunk_numeric() interval, either, because

663

# we don't increment in the actual values (i.e. the characters) but rather

664

# in the char-to-number map. If the above calculation found that 1 char

665

# expressed enough values for 1 chunk, then each char in the map will

666

# yield roughly one chunk of values, so the interval is 1. Or, if we need

667

# 2 chars to express enough vals for 1 chunk, then we'll increment through

668

# the map 2 chars at a time, like [a, b], [c, d], etc.

669

my $n_chunks = $args{rows_in_range} / $args{chunk_size};

670

my $interval = floor($n_values / $n_chunks) || 1;

671

672

my $range_func = sub {

673

my ( $self, $dbh, $start, $interval, $max ) = @_;

674

my $start_char = $self->base_count(

675

count_to => $start,

676

base => $base,

677

symbols => \@chars,

678

);

679

my $end_char = $self->base_count(

680

count_to => min($max, $start + $interval),

681

base => $base,

682

symbols => \@chars,

683

);

684

return $start_char, $end_char;

685

};

686

687

return (

688

col => $q->quote($chunk_col),

689

start_point => 0,

690

end_point => $n_values,

691

interval => $interval,

692

range_func => $range_func,

693

);

694

}

695

696

# Sub: get_first_chunkable_column

697

# Get the first chunkable column in a table.

698

# Only a "sane" column/index is returned. That means that

699

# the first auto-detected chunk col/index are used if any combination of

700

# preferred chunk col or index would be really bad, like chunk col=x

701

# and chunk index=some index over (y, z). That's bad because the index

702

# doesn't include the column; it would also be bad if the column wasn't

703

# a left-most prefix of the index.

704

705

# Parameters:

706

# %args - Arguments

707

708

# Required Arguments:

709

# tbl_struct - Hashref returned by <TableParser::parse()>

710

711

# Optional arguments:

712

# chunk_column - Preferred chunkable column name

713

# chunk_index - Preferred chunkable column index name

714

# exact - bool: passed to <find_chunk_columns()>

715

716

# Returns:

717

# List: chunkable column name, chunkable colum index

718

sub get_first_chunkable_column {

719

my ( $self, %args ) = @_;

720

foreach my $arg ( qw(tbl_struct) ) {

721

die "I need a $arg argument" unless $args{$arg};

722

}

723

724

# First auto-detected chunk col/index. If any combination of preferred

725

# chunk col or index are specified and are sane, they will overwrite

726

# these defaults. Else, these defaults will be returned.

727

my ($exact, @cols) = $self->find_chunk_columns(%args);

728

my $col = $cols[0]->{column};

729

my $idx = $cols[0]->{index};

730

731

# Wanted/preferred chunk column and index. Caller only gets what

732

# they want, though, if it results in a sane col/index pair.

733

my $wanted_col = $args{chunk_column};

734

my $wanted_idx = $args{chunk_index};

735

MKDEBUG && _d("Preferred chunk col/idx:", $wanted_col, $wanted_idx);

736

737

if ( $wanted_col && $wanted_idx ) {

738

# Preferred column and index: check that the pair is sane.

739

foreach my $chunkable_col ( @cols ) {

740

if ( $wanted_col eq $chunkable_col->{column}

741

&& $wanted_idx eq $chunkable_col->{index} ) {

742

# The wanted column is chunkable with the wanted index.

743

$col = $wanted_col;

744

$idx = $wanted_idx;

745

last;

746

}

747

}

748

}

749

elsif ( $wanted_col ) {

750

# Preferred column, no index: check if column is chunkable, if yes

751

# then use its index, else fall back to default col/index.

752

foreach my $chunkable_col ( @cols ) {

753

if ( $wanted_col eq $chunkable_col->{column} ) {

754

# The wanted column is chunkable, so use its index and overwrite

755

# the defaults.

756

$col = $wanted_col;

757

$idx = $chunkable_col->{index};

758

last;

759

}

760

}

761

}

762

elsif ( $wanted_idx ) {

763

# Preferred index, no column: check if index's left-most column is

764

# chunkable, if yes then use its column, else fall back to auto-detected

765

# col/index.

766

foreach my $chunkable_col ( @cols ) {

767

if ( $wanted_idx eq $chunkable_col->{index} ) {

768

# The wanted index has a chunkable column, so use it and overwrite

769

# the defaults.

770

$col = $chunkable_col->{column};

771

$idx = $wanted_idx;

772

last;

773

}

774

}

775

}

776

777

MKDEBUG && _d('First chunkable col/index:', $col, $idx);

778

return $col, $idx;

779

}

780

781

# Sub: size_to_rows

782

# Convert a size in rows or bytes to a number of rows in the table,

783

# using SHOW TABLE STATUS. If the size is a string with a suffix of M/G/k,

784

# interpret it as mebibytes, gibibytes, or kibibytes respectively.

785

# If it's just a number, treat it as a number of rows and return right away.

786

787

# Parameters:

788

# %args - Arguments

789

790

# Required Arguments:

791

# dbh - dbh

792

# db - Database name

793

# tbl - Table name

794

# chunk_size - Chunk size string like "1000" or "50M"

795

796

# Returns:

797

# Array: number of rows, average row size

798

sub size_to_rows {

799

my ( $self, %args ) = @_;

800

my @required_args = qw(dbh db tbl chunk_size);

801

foreach my $arg ( @required_args ) {

802

die "I need a $arg argument" unless $args{$arg};

803

}

804

my ($dbh, $db, $tbl, $chunk_size) = @args{@required_args};

805

my $q = $self->{Quoter};

806

my $du = $self->{MySQLDump};

807

808

my ($n_rows, $avg_row_length);

809

810

my ( $num, $suffix ) = $chunk_size =~ m/^(\d+)([MGk])?$/;

811

if ( $suffix ) { # Convert to bytes.

812

$chunk_size = $suffix eq 'k' ? $num * 1_024

813

: $suffix eq 'M' ? $num * 1_024 * 1_024

814

: $num * 1_024 * 1_024 * 1_024;

815

}

816

elsif ( $num ) {

817

$n_rows = $num;

818

}

819

else {

820

die "Invalid chunk size $chunk_size; must be an integer "

821

. "with optional suffix kMG";

822

}

823

824

if ( $suffix || $args{avg_row_length} ) {

825

my ($status) = $du->get_table_status($dbh, $q, $db, $tbl);

826

$avg_row_length = $status->{avg_row_length};

827

if ( !defined $n_rows ) {

828

$n_rows = $avg_row_length ? ceil($chunk_size / $avg_row_length) : undef;

829

}

830

}

831

832

return $n_rows, $avg_row_length;

833

}

834

835

# Sub: get_range_statistics

836

# Determine the range of values for the chunk_col column on this table.

837

838

# Parameters:

839

# %args - Arguments

840

841

# Required Arguments:

842

# dbh - dbh

843

# db - Database name

844

# tbl - Table name

845

# chunk_col - Chunk column name

846

# tbl_struct - Hashref returned by <TableParser::parse()>

847

848

# Optional arguments:

849

# where - WHERE clause without "WHERE" to restrict range

850

# index_hint - "FORCE INDEX (...)" clause

851

# tries - Fetch up to this many rows to find a valid value

852

853

# Returns:

854

# Array: min row value, max row value, rows in range

855

sub get_range_statistics {

856

my ( $self, %args ) = @_;

857

my @required_args = qw(dbh db tbl chunk_col tbl_struct);

858

foreach my $arg ( @required_args ) {

859

die "I need a $arg argument" unless $args{$arg};

860

}

861

my ($dbh, $db, $tbl, $col) = @args{@required_args};

862

my $where = $args{where};

863

my $q = $self->{Quoter};

864

865

my $col_type = $args{tbl_struct}->{type_for}->{$col};

866

my $col_is_numeric = $args{tbl_struct}->{is_numeric}->{$col};

867

868

# Quote these once so we don't have to do it again.

869

my $db_tbl = $q->quote($db, $tbl);

870

$col = $q->quote($col);

871

872

my ($min, $max);

873

eval {

874

# First get the actual end points, whatever MySQL considers the

875

# min and max values to be for this column.

876

my $sql = "SELECT MIN($col), MAX($col) FROM $db_tbl"

877

. ($args{index_hint} ? " $args{index_hint}" : "")

878

. ($where ? " WHERE ($where)" : '');

879

MKDEBUG && _d($dbh, $sql);

880

($min, $max) = $dbh->selectrow_array($sql);

881

MKDEBUG && _d("Actual end points:", $min, $max);

882

883

# Now, for two reasons, get the valid end points. For one, an

884

# end point may be 0 or some zero-equivalent and the user doesn't

885

# want that because it skews the range. Or two, an end point may

886

# be an invalid value like date 2010-00-00 and we can't use that.

887

($min, $max) = $self->get_valid_end_points(

888

%args,

889

dbh => $dbh,

890

db_tbl => $db_tbl,

891

col => $col,

892

col_type => $col_type,

893

min => $min,

894

max => $max,

895

);

896

MKDEBUG && _d("Valid end points:", $min, $max);

897

};

898

if ( $EVAL_ERROR ) {

899

die "Error getting min and max values for table $db_tbl "

900

. "on column $col: $EVAL_ERROR";

901

}

902

903

# Finally get the total number of rows in range, usually the whole

904

# table unless there's a where arg restricting the range.

905

my $sql = "EXPLAIN SELECT * FROM $db_tbl"

906

. ($args{index_hint} ? " $args{index_hint}" : "")

907

. ($where ? " WHERE $where" : '');

908

MKDEBUG && _d($sql);

909

my $expl = $dbh->selectrow_hashref($sql);

910

911

return (

912

min => $min,

913

max => $max,

914

rows_in_range => $expl->{rows},

915

);

916

}

917

918

# Sub: inject_chunks

919

# Create a SQL statement from a query prototype by filling in placeholders.

920

921

# Parameters:

922

# %args - Arguments

923

924

# Required Arguments:

925

# database - Database name

926

# table - Table name

927

# chunks - Arrayref of chunks from <calculate_chunks()>

928

# chunk_num - Index into chunks to use

929

# query - Query prototype returned by

930

# <TableChecksum::make_checksum_query()>

931

932

# Optional Arguments:

933

# index_hint - "FORCE INDEX (...)" clause

934

# where - Arrayref of WHERE clauses joined with AND

935

936

# Returns:

937

# A SQL statement

938

sub inject_chunks {

939

my ( $self, %args ) = @_;

940

foreach my $arg ( qw(database table chunks chunk_num query) ) {

941

die "I need a $arg argument" unless defined $args{$arg};

942

}

943

MKDEBUG && _d('Injecting chunk', $args{chunk_num});

944

my $query = $args{query};

945

my $comment = sprintf("/*%s.%s:%d/%d*/",

946

$args{database}, $args{table},

947

$args{chunk_num} + 1, scalar @{$args{chunks}});

948

$query =~ s!/\*PROGRESS_COMMENT\*/!$comment!;

949

my $where = "WHERE (" . $args{chunks}->[$args{chunk_num}] . ')';

950

if ( $args{where} && grep { $_ } @{$args{where}} ) {

951

$where .= " AND ("

952

. join(" AND ", map { "($_)" } grep { $_ } @{$args{where}} )

953

. ")";

954

}

955

my $db_tbl = $self->{Quoter}->quote(@args{qw(database table)});

956

my $index_hint = $args{index_hint} || '';

957

958

MKDEBUG && _d('Parameters:',

959

Dumper({WHERE => $where, DB_TBL => $db_tbl, INDEX_HINT => $index_hint}));

960

$query =~ s!/\*WHERE\*/! $where!;

961

$query =~ s!/\*DB_TBL\*/!$db_tbl!;

962

$query =~ s!/\*INDEX_HINT\*/! $index_hint!;

963

$query =~ s!/\*CHUNK_NUM\*/! $args{chunk_num} AS chunk_num,!;

964

965

return $query;

966

}

967

968

# #############################################################################

969

# MySQL value to Perl number conversion.

970

# #############################################################################

971

972

# Convert a MySQL column value to a Perl integer.

973

# Arguments:

974

# * value scalar: MySQL value to convert

975

# * column_type scalar: MySQL column type of the value

976

# * dbh dbh

977

# Returns an integer or undef if the value isn't convertible

978

# (e.g. date 0000-00-00 is not convertible).

979

sub value_to_number {

980

my ( $self, %args ) = @_;

981

my @required_args = qw(column_type dbh);

982

foreach my $arg ( @required_args ) {

983

die "I need a $arg argument" unless defined $args{$arg};

984

}

985

my $val = $args{value};

986

my ($col_type, $dbh) = @args{@required_args};

987

MKDEBUG && _d('Converting MySQL', $col_type, $val);

988

989

return unless defined $val; # value is NULL

990

991

# MySQL functions to convert a non-numeric value to a number

992

# so we can do basic math on it in Perl. Right now we just

993

# convert temporal values but later we'll need to convert char

994

# and hex values.

995

my %mysql_conv_func_for = (

996

timestamp => 'UNIX_TIMESTAMP',

997

date => 'TO_DAYS',

998

time => 'TIME_TO_SEC',

999

datetime => 'TO_DAYS',

1000

);

1001

1002

# Convert the value to a number that Perl can do arithmetic with.

1003

my $num;

1004

if ( $col_type =~ m/(?:int|year|float|double|decimal)$/ ) {

1005

# These types are already numbers.

1006

$num = $val;

1007

}

1008

elsif ( $col_type =~ m/^(?:timestamp|date|time)$/ ) {

1009

# These are temporal values. Convert them using a MySQL func.

1010

my $func = $mysql_conv_func_for{$col_type};

1011

my $sql = "SELECT $func(?)";

1012

MKDEBUG && _d($dbh, $sql, $val);

1013

my $sth = $dbh->prepare($sql);

1014

$sth->execute($val);

1015

($num) = $sth->fetchrow_array();

1016

}

1017

elsif ( $col_type eq 'datetime' ) {

1018

# This type is temporal, too, but needs special handling.

1019

# Newer versions of MySQL could use TIMESTAMPDIFF, but it's easier

1020

# to maintain just one kind of code, so I do it all with DATE_ADD().

1021

$num = $self->timestampdiff($dbh, $val);

1022

}

1023

else {

1024

die "I don't know how to chunk $col_type\n";

1025

}

1026

MKDEBUG && _d('Converts to', $num);

1027

return $num;

1028

}

1029

1030

# ###########################################################################

1031

# Range functions.

1032

# ###########################################################################

1033

sub range_num {

1034

my ( $self, $dbh, $start, $interval, $max ) = @_;

1035

my $end = min($max, $start + $interval);

1036

1037

1038

# "Remove" scientific notation so the regex below does not make

1039

# 6.123456e+18 into 6.12345.

1040

$start = sprintf('%.17f', $start) if $start =~ /e/;

1041

$end = sprintf('%.17f', $end) if $end =~ /e/;

1042

1043

# Trim decimal places, if needed. This helps avoid issues with float

1044

# precision differing on different platforms.

1045

$start =~ s/\.(\d{5}).*$/.$1/;

1046

$end =~ s/\.(\d{5}).*$/.$1/;

1047

1048

if ( $end > $start ) {

1049

return ( $start, $end );

1050

}

1051

else {

1052

die "Chunk size is too small: $end !> $start\n";

1053

}

1054

}

1055

1056

sub range_time {

1057

my ( $self, $dbh, $start, $interval, $max ) = @_;

1058

my $sql = "SELECT SEC_TO_TIME($start), SEC_TO_TIME(LEAST($max, $start + $interval))";

1059

MKDEBUG && _d($sql);

1060

return $dbh->selectrow_array($sql);

1061

}

1062

1063

sub range_date {

1064

my ( $self, $dbh, $start, $interval, $max ) = @_;

1065

my $sql = "SELECT FROM_DAYS($start), FROM_DAYS(LEAST($max, $start + $interval))";

1066

MKDEBUG && _d($sql);

1067

return $dbh->selectrow_array($sql);

1068

}

1069

1070

sub range_datetime {

1071

my ( $self, $dbh, $start, $interval, $max ) = @_;

1072

my $sql = "SELECT DATE_ADD('$self->{EPOCH}', INTERVAL $start SECOND), "

1073

. "DATE_ADD('$self->{EPOCH}', INTERVAL LEAST($max, $start + $interval) SECOND)";

1074

MKDEBUG && _d($sql);

1075

return $dbh->selectrow_array($sql);

1076

}

1077

1078

sub range_timestamp {

1079

my ( $self, $dbh, $start, $interval, $max ) = @_;

1080

my $sql = "SELECT FROM_UNIXTIME($start), FROM_UNIXTIME(LEAST($max, $start + $interval))";

1081

MKDEBUG && _d($sql);

1082

return $dbh->selectrow_array($sql);

1083

}

1084

1085

# Returns the number of seconds between EPOCH and the value, according to

1086

# the MySQL server. (The server can do no wrong). I believe this code is right

1087

# after looking at the source of sql/time.cc but I am paranoid and add in an

1088

# extra check just to make sure. Earlier versions overflow on large interval

1089

# values, such as on 3.23.58, '1970-01-01' - interval 58000000000 second is

1090

# 2037-06-25 11:29:04. I know of no workaround. TO_DAYS('0000-....') is NULL,

1091

# so we treat it as 0.

1092

sub timestampdiff {

1093

my ( $self, $dbh, $time ) = @_;

1094

my $sql = "SELECT (COALESCE(TO_DAYS('$time'), 0) * 86400 + TIME_TO_SEC('$time')) "

1095

. "- TO_DAYS('$self->{EPOCH} 00:00:00') * 86400";

1096

MKDEBUG && _d($sql);

1097

my ( $diff ) = $dbh->selectrow_array($sql);

1098

$sql = "SELECT DATE_ADD('$self->{EPOCH}', INTERVAL $diff SECOND)";

1099

MKDEBUG && _d($sql);

1100

my ( $check ) = $dbh->selectrow_array($sql);

1101

die <<" EOF"

1102

Incorrect datetime math: given $time, calculated $diff but checked to $check.

1103

This could be due to a version of MySQL that overflows on large interval

1104

values to DATE_ADD(), or the given datetime is not a valid date. If not,

1105

please report this as a bug.

1106

EOF

1107

unless $check eq $time;

1108

return $diff;

1109

}

1110

1111

1112

# #############################################################################

1113

# End point validation.

1114

# #############################################################################

1115

1116

# These sub require val (or min and max) args which usually aren't NULL

1117

# but could be zero so the usual "die ... unless $args{$arg}" check does

1118

# not work.

1119

1120

# Returns valid min and max values. A valid val evaluates to a non-NULL value.

1121

# Arguments:

1122

# * dbh dbh

1123

# * db_tbl scalar: quoted `db`.`tbl`

1124

# * col scalar: quoted `column`

1125

# * col_type scalar: column type of the value

1126

# * min scalar: any scalar value

1127

# * max scalar: any scalar value

1128

# Optional arguments:

1129

# * index_hint scalar: "FORCE INDEX (...)" hint

1130

# * where scalar: WHERE clause without "WHERE"

1131

# * tries scalar: fetch up to this many rows to find a valid value

1132

# * zero_chunk bool: do a separate chunk for zero values

1133

# Some column types can store invalid values, like most of the temporal

1134

# types. When evaluated, invalid values return NULL. If the value is

1135

# NULL to begin with, then it is not invalid because NULL is valid.

1136

# For example, TO_DAYS('2009-00-00') evalues to NULL because that date

1137

# is invalid, even though it's storable.

1138

sub get_valid_end_points {

1139

my ( $self, %args ) = @_;

1140

my @required_args = qw(dbh db_tbl col col_type);

1141

foreach my $arg ( @required_args ) {

1142

die "I need a $arg argument" unless $args{$arg};

1143

}

1144

my ($dbh, $db_tbl, $col, $col_type) = @args{@required_args};

1145

my ($real_min, $real_max) = @args{qw(min max)};

1146

1147

# Common error message format in case there's a problem with

1148

# finding a valid min or max value.

1149

my $err_fmt = "Error finding a valid %s value for table $db_tbl on "

1150

. "column $col. The real %s value %s is invalid and "

1151

. "no other valid values were found. Verify that the table "

1152

. "has at least one valid value for this column"

1153

. ($args{where} ? " where $args{where}." : ".");

1154

1155

# Validate min value if it's not NULL. NULL is valid.

1156

my $valid_min = $real_min;

1157

if ( defined $valid_min ) {

1158

# Get a valid min end point.

1159

MKDEBUG && _d("Validating min end point:", $real_min);

1160

$valid_min = $self->_get_valid_end_point(

1161

%args,

1162

val => $real_min,

1163

endpoint => 'min',

1164

);

1165

die sprintf($err_fmt, 'minimum', 'minimum',

1166

(defined $real_min ? $real_min : "NULL"))

1167

unless defined $valid_min;

1168

}

1169

1170

# Validate max value if it's not NULL. NULL is valid.

1171

my $valid_max = $real_max;

1172

if ( defined $valid_max ) {

1173

# Get a valid max end point. So far I've not found a case where

1174

# the actual max val is invalid, but check anyway just in case.

1175

MKDEBUG && _d("Validating max end point:", $real_min);

1176

$valid_max = $self->_get_valid_end_point(

1177

%args,

1178

val => $real_max,

1179

endpoint => 'max',

1180

);

1181

die sprintf($err_fmt, 'maximum', 'maximum',

1182

(defined $real_max ? $real_max : "NULL"))

1183

unless defined $valid_max;

1184

}

1185

1186

return $valid_min, $valid_max;

1187

}

1188

1189

# Does the actual work for get_valid_end_points() for each end point.

1190

sub _get_valid_end_point {

1191

my ( $self, %args ) = @_;

1192

my @required_args = qw(dbh db_tbl col col_type);

1193

foreach my $arg ( @required_args ) {

1194

die "I need a $arg argument" unless $args{$arg};

1195

}

1196

my ($dbh, $db_tbl, $col, $col_type) = @args{@required_args};

1197

my $val = $args{val};

1198

1199

# NULL is valid.

1200

return $val unless defined $val;

1201

1202

# Right now we only validate temporal types, but when we begin

1203

# chunking char and hex columns we'll need to validate those.

1204

# E.g. HEX('abcdefg') is invalid and we'll probably find some

1205

# combination of char val + charset/collation that's invalid.

1206

my $validate = $col_type =~ m/time|date/ ? \&_validate_temporal_value

1207

: undef;

1208

1209

# If we cannot validate the value, assume it's valid.

1210

if ( !$validate ) {

1211

MKDEBUG && _d("No validator for", $col_type, "values");

1212

return $val;

1213

}

1214

1215

# Return the value if it's already valid.

1216

return $val if defined $validate->($dbh, $val);

1217

1218

# The value is not valid so find the first one in the table that is.

1219

MKDEBUG && _d("Value is invalid, getting first valid value");

1220

$val = $self->get_first_valid_value(

1221

%args,

1222

val => $val,

1223

validate => $validate,

1224

);

1225

1226

return $val;

1227

}

1228

1229

# Arguments:

1230

# * dbh dbh

1231

# * db_tbl scalar: quoted `db`.`tbl`

1232

# * col scalar: quoted `column` name

1233

# * val scalar: the current value, may be real, maybe not

1234

# * validate coderef: returns a defined value if the given value is valid

1235

# * endpoint scalar: "min" or "max", i.e. find first endpoint() real val

1236

# Optional arguments:

1237

# * tries scalar: fetch up to this many rows to find a valid value

1238

# * index_hint scalar: "FORCE INDEX (...)" hint

1239

# * where scalar: WHERE clause without "WHERE"

1240

# Returns the first column value from the given db_tbl that does *not*

1241

# evaluate to NULL. This is used mostly to eliminate unreal temporal

1242

# values which MySQL allows to be stored, like "2010-00-00". Returns

1243

# undef if no real value is found.

1244

sub get_first_valid_value {

1245

my ( $self, %args ) = @_;

1246

my @required_args = qw(dbh db_tbl col validate endpoint);

1247

foreach my $arg ( @required_args ) {

1248

die "I need a $arg argument" unless $args{$arg};

1249

}

1250

my ($dbh, $db_tbl, $col, $validate, $endpoint) = @args{@required_args};

1251

my $tries = defined $args{tries} ? $args{tries} : 5;

1252

my $val = $args{val};

1253

1254

# NULL values are valid and shouldn't be passed to us.

1255

return unless defined $val;

1256

1257

# Prep a sth for fetching the next col val.

1258

my $cmp = $endpoint =~ m/min/i ? '>'

1259

: $endpoint =~ m/max/i ? '<'

1260

: die "Invalid endpoint arg: $endpoint";

1261

my $sql = "SELECT $col FROM $db_tbl "

1262

. ($args{index_hint} ? "$args{index_hint} " : "")

1263

. "WHERE $col $cmp ? AND $col IS NOT NULL "

1264

. ($args{where} ? "AND ($args{where}) " : "")

1265

. "ORDER BY $col LIMIT 1";

1266

MKDEBUG && _d($dbh, $sql);

1267

my $sth = $dbh->prepare($sql);

1268

1269

# Fetch the next col val from the db.tbl until we find a valid one

1270

# or run out of rows. Only try a limited number of next rows.

1271

my $last_val = $val;

1272

while ( $tries-- ) {

1273

$sth->execute($last_val);

1274

my ($next_val) = $sth->fetchrow_array();

1275

MKDEBUG && _d('Next value:', $next_val, '; tries left:', $tries);

1276

if ( !defined $next_val ) {

1277

MKDEBUG && _d('No more rows in table');

1278

last;

1279

}

1280

if ( defined $validate->($dbh, $next_val) ) {

1281

MKDEBUG && _d('First valid value:', $next_val);

1282

$sth->finish();

1283

return $next_val;

1284

}

1285

$last_val = $next_val;

1286

}

1287

$sth->finish();

1288

$val = undef; # no valid value found

1289

1290

return $val;

1291

}

1292

1293

# Evalutes any temporal value, returns NULL if it's invalid, else returns

1294

# a value (possibly zero). It's magical but tested. See also,

1295

# http://hackmysql.com/blog/2010/05/26/detecting-invalid-and-zero-temporal-values/

1296

sub _validate_temporal_value {

1297

my ( $dbh, $val ) = @_;

1298

my $sql = "SELECT IF(TIME_FORMAT(?,'%H:%i:%s')=?, TIME_TO_SEC(?), TO_DAYS(?))";

1299

my $res;

1300

eval {

1301

MKDEBUG && _d($dbh, $sql, $val);

1302

my $sth = $dbh->prepare($sql);

1303

$sth->execute($val, $val, $val, $val);

1304

($res) = $sth->fetchrow_array();

1305

$sth->finish();

1306

};

1307

if ( $EVAL_ERROR ) {

1308

MKDEBUG && _d($EVAL_ERROR);

1309

}

1310

return $res;

1311

}

1312

1313

sub get_nonzero_value {

1314

my ( $self, %args ) = @_;

1315

my @required_args = qw(dbh db_tbl col col_type);

1316

foreach my $arg ( @required_args ) {

1317

die "I need a $arg argument" unless $args{$arg};

1318

}

1319

my ($dbh, $db_tbl, $col, $col_type) = @args{@required_args};

1320

my $tries = defined $args{tries} ? $args{tries} : 5;

1321

my $val = $args{val};

1322

1323

# Right now we only need a special check for temporal values.

1324

# _validate_temporal_value() does double-duty for this. The

1325

# default anonymous sub handles ints.

1326

my $is_nonzero = $col_type =~ m/time|date/ ? \&_validate_temporal_value

1327

: sub { return $_[1]; };

1328

1329

if ( !$is_nonzero->($dbh, $val) ) { # quasi-double-negative, sorry

1330

MKDEBUG && _d('Discarding zero value:', $val);

1331

my $sql = "SELECT $col FROM $db_tbl "

1332

. ($args{index_hint} ? "$args{index_hint} " : "")

1333

. "WHERE $col > ? AND $col IS NOT NULL "

1334

. ($args{where} ? "AND ($args{where}) " : '')

1335

. "ORDER BY $col LIMIT 1";

1336

MKDEBUG && _d($sql);

1337

my $sth = $dbh->prepare($sql);

1338

1339

my $last_val = $val;

1340

while ( $tries-- ) {

1341

$sth->execute($last_val);

1342

my ($next_val) = $sth->fetchrow_array();

1343

if ( $is_nonzero->($dbh, $next_val) ) {

1344

MKDEBUG && _d('First non-zero value:', $next_val);

1345

$sth->finish();

1346

return $next_val;

1347

}

1348

$last_val = $next_val;

1349

}

1350

$sth->finish();

1351

$val = undef; # no non-zero value found

1352

}

1353

1354

return $val;

1355

}

1356

1357

# Sub: base_count

1358

# Count to any number in any base with the given symbols. E.g. if counting

1359

# to 10 in base 16 with symbols 0,1,2,3,4,5,6,7,8,9,a,b,c,d,e,f the result

1360

# is "a". This is trival for stuff like base 16 (hex), but far less trivial

1361

# for arbitrary bases with arbitrary symbols like base 25 with symbols

1362

# B,C,D,...X,Y,Z. For that, counting to 10 results in "L". The base and its

1363

# symbols are determined by the character column. Symbols can be non-ASCII.

1364

1365

# Parameters:

1366

# %args - Arguments

1367

1368

# Required Arguments:

1369

# count_to - Number to count to

1370

# base - Base of special system

1371

# symbols - Arrayref of symbols for "numbers" in special system

1372

1373

# Returns:

1374

# The "number" (symbol) in the special target base system

1375

sub base_count {

1376

my ( $self, %args ) = @_;

1377

my @required_args = qw(count_to base symbols);

1378

foreach my $arg ( @required_args ) {

1379

die "I need a $arg argument" unless defined $args{$arg};

1380

}

1381

my ($n, $base, $symbols) = @args{@required_args};

1382

1383

# Can't take log of zero and the zeroth symbol in any base is the

1384

# zeroth symbol in any other base.

1385

return $symbols->[0] if $n == 0;

1386

1387

my $highest_power = floor(log($n)/log($base));

1388

if ( $highest_power == 0 ){

1389

return $symbols->[$n];

1390

}

1391

1392

my @base_powers;

1393

for my $power ( 0..$highest_power ) {

1394

push @base_powers, ($base**$power) || 1;

1395

}

1396

1397

my @base_multiples;

1398

foreach my $base_power ( reverse @base_powers ) {

1399

my $multiples = floor($n / $base_power);

1400

push @base_multiples, $multiples;

1401

$n -= $multiples * $base_power;

1402

}

1403

1404

return join('', map { $symbols->[$_] } @base_multiples);

1405

}

1406

1407

sub _d {

1408

my ($package, undef, $line) = caller 0;

1409

@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }

1410

map { defined $_ ? $_ : 'undef' }

1411

@_;

1412

print STDERR "# $package:$line $PID ", join(' ', @_), "\n";

1413

}

1414

1415

1416

}

1417

# ###########################################################################

1418

# End TableChunker package

1419

# ###########################################################################

Older »