4
die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n"
5
unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH};
6
unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib";
10
use warnings FATAL => 'all';
11
use English qw(-no_match_vars);
23
my $dp = new DSNParser(opts=>$dsn_opts);
24
my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp);
25
my $dbh = $sb->get_dbh_for('master');
28
plan skip_all => "Cannot connect to sandbox master";
34
$sb->create_dbs($dbh, ['test']);
37
my $tp = new TableParser(Quoter => $q);
38
my $vp = new VersionParser();
39
my $du = new MySQLDump();
40
my $c = new TableChecksum(Quoter=>$q, VersionParser=>$vp);
44
my %args = map { $_ => undef }
45
qw(db tbl tbl_struct algorithm function crc_wid crc_type opt_slice);
48
sub { $c->best_algorithm( %args, algorithm => 'foo', ) },
49
qr/Invalid checksum algorithm/,
53
# Inject the VersionParser with some bogus versions. Later I'll just pass the
54
# string version number instead of a real DBH, so the version parsing will
55
# return the value I want.
56
foreach my $ver( qw(4.0.0 4.1.1) ) {
57
$vp->{$ver} = $vp->parse($ver);
62
algorithm => 'CHECKSUM',
74
'Default is CHECKSUM',
79
algorithm => 'CHECKSUM',
84
'CHECKSUM eliminated by where',
89
algorithm => 'CHECKSUM',
94
'CHECKSUM eliminated by chunk',
99
algorithm => 'CHECKSUM',
104
'CHECKSUM eliminated by replicate',
113
'Default CHECKSUM eliminated by count',
118
algorithm => 'CHECKSUM',
123
'Explicit CHECKSUM not eliminated by count',
128
algorithm => 'CHECKSUM',
132
'CHECKSUM and BIT_XOR eliminated by version',
137
algorithm => 'BIT_XOR',
141
'BIT_XOR as requested',
146
algorithm => 'BIT_XOR',
150
'BIT_XOR eliminated by version',
155
algorithm => 'ACCUM',
159
'ACCUM as requested',
162
ok($c->is_hash_algorithm('ACCUM'), 'ACCUM is hash');
163
ok($c->is_hash_algorithm('BIT_XOR'), 'BIT_XOR is hash');
164
ok(!$c->is_hash_algorithm('CHECKSUM'), 'CHECKSUM is not hash');
171
"LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(FOO, 1, 1), 16, 10) "
172
. "AS UNSIGNED)), 10, 16), 1, '0')",
173
'FOO XOR slices 1 wide',
181
"LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(FOO, 1, 16), 16, 10) "
182
. "AS UNSIGNED)), 10, 16), 16, '0')",
183
'FOO XOR slices 16 wide',
191
"LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(FOO, 1, 16), 16, 10) "
192
. "AS UNSIGNED)), 10, 16), 16, '0'), "
193
. "LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(FOO, 17, 1), 16, 10) "
194
. "AS UNSIGNED)), 10, 16), 1, '0')",
195
'FOO XOR slices 17 wide',
203
"LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(FOO, 1, 16), 16, 10) "
204
. "AS UNSIGNED)), 10, 16), 16, '0'), "
205
. "LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(FOO, 17, 16), 16, 10) "
206
. "AS UNSIGNED)), 10, 16), 16, '0')",
207
'FOO XOR slices 32 wide',
216
"LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(\@crc := FOO, 1, 16), 16, 10) "
217
. "AS UNSIGNED)), 10, 16), 16, '0'), "
218
. "LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(\@crc, 17, 16), 16, 10) "
219
. "AS UNSIGNED)), 10, 16), 16, '0')",
220
'XOR slice optimized in slice 0',
229
"LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(\@crc, 1, 16), 16, 10) "
230
. "AS UNSIGNED)), 10, 16), 16, '0'), "
231
. "LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(\@crc := FOO, 17, 16), 16, 10) "
232
. "AS UNSIGNED)), 10, 16), 16, '0')",
233
'XOR slice optimized in slice 1',
236
$t = $tp->parse(load_file('t/lib/samples/sakila.film.sql'));
239
$c->make_row_checksum(
243
q{`film_id`, `title`, `description`, `release_year`, `language_id`, `original_language_id`, `rental_duration`, `rental_rate`, `length`, `replacement_cost`, `rating`, `special_features`, `last_update` + 0 AS `last_update`, }
244
. q{SHA1(CONCAT_WS('#', }
245
. q{`film_id`, `title`, `description`, `release_year`, `language_id`, }
246
. q{`original_language_id`, `rental_duration`, `rental_rate`, `length`, }
247
. q{`replacement_cost`, `rating`, `special_features`, `last_update` + 0, }
248
. q{CONCAT(ISNULL(`description`), ISNULL(`release_year`), }
249
. q{ISNULL(`original_language_id`), ISNULL(`length`), }
250
. q{ISNULL(`rating`), ISNULL(`special_features`))))},
251
'SHA1 query for sakila.film',
255
$c->make_row_checksum(
256
function => 'FNV_64',
259
q{`film_id`, `title`, `description`, `release_year`, `language_id`, `original_language_id`, `rental_duration`, `rental_rate`, `length`, `replacement_cost`, `rating`, `special_features`, `last_update` + 0 AS `last_update`, }
261
. q{`film_id`, `title`, `description`, `release_year`, `language_id`, }
262
. q{`original_language_id`, `rental_duration`, `rental_rate`, `length`, }
263
. q{`replacement_cost`, `rating`, `special_features`, `last_update` + 0)},
264
'FNV_64 query for sakila.film',
268
$c->make_row_checksum(
271
cols => [qw(film_id)],
273
q{`film_id`, SHA1(`film_id`)},
274
'SHA1 query for sakila.film with only one column',
278
$c->make_row_checksum(
281
cols => [qw(FILM_ID)],
283
q{`film_id`, SHA1(`film_id`)},
284
'Column names are case-insensitive',
288
$c->make_row_checksum(
291
cols => [qw(film_id title)],
294
q{`film_id`, `title`, SHA1(CONCAT_WS('%', `film_id`, `title`))},
299
$c->make_row_checksum(
302
cols => [qw(film_id title)],
305
q{`film_id`, `title`, SHA1(CONCAT_WS('%', `film_id`, `title`))},
310
$c->make_row_checksum(
313
cols => [qw(film_id title)],
316
q{`film_id`, `title`, SHA1(CONCAT_WS('#', `film_id`, `title`))},
317
'Really bad separator',
320
$t = $tp->parse(load_file('t/lib/samples/sakila.rental.float.sql'));
322
$c->make_row_checksum(
326
q{`rental_id`, `foo`, SHA1(CONCAT_WS('#', `rental_id`, `foo`))},
327
'FLOAT column is like any other',
331
$c->make_row_checksum(
334
float_precision => 5,
336
q{`rental_id`, ROUND(`foo`, 5), SHA1(CONCAT_WS('#', `rental_id`, ROUND(`foo`, 5)))},
337
'FLOAT column is rounded to 5 places',
340
$t = $tp->parse(load_file('t/lib/samples/sakila.film.sql'));
343
$c->make_row_checksum(
349
'VARCHAR column is trimmed',
353
$c->make_checksum_query(
358
algorithm => 'CHECKSUM',
361
crc_type => 'varchar',
363
'CHECKSUM TABLE `sakila`.`film`',
364
'Sakila.film CHECKSUM',
368
sub { $c->make_checksum_query(
373
algorithm => 'BIT_XOR',
375
cols => [qw(film_id)],
376
crc_type => 'varchar',
378
algorithm => 'CHECKSUM TABLE',
381
qr/missing checksum algorithm/,
382
'Complains about bad algorithm',
386
$c->make_checksum_query(
391
algorithm => 'BIT_XOR',
394
cols => [qw(film_id)],
395
crc_type => 'varchar',
397
q{SELECT /*PROGRESS_COMMENT*//*CHUNK_NUM*/ COUNT(*) AS cnt, }
398
. q{COALESCE(LOWER(CONCAT(LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(SHA1(`film_id`), 1, }
399
. q{16), 16, 10) AS UNSIGNED)), 10, 16), 16, '0'), }
400
. q{LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(SHA1(`film_id`), 17, 16), 16, }
401
. q{10) AS UNSIGNED)), 10, 16), 16, '0'), }
402
. q{LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(SHA1(`film_id`), 33, 8), 16, }
403
. q{10) AS UNSIGNED)), 10, 16), 8, '0'))), 0) AS crc }
404
. q{FROM /*DB_TBL*//*INDEX_HINT*//*WHERE*/},
405
'Sakila.film SHA1 BIT_XOR',
409
$c->make_checksum_query(
414
algorithm => 'BIT_XOR',
415
function => 'FNV_64',
417
cols => [qw(film_id)],
418
crc_type => 'bigint',
420
q{SELECT /*PROGRESS_COMMENT*//*CHUNK_NUM*/ COUNT(*) AS cnt, }
421
. q{COALESCE(LOWER(CONV(BIT_XOR(CAST(FNV_64(`film_id`) AS UNSIGNED)), 10, 16)), 0) AS crc }
422
. q{FROM /*DB_TBL*//*INDEX_HINT*//*WHERE*/},
423
'Sakila.film FNV_64 BIT_XOR',
427
$c->make_checksum_query(
432
algorithm => 'BIT_XOR',
433
function => 'FNV_64',
435
cols => [qw(film_id)],
437
crc_type => 'bigint',
439
q{SELECT SQL_BUFFER_RESULT /*PROGRESS_COMMENT*//*CHUNK_NUM*/ COUNT(*) AS cnt, }
440
. q{COALESCE(LOWER(CONV(BIT_XOR(CAST(FNV_64(`film_id`) AS UNSIGNED)), 10, 16)), 0) AS crc }
441
. q{FROM /*DB_TBL*//*INDEX_HINT*//*WHERE*/},
442
'Sakila.film FNV_64 BIT_XOR',
446
$c->make_checksum_query(
451
algorithm => 'BIT_XOR',
454
cols => [qw(film_id)],
458
q{SELECT SQL_BUFFER_RESULT /*PROGRESS_COMMENT*//*CHUNK_NUM*/ COUNT(*) AS cnt, }
459
. q{COALESCE(LOWER(CONV(BIT_XOR(CAST(CRC32(`film_id`) AS UNSIGNED)), 10, 16)), 0) AS crc }
460
. q{FROM /*DB_TBL*//*INDEX_HINT*//*WHERE*/},
461
'Sakila.film CRC32 BIT_XOR',
465
$c->make_checksum_query(
470
algorithm => 'BIT_XOR',
473
cols => [qw(film_id)],
474
replicate => 'test.checksum',
475
crc_type => 'varchar',
477
q{REPLACE /*PROGRESS_COMMENT*/ INTO test.checksum }
478
. q{(db, tbl, chunk, boundaries, this_cnt, this_crc) }
479
. q{SELECT ?, ?, /*CHUNK_NUM*/ ?, COUNT(*) AS cnt, }
480
. q{COALESCE(LOWER(CONCAT(LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(SHA1(`film_id`), 1, }
481
. q{16), 16, 10) AS UNSIGNED)), 10, 16), 16, '0'), }
482
. q{LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(SHA1(`film_id`), 17, 16), 16, }
483
. q{10) AS UNSIGNED)), 10, 16), 16, '0'), }
484
. q{LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(SHA1(`film_id`), 33, 8), 16, }
485
. q{10) AS UNSIGNED)), 10, 16), 8, '0'))), 0) AS crc }
486
. q{FROM /*DB_TBL*//*INDEX_HINT*//*WHERE*/},
487
'Sakila.film SHA1 BIT_XOR with replication',
491
$c->make_checksum_query(
496
algorithm => 'ACCUM',
499
crc_type => 'varchar',
501
q{SELECT /*PROGRESS_COMMENT*//*CHUNK_NUM*/ COUNT(*) AS cnt, }
502
. q{COALESCE(RIGHT(MAX(@crc := CONCAT(LPAD(@cnt := @cnt + 1, 16, '0'), }
503
. q{SHA1(CONCAT(@crc, SHA1(CONCAT_WS('#', }
504
. q{`film_id`, `title`, `description`, `release_year`, `language_id`, }
505
. q{`original_language_id`, `rental_duration`, `rental_rate`, `length`, }
506
. q{`replacement_cost`, `rating`, `special_features`, `last_update` + 0, }
507
. q{CONCAT(ISNULL(`description`), ISNULL(`release_year`), }
508
. q{ISNULL(`original_language_id`), ISNULL(`length`), }
509
. q{ISNULL(`rating`), ISNULL(`special_features`)))))))), 40), 0) AS crc }
510
. q{FROM /*DB_TBL*//*INDEX_HINT*//*WHERE*/},
511
'Sakila.film SHA1 ACCUM',
515
$c->make_checksum_query(
520
algorithm => 'ACCUM',
521
function => 'FNV_64',
523
crc_type => 'bigint',
525
q{SELECT /*PROGRESS_COMMENT*//*CHUNK_NUM*/ COUNT(*) AS cnt, }
526
. q{COALESCE(RIGHT(MAX(@crc := CONCAT(LPAD(@cnt := @cnt + 1, 16, '0'), }
527
. q{CONV(CAST(FNV_64(CONCAT(@crc, FNV_64(}
528
. q{`film_id`, `title`, `description`, `release_year`, `language_id`, }
529
. q{`original_language_id`, `rental_duration`, `rental_rate`, `length`, }
530
. q{`replacement_cost`, `rating`, `special_features`, `last_update` + 0}
531
. q{))) AS UNSIGNED), 10, 16))), 16), 0) AS crc }
532
. q{FROM /*DB_TBL*//*INDEX_HINT*//*WHERE*/},
533
'Sakila.film FNV_64 ACCUM',
537
$c->make_checksum_query(
542
algorithm => 'ACCUM',
546
cols => [qw(film_id)],
548
q{SELECT /*PROGRESS_COMMENT*//*CHUNK_NUM*/ COUNT(*) AS cnt, }
549
. q{COALESCE(RIGHT(MAX(@crc := CONCAT(LPAD(@cnt := @cnt + 1, 16, '0'), }
550
. q{CONV(CAST(CRC32(CONCAT(@crc, CRC32(`film_id`}
551
. q{))) AS UNSIGNED), 10, 16))), 16), 0) AS crc }
552
. q{FROM /*DB_TBL*//*INDEX_HINT*//*WHERE*/},
553
'Sakila.film CRC32 ACCUM',
557
$c->make_checksum_query(
562
algorithm => 'ACCUM',
565
replicate => 'test.checksum',
566
crc_type => 'varchar',
568
q{REPLACE /*PROGRESS_COMMENT*/ INTO test.checksum }
569
. q{(db, tbl, chunk, boundaries, this_cnt, this_crc) }
570
. q{SELECT ?, ?, /*CHUNK_NUM*/ ?, COUNT(*) AS cnt, }
571
. q{COALESCE(RIGHT(MAX(@crc := CONCAT(LPAD(@cnt := @cnt + 1, 16, '0'), }
572
. q{SHA1(CONCAT(@crc, SHA1(CONCAT_WS('#', }
573
. q{`film_id`, `title`, `description`, `release_year`, `language_id`, }
574
. q{`original_language_id`, `rental_duration`, `rental_rate`, `length`, }
575
. q{`replacement_cost`, `rating`, `special_features`, `last_update` + 0, }
576
. q{CONCAT(ISNULL(`description`), ISNULL(`release_year`), }
577
. q{ISNULL(`original_language_id`), ISNULL(`length`), }
578
. q{ISNULL(`rating`), ISNULL(`special_features`)))))))), 40), 0) AS crc }
579
. q{FROM /*DB_TBL*//*INDEX_HINT*//*WHERE*/},
580
'Sakila.film SHA1 ACCUM with replication',
583
is ( $c->crc32('hello world'), 222957957, 'CRC32 of hello world');
585
# #############################################################################
587
# #############################################################################
589
$c->choose_hash_func(
592
qr/CRC32|FNV_64|MD5/,
593
'CRC32, FNV_64 or MD5 is default',
597
$c->choose_hash_func(
601
qr/CRC32|FNV_64|MD5/,
602
'SHA99 does not exist so I get CRC32 or friends',
606
$c->choose_hash_func(
611
'MD5 requested and MD5 granted',
633
[$c->get_crc_type($dbh, 'CRC32')],
635
'Type and length of CRC32'
639
[$c->get_crc_type($dbh, 'MD5')],
641
'Type and length of MD5'
644
# #############################################################################
645
# Issue 94: Enhance mk-table-checksum, add a --ignorecols option
646
# #############################################################################
647
$sb->load_file('master', 't/lib/samples/issue_94.sql');
648
$t= $tp->parse( $du->get_create_table($dbh, $q, 'test', 'issue_94') );
649
my $query = $c->make_checksum_query(
653
algorithm => 'ACCUM',
663
ignorecols => {'c'=>1},
666
'SELECT /*PROGRESS_COMMENT*//*CHUNK_NUM*/ COUNT(*) AS cnt, COALESCE(RIGHT(MAX(@crc := CONCAT(LPAD(@cnt := @cnt + 1, 16, \'0\'), CONV(CAST(CRC32(CONCAT(@crc, CRC32(CONCAT_WS(\'#\', `a`, `b`)))) AS UNSIGNED), 10, 16))), 16), 0) AS crc FROM /*DB_TBL*//*INDEX_HINT*//*WHERE*/',
667
'Ignores specified columns');
669
$sb->wipe_clean($dbh);