1
# This program is copyright 2011 Percona Inc.
2
# This program is copyright 2007-2010 Baron Schwartz.
3
# Feedback and improvements are welcome.
5
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
6
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
7
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
9
# This program is free software; you can redistribute it and/or modify it under
10
# the terms of the GNU General Public License as published by the Free Software
11
# Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
12
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
15
# You should have received a copy of the GNU General Public License along with
16
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
17
# Place, Suite 330, Boston, MA 02111-1307 USA.
18
# ###########################################################################
19
# TableChunker package $Revision: 7169 $
20
# ###########################################################################
22
# Package: TableChunker
23
# TableChunker helps determine how to "chunk" a table. Chunk are
24
# pre-determined ranges of rows defined by boundary values (sometimes also
25
# called endpoints) on numeric or numeric-like columns, including date/time
26
# types. Any numeric column type that MySQL can do positional comparisons
27
# (<, <=, >, >=) on works. Chunking on character data is not supported yet
28
# (but see <issue 568 at http://code.google.com/p/maatkit/issues/detail?id=568>).
30
# Usually chunks range over all rows in a table but sometimes they only
31
# range over a subset of rows if an optional where arg is passed to various
32
# subs. In either case a chunk is like "`col` >= 5 AND `col` < 10". If
33
# col is of type int and is unique, then that chunk ranges over up to 5 rows.
35
# Chunks are included in WHERE clauses by various tools to do work on discrete
36
# chunks of the table instead of trying to work on the entire table at once.
37
# Chunks do not overlap and their size is configurable via the chunk_size arg
38
# passed to several subs. The chunk_size can be a number of rows or a size
39
# like 1M, in which case it's in estimated bytes of data. Real chunk sizes
40
# are usually close to the requested chunk_size but unless the optional exact
41
# arg is assed the real chunk sizes are approximate. Sometimes the
42
# distribution of values on the chunk colun can skew chunking. If, for
43
# example, col has values 0, 100, 101, ... then the zero value skews chunking.
44
# The zero_chunk arg handles this.
49
use warnings FATAL => 'all';
50
use English qw(-no_match_vars);
51
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
53
use POSIX qw(floor ceil);
54
use List::Util qw(min max);
56
$Data::Dumper::Indent = 1;
57
$Data::Dumper::Sortkeys = 1;
58
$Data::Dumper::Quotekeys = 0;
63
# $class - TableChunker (automatic)
67
# Quoter - <Quoter> object
68
# MySQLDump - <MySQLDump> object
70
my ( $class, %args ) = @_;
71
foreach my $arg ( qw(Quoter MySQLDump) ) {
72
die "I need a $arg argument" unless $args{$arg};
75
my %int_types = map { $_ => 1 } qw(bigint date datetime int mediumint smallint time timestamp tinyint year);
76
my %real_types = map { $_ => 1 } qw(decimal double float);
80
int_types => \%int_types,
81
real_types => \%real_types,
82
EPOCH => '1970-01-01',
85
return bless $self, $class;
88
# Sub: find_chunk_columns
89
# Find chunkable columns.
95
# table_struct - Hashref returned from <TableParser::parse()>
98
# exact - bool: Try to support exact chunk sizes (may still chunk fuzzily)
101
# Array: whether the table can be chunked exactly if requested (zero
102
# otherwise), arrayref of columns that support chunking. Example:
106
# { column => 'id', index => 'PRIMARY' },
107
# { column => 'i', index => 'i_idx' },
110
sub find_chunk_columns {
111
my ( $self, %args ) = @_;
112
foreach my $arg ( qw(tbl_struct) ) {
113
die "I need a $arg argument" unless $args{$arg};
115
my $tbl_struct = $args{tbl_struct};
117
# See if there's an index that will support chunking.
118
my @possible_indexes;
119
foreach my $index ( values %{ $tbl_struct->{keys} } ) {
121
# Accept only BTREE indexes.
122
next unless $index->{type} eq 'BTREE';
124
# Reject indexes with prefixed columns.
125
next if grep { defined } @{$index->{col_prefixes}};
127
# If exact, accept only unique, single-column indexes.
128
if ( $args{exact} ) {
129
next unless $index->{is_unique} && @{$index->{cols}} == 1;
132
push @possible_indexes, $index;
134
MKDEBUG && _d('Possible chunk indexes in order:',
135
join(', ', map { $_->{name} } @possible_indexes));
137
# Build list of candidate chunk columns.
138
my $can_chunk_exact = 0;
140
foreach my $index ( @possible_indexes ) {
141
my $col = $index->{cols}->[0];
143
# Accept only integer or real number type columns or character columns.
144
my $col_type = $tbl_struct->{type_for}->{$col};
145
next unless $self->{int_types}->{$col_type}
146
|| $self->{real_types}->{$col_type}
147
|| $col_type =~ m/char/;
149
# Save the candidate column and its index.
150
push @candidate_cols, { column => $col, index => $index->{name} };
153
$can_chunk_exact = 1 if $args{exact} && scalar @candidate_cols;
156
my $chunk_type = $args{exact} ? 'Exact' : 'Inexact';
157
_d($chunk_type, 'chunkable:',
158
join(', ', map { "$_->{column} on $_->{index}" } @candidate_cols));
161
# Order the candidates by their original column order.
162
# Put the PK's first column first, if it's a candidate.
164
MKDEBUG && _d('Ordering columns by order in tbl, PK first');
165
if ( $tbl_struct->{keys}->{PRIMARY} ) {
166
my $pk_first_col = $tbl_struct->{keys}->{PRIMARY}->{cols}->[0];
167
@result = grep { $_->{column} eq $pk_first_col } @candidate_cols;
168
@candidate_cols = grep { $_->{column} ne $pk_first_col } @candidate_cols;
171
my %col_pos = map { $_ => $i++ } @{$tbl_struct->{cols}};
172
push @result, sort { $col_pos{$a->{column}} <=> $col_pos{$b->{column}} }
176
_d('Chunkable columns:',
177
join(', ', map { "$_->{column} on $_->{index}" } @result));
178
_d('Can chunk exactly:', $can_chunk_exact);
181
return ($can_chunk_exact, @result);
184
# Sub: calculate_chunks
185
# Calculate chunks for the given range statistics. Args min, max and
186
# rows_in_range are returned from get_range_statistics() which is usually
187
# called before this sub. Min and max are expected to be valid values
193
# Required Arguments:
197
# tbl_struct - retval of <TableParser::parse()>
198
# chunk_col - column name to chunk on
199
# min - min col value, from <TableChunker::get_range_statistics()>
200
# max - max col value, from <TableChunker::get_range_statistics()>
201
# rows_in_range - number of rows to chunk, from
202
# <TableChunker::get_range_statistics()>
203
# chunk_size - requested size of each chunk
205
# Optional Arguments:
206
# exact - Use exact chunk_size? Use approximates is not.
207
# tries - Fetch up to this many rows to find a non-zero value
208
# chunk_range - Make chunk range open (default) or openclosed
211
# Array of WHERE predicates like "`col` >= '10' AND `col` < '20'",
212
# one for each chunk. All values are single-quoted due to <issue 1002 at
213
# http://code.google.com/p/maatkit/issues/detail?id=1002>. Example:
216
# `film_id` >= '30' AND `film_id` < '60',
217
# `film_id` >= '60' AND `film_id` < '90',
220
sub calculate_chunks {
221
my ( $self, %args ) = @_;
222
my @required_args = qw(dbh db tbl tbl_struct chunk_col rows_in_range chunk_size);
223
foreach my $arg ( @required_args ) {
224
die "I need a $arg argument" unless defined $args{$arg};
226
MKDEBUG && _d('Calculate chunks for',
227
join(", ", map {"$_=".(defined $args{$_} ? $args{$_} : "undef")}
228
qw(db tbl chunk_col min max rows_in_range chunk_size zero_chunk exact)
231
if ( !$args{rows_in_range} ) {
232
MKDEBUG && _d("Empty table");
236
# http://code.google.com/p/maatkit/issues/detail?id=1084
237
if ( $args{rows_in_range} < $args{chunk_size} ) {
238
MKDEBUG && _d("Chunk size larger than rows in range");
242
my $q = $self->{Quoter};
243
my $dbh = $args{dbh};
244
my $chunk_col = $args{chunk_col};
245
my $tbl_struct = $args{tbl_struct};
246
my $col_type = $tbl_struct->{type_for}->{$chunk_col};
247
MKDEBUG && _d('chunk col type:', $col_type);
249
# Get chunker info for the column type. Numeric cols are chunked
250
# differently than char cols.
252
if ( $tbl_struct->{is_numeric}->{$chunk_col} || $col_type =~ /date|time/ ) {
253
%chunker = $self->_chunk_numeric(%args);
255
elsif ( $col_type =~ m/char/ ) {
256
%chunker = $self->_chunk_char(%args);
259
die "Cannot chunk $col_type columns";
261
MKDEBUG && _d("Chunker:", Dumper(\%chunker));
262
my ($col, $start_point, $end_point, $interval, $range_func)
263
= @chunker{qw(col start_point end_point interval range_func)};
265
# Generate a list of chunk boundaries. The first and last chunks are
266
# inclusive, and will catch any rows before or after the end of the
267
# supposed range. So 1-100 divided into chunks of 30 should actually end
268
# up with chunks like this:
273
# If zero_chunk was specified and zero chunking was possible, the first
274
# chunk will be = 0 to catch any zero or zero-equivalent (e.g. 00:00:00)
277
if ( $start_point < $end_point ) {
279
# The zero chunk, if there is one. It doesn't have to be the first
280
# chunk. The 0 cannot be quoted because if d='0000-00-00' then
281
# d=0 will work but d='0' will cause warning 1292: Incorrect date
282
# value: '0' for column 'd'. This might have to column-specific in
283
# future when we chunk on more exotic column types.
284
push @chunks, "$col = 0" if $chunker{have_zero_chunk};
288
for ( my $i = $start_point; $i < $end_point; $i += $interval ) {
289
($beg, $end) = $self->$range_func($dbh, $i, $interval, $end_point);
292
if ( $iter++ == 0 ) {
294
($chunker{have_zero_chunk} ? "$col > 0 AND " : "")
295
."$col < " . $q->quote_val($end);
298
# The normal case is a chunk in the middle of the range somewhere.
299
push @chunks, "$col >= " . $q->quote_val($beg) . " AND $col < " . $q->quote_val($end);
303
# Remove the last chunk and replace it with one that matches everything
304
# from the beginning of the last chunk to infinity, or to the max col
305
# value if chunk_range is openclosed. If the chunk column is nullable,
306
# do NULL separately.
307
my $chunk_range = lc $args{chunk_range} || 'open';
308
my $nullable = $args{tbl_struct}->{is_nullable}->{$args{chunk_col}};
311
push @chunks, "$col >= " . $q->quote_val($beg)
312
. ($chunk_range eq 'openclosed'
313
? " AND $col <= " . $q->quote_val($args{max}) : "");
316
push @chunks, $nullable ? "$col IS NOT NULL" : '1=1';
319
push @chunks, "$col IS NULL";
323
# There are no chunks; just do the whole table in one chunk.
324
MKDEBUG && _d('No chunks; using single chunk 1=1');
331
# Sub: _chunk_numeric
332
# Determine how to chunk a numeric column.
337
# Required Arguments:
341
# tbl_struct - retval of <TableParser::parse()>
342
# chunk_col - column name to chunk on
343
# min - min col value, from <TableChunker::get_range_statistics()>
344
# max - max col value, from <TableChunker::get_range_statistics()>
345
# rows_in_range - number of rows to chunk, from
346
# <TableChunker::get_range_statistics()>
347
# chunk_size - requested size of each chunk
349
# Optional Arguments:
350
# exact - Use exact chunk_size? Use approximates is not.
351
# tries - Fetch up to this many rows to find a non-zero value
352
# zero_chunk - Add an extra chunk for zero values? (0, 00:00, etc.)
355
# Array of chunker info that <calculate_chunks()> uses to create
358
# col => quoted chunk column name
359
# start_point => start value (a Perl number)
360
# end_point => end value (a Perl number)
361
# interval => interval to walk from start_ to end_point (a Perl number)
362
# range_func => coderef to return a value while walking that ^ range
363
# have_zero_chunk => whether to include a zero chunk (col=0)
366
my ( $self, %args ) = @_;
367
my @required_args = qw(dbh db tbl tbl_struct chunk_col rows_in_range chunk_size);
368
foreach my $arg ( @required_args ) {
369
die "I need a $arg argument" unless defined $args{$arg};
371
my $q = $self->{Quoter};
372
my $db_tbl = $q->quote($args{db}, $args{tbl});
373
my $col_type = $args{tbl_struct}->{type_for}->{$args{chunk_col}};
375
# Convert the given MySQL values to (Perl) numbers using some MySQL function.
376
# E.g.: SELECT TIME_TO_SEC('12:34') == 45240.
378
if ( $col_type =~ m/(?:int|year|float|double|decimal)$/ ) {
379
$range_func = 'range_num';
381
elsif ( $col_type =~ m/^(?:timestamp|date|time)$/ ) {
382
$range_func = "range_$col_type";
384
elsif ( $col_type eq 'datetime' ) {
385
$range_func = 'range_datetime';
388
my ($start_point, $end_point);
390
$start_point = $self->value_to_number(
392
column_type => $col_type,
395
$end_point = $self->value_to_number(
397
column_type => $col_type,
402
if ( $EVAL_ERROR =~ m/don't know how to chunk/ ) {
403
# Special kind of error doesn't make sense with the more verbose
408
die "Error calculating chunk start and end points for table "
409
. "`$args{tbl_struct}->{name}` on column `$args{chunk_col}` "
410
. "with min/max values "
412
map { defined $args{$_} ? $args{$_} : 'undef' } qw(min max))
415
. "\nVerify that the min and max values are valid for the column. "
416
. "If they are valid, this error could be caused by a bug in the "
421
# The end points might be NULL in the pathological case that the table
422
# has nothing but NULL values. If there's at least one non-NULL value
423
# then MIN() and MAX() will return it. Otherwise, the only thing to do
424
# is make NULL end points zero to make the code below work and any NULL
425
# values will be handled by the special "IS NULL" chunk.
426
if ( !defined $start_point ) {
427
MKDEBUG && _d('Start point is undefined');
430
if ( !defined $end_point || $end_point < $start_point ) {
431
MKDEBUG && _d('End point is undefined or before start point');
434
MKDEBUG && _d("Actual chunk range:", $start_point, "to", $end_point);
436
# Determine if we can include a zero chunk (col = 0). If yes, then
437
# make sure the start point is non-zero. $start_point and $end_point
438
# should be numbers (converted from MySQL values earlier). The purpose
439
# of the zero chunk is to capture a potentially large number of zero
440
# values that might imbalance the size of the first chunk. E.g. if a
441
# lot of invalid times were inserted and stored as 00:00:00, these
442
# zero (equivalent) values are captured by the zero chunk instead of
443
# the first chunk + all the non-zero values in the first chunk.
444
my $have_zero_chunk = 0;
445
if ( $args{zero_chunk} ) {
446
if ( $start_point != $end_point && $start_point >= 0 ) {
447
MKDEBUG && _d('Zero chunking');
448
my $nonzero_val = $self->get_nonzero_value(
451
col => $args{chunk_col},
452
col_type => $col_type,
455
# Since we called value_to_number() before with this column type
456
# we shouldn't have to worry about it dying here--it would have
457
# died earlier if we can't chunk the column type.
458
$start_point = $self->value_to_number(
459
value => $nonzero_val,
460
column_type => $col_type,
463
$have_zero_chunk = 1;
466
MKDEBUG && _d("Cannot zero chunk");
469
MKDEBUG && _d("Using chunk range:", $start_point, "to", $end_point);
471
# Calculate the chunk size in terms of "distance between endpoints"
472
# that will give approximately the right number of rows between the
473
# endpoints. If possible and requested, forbid chunks from being any
474
# bigger than specified.
475
my $interval = $args{chunk_size}
476
* ($end_point - $start_point)
477
/ $args{rows_in_range};
478
if ( $self->{int_types}->{$col_type} ) {
479
$interval = ceil($interval);
481
$interval ||= $args{chunk_size};
482
if ( $args{exact} ) {
483
$interval = $args{chunk_size};
485
MKDEBUG && _d('Chunk interval:', $interval, 'units');
488
col => $q->quote($args{chunk_col}),
489
start_point => $start_point,
490
end_point => $end_point,
491
interval => $interval,
492
range_func => $range_func,
493
have_zero_chunk => $have_zero_chunk,
497
# Sub: _chunk_numeric
498
# Determine how to chunk a character column.
503
# Required Arguments:
507
# tbl_struct - retval of <TableParser::parse()>
508
# chunk_col - column name to chunk on
509
# min - min col value, from <TableChunker::get_range_statistics()>
510
# max - max col value, from <TableChunker::get_range_statistics()>
511
# rows_in_range - number of rows to chunk, from
512
# <TableChunker::get_range_statistics()>
513
# chunk_size - requested size of each chunk
516
# Array of chunker info that <calculate_chunks()> uses to create
519
# col => quoted chunk column name
520
# start_point => start value (a Perl number)
521
# end_point => end value (a Perl number)
522
# interval => interval to walk from start_ to end_point (a Perl number)
523
# range_func => coderef to return a value while walking that ^ range
526
my ( $self, %args ) = @_;
527
my @required_args = qw(dbh db tbl tbl_struct chunk_col rows_in_range chunk_size);
528
foreach my $arg ( @required_args ) {
529
die "I need a $arg argument" unless defined $args{$arg};
531
my $q = $self->{Quoter};
532
my $db_tbl = $q->quote($args{db}, $args{tbl});
533
my $dbh = $args{dbh};
534
my $chunk_col = $args{chunk_col};
538
# Get what MySQL says are the min and max column values.
539
# For example, is 'a' or 'A' the min according to MySQL?
540
$sql = "SELECT MIN($chunk_col), MAX($chunk_col) FROM $db_tbl "
541
. "ORDER BY `$chunk_col`";
542
MKDEBUG && _d($dbh, $sql);
543
$row = $dbh->selectrow_arrayref($sql);
544
my ($min_col, $max_col) = ($row->[0], $row->[1]);
546
# Get the character codes between the min and max column values.
547
$sql = "SELECT ORD(?) AS min_col_ord, ORD(?) AS max_col_ord";
548
MKDEBUG && _d($dbh, $sql);
549
my $ord_sth = $dbh->prepare($sql); # avoid quoting issues
550
$ord_sth->execute($min_col, $max_col);
551
$row = $ord_sth->fetchrow_arrayref();
552
my ($min_col_ord, $max_col_ord) = ($row->[0], $row->[1]);
553
MKDEBUG && _d("Min/max col char code:", $min_col_ord, $max_col_ord);
555
# Create a sorted chacater-to-number map of the unique characters in
556
# the column ranging from the min character code to the max.
559
MKDEBUG && _d("Table charset:", $args{tbl_struct}->{charset});
560
if ( ($args{tbl_struct}->{charset} || "") eq "latin1" ) {
561
# These are the unique, sorted latin1 character codes according to
562
# MySQL. You'll notice that many are missing. That's because MySQL
563
# treats many characters as the same, for example "e" and "é".
564
my @sorted_latin1_chars = (
565
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
566
46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
567
60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73,
568
74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87,
569
88, 89, 90, 91, 92, 93, 94, 95, 96, 123, 124, 125, 126, 161,
570
162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
571
176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
572
190, 191, 215, 216, 222, 223, 247, 255);
574
my ($first_char, $last_char);
575
for my $i ( 0..$#sorted_latin1_chars ) {
576
$first_char = $i and last if $sorted_latin1_chars[$i] >= $min_col_ord;
578
for my $i ( $first_char..$#sorted_latin1_chars ) {
579
$last_char = $i and last if $sorted_latin1_chars[$i] >= $max_col_ord;
582
@chars = map { chr $_; } @sorted_latin1_chars[$first_char..$last_char];
583
$base = scalar @chars;
586
# If the table's charset isn't latin1, who knows what charset is being
587
# used, what characters it contains, and how those characters are sorted.
588
# So we create a character map and let MySQL tell us these things.
590
# Create a temp table with the same char col def as the original table.
591
my $tmp_tbl = '__maatkit_char_chunking_map';
592
my $tmp_db_tbl = $q->quote($args{db}, $tmp_tbl);
593
$sql = "DROP TABLE IF EXISTS $tmp_db_tbl";
594
MKDEBUG && _d($dbh, $sql);
596
my $col_def = $args{tbl_struct}->{defs}->{$chunk_col};
597
$sql = "CREATE TEMPORARY TABLE $tmp_db_tbl ($col_def) "
599
MKDEBUG && _d($dbh, $sql);
602
# Populate the temp table with all the characters between the min and max
603
# max character codes. This is our character-to-number map.
604
$sql = "INSERT INTO $tmp_db_tbl VALUE (CHAR(?))";
605
MKDEBUG && _d($dbh, $sql);
606
my $ins_char_sth = $dbh->prepare($sql); # avoid quoting issues
607
for my $char_code ( $min_col_ord..$max_col_ord ) {
608
$ins_char_sth->execute($char_code);
611
# Select from the char-to-number map all characters between the
612
# min and max col values, letting MySQL order them. The first
613
# character returned becomes "zero" in a new base system of counting,
614
# the second character becomes "one", etc. So if 42 chars are
615
# returned like [a, B, c, d, é, ..., ü] then we have a base 42
616
# system where 0=a, 1=B, 2=c, 3=d, 4=é, ... 41=ü. count_base()
617
# helps us count in arbitrary systems.
618
$sql = "SELECT `$chunk_col` FROM $tmp_db_tbl "
619
. "WHERE `$chunk_col` BETWEEN ? AND ? "
620
. "ORDER BY `$chunk_col`";
621
MKDEBUG && _d($dbh, $sql);
622
my $sel_char_sth = $dbh->prepare($sql);
623
$sel_char_sth->execute($min_col, $max_col);
625
@chars = map { $_->[0] } @{ $sel_char_sth->fetchall_arrayref() };
626
$base = scalar @chars;
628
$sql = "DROP TABLE $tmp_db_tbl";
629
MKDEBUG && _d($dbh, $sql);
632
MKDEBUG && _d("Base", $base, "chars:", @chars);
634
# Now we begin calculating how to chunk the char column. This is
635
# completely different from _chunk_numeric because we're not dealing
636
# with the values to chunk directly (the characters) but rather a map.
638
# In our base system, how many values can 1, 2, etc. characters express?
639
# E.g. in a base 26 system (a-z), 1 char expresses 26^1=26 chars (a-z),
640
# 2 chars expresses 26^2=676 chars. If the requested chunk size is 100,
641
# then 1 char might not express enough values, but 2 surely can. This
642
# is imperefect because we don't know if we have data like: [apple, boy,
643
# car] (i.e. values evenly distributed across the range of chars), or
644
# [ant, apple, azur, boy]. We assume data is more evenly distributed
645
# than not so we use the minimum number of characters to express a chunk
647
$sql = "SELECT MAX(LENGTH($chunk_col)) FROM $db_tbl ORDER BY `$chunk_col`";
648
MKDEBUG && _d($dbh, $sql);
649
$row = $dbh->selectrow_arrayref($sql);
650
my $max_col_len = $row->[0];
651
MKDEBUG && _d("Max column value:", $max_col, $max_col_len);
653
for my $n_chars ( 1..$max_col_len ) {
654
$n_values = $base**$n_chars;
655
if ( $n_values >= $args{chunk_size} ) {
656
MKDEBUG && _d($n_chars, "chars in base", $base, "expresses",
657
$n_values, "values");
662
# Our interval is not like a _chunk_numeric() interval, either, because
663
# we don't increment in the actual values (i.e. the characters) but rather
664
# in the char-to-number map. If the above calculation found that 1 char
665
# expressed enough values for 1 chunk, then each char in the map will
666
# yield roughly one chunk of values, so the interval is 1. Or, if we need
667
# 2 chars to express enough vals for 1 chunk, then we'll increment through
668
# the map 2 chars at a time, like [a, b], [c, d], etc.
669
my $n_chunks = $args{rows_in_range} / $args{chunk_size};
670
my $interval = floor($n_values / $n_chunks) || 1;
672
my $range_func = sub {
673
my ( $self, $dbh, $start, $interval, $max ) = @_;
674
my $start_char = $self->base_count(
679
my $end_char = $self->base_count(
680
count_to => min($max, $start + $interval),
684
return $start_char, $end_char;
688
col => $q->quote($chunk_col),
690
end_point => $n_values,
691
interval => $interval,
692
range_func => $range_func,
696
# Sub: get_first_chunkable_column
697
# Get the first chunkable column in a table.
698
# Only a "sane" column/index is returned. That means that
699
# the first auto-detected chunk col/index are used if any combination of
700
# preferred chunk col or index would be really bad, like chunk col=x
701
# and chunk index=some index over (y, z). That's bad because the index
702
# doesn't include the column; it would also be bad if the column wasn't
703
# a left-most prefix of the index.
708
# Required Arguments:
709
# tbl_struct - Hashref returned by <TableParser::parse()>
711
# Optional arguments:
712
# chunk_column - Preferred chunkable column name
713
# chunk_index - Preferred chunkable column index name
714
# exact - bool: passed to <find_chunk_columns()>
717
# List: chunkable column name, chunkable colum index
718
sub get_first_chunkable_column {
719
my ( $self, %args ) = @_;
720
foreach my $arg ( qw(tbl_struct) ) {
721
die "I need a $arg argument" unless $args{$arg};
724
# First auto-detected chunk col/index. If any combination of preferred
725
# chunk col or index are specified and are sane, they will overwrite
726
# these defaults. Else, these defaults will be returned.
727
my ($exact, @cols) = $self->find_chunk_columns(%args);
728
my $col = $cols[0]->{column};
729
my $idx = $cols[0]->{index};
731
# Wanted/preferred chunk column and index. Caller only gets what
732
# they want, though, if it results in a sane col/index pair.
733
my $wanted_col = $args{chunk_column};
734
my $wanted_idx = $args{chunk_index};
735
MKDEBUG && _d("Preferred chunk col/idx:", $wanted_col, $wanted_idx);
737
if ( $wanted_col && $wanted_idx ) {
738
# Preferred column and index: check that the pair is sane.
739
foreach my $chunkable_col ( @cols ) {
740
if ( $wanted_col eq $chunkable_col->{column}
741
&& $wanted_idx eq $chunkable_col->{index} ) {
742
# The wanted column is chunkable with the wanted index.
749
elsif ( $wanted_col ) {
750
# Preferred column, no index: check if column is chunkable, if yes
751
# then use its index, else fall back to default col/index.
752
foreach my $chunkable_col ( @cols ) {
753
if ( $wanted_col eq $chunkable_col->{column} ) {
754
# The wanted column is chunkable, so use its index and overwrite
757
$idx = $chunkable_col->{index};
762
elsif ( $wanted_idx ) {
763
# Preferred index, no column: check if index's left-most column is
764
# chunkable, if yes then use its column, else fall back to auto-detected
766
foreach my $chunkable_col ( @cols ) {
767
if ( $wanted_idx eq $chunkable_col->{index} ) {
768
# The wanted index has a chunkable column, so use it and overwrite
770
$col = $chunkable_col->{column};
777
MKDEBUG && _d('First chunkable col/index:', $col, $idx);
782
# Convert a size in rows or bytes to a number of rows in the table,
783
# using SHOW TABLE STATUS. If the size is a string with a suffix of M/G/k,
784
# interpret it as mebibytes, gibibytes, or kibibytes respectively.
785
# If it's just a number, treat it as a number of rows and return right away.
790
# Required Arguments:
794
# chunk_size - Chunk size string like "1000" or "50M"
797
# Array: number of rows, average row size
799
my ( $self, %args ) = @_;
800
my @required_args = qw(dbh db tbl chunk_size);
801
foreach my $arg ( @required_args ) {
802
die "I need a $arg argument" unless $args{$arg};
804
my ($dbh, $db, $tbl, $chunk_size) = @args{@required_args};
805
my $q = $self->{Quoter};
806
my $du = $self->{MySQLDump};
808
my ($n_rows, $avg_row_length);
810
my ( $num, $suffix ) = $chunk_size =~ m/^(\d+)([MGk])?$/;
811
if ( $suffix ) { # Convert to bytes.
812
$chunk_size = $suffix eq 'k' ? $num * 1_024
813
: $suffix eq 'M' ? $num * 1_024 * 1_024
814
: $num * 1_024 * 1_024 * 1_024;
820
die "Invalid chunk size $chunk_size; must be an integer "
821
. "with optional suffix kMG";
824
if ( $suffix || $args{avg_row_length} ) {
825
my ($status) = $du->get_table_status($dbh, $q, $db, $tbl);
826
$avg_row_length = $status->{avg_row_length};
827
if ( !defined $n_rows ) {
828
$n_rows = $avg_row_length ? ceil($chunk_size / $avg_row_length) : undef;
832
return $n_rows, $avg_row_length;
835
# Sub: get_range_statistics
836
# Determine the range of values for the chunk_col column on this table.
841
# Required Arguments:
845
# chunk_col - Chunk column name
846
# tbl_struct - Hashref returned by <TableParser::parse()>
848
# Optional arguments:
849
# where - WHERE clause without "WHERE" to restrict range
850
# index_hint - "FORCE INDEX (...)" clause
851
# tries - Fetch up to this many rows to find a valid value
854
# Array: min row value, max row value, rows in range
855
sub get_range_statistics {
856
my ( $self, %args ) = @_;
857
my @required_args = qw(dbh db tbl chunk_col tbl_struct);
858
foreach my $arg ( @required_args ) {
859
die "I need a $arg argument" unless $args{$arg};
861
my ($dbh, $db, $tbl, $col) = @args{@required_args};
862
my $where = $args{where};
863
my $q = $self->{Quoter};
865
my $col_type = $args{tbl_struct}->{type_for}->{$col};
866
my $col_is_numeric = $args{tbl_struct}->{is_numeric}->{$col};
868
# Quote these once so we don't have to do it again.
869
my $db_tbl = $q->quote($db, $tbl);
870
$col = $q->quote($col);
874
# First get the actual end points, whatever MySQL considers the
875
# min and max values to be for this column.
876
my $sql = "SELECT MIN($col), MAX($col) FROM $db_tbl"
877
. ($args{index_hint} ? " $args{index_hint}" : "")
878
. ($where ? " WHERE ($where)" : '');
879
MKDEBUG && _d($dbh, $sql);
880
($min, $max) = $dbh->selectrow_array($sql);
881
MKDEBUG && _d("Actual end points:", $min, $max);
883
# Now, for two reasons, get the valid end points. For one, an
884
# end point may be 0 or some zero-equivalent and the user doesn't
885
# want that because it skews the range. Or two, an end point may
886
# be an invalid value like date 2010-00-00 and we can't use that.
887
($min, $max) = $self->get_valid_end_points(
892
col_type => $col_type,
896
MKDEBUG && _d("Valid end points:", $min, $max);
899
die "Error getting min and max values for table $db_tbl "
900
. "on column $col: $EVAL_ERROR";
903
# Finally get the total number of rows in range, usually the whole
904
# table unless there's a where arg restricting the range.
905
my $sql = "EXPLAIN SELECT * FROM $db_tbl"
906
. ($args{index_hint} ? " $args{index_hint}" : "")
907
. ($where ? " WHERE $where" : '');
909
my $expl = $dbh->selectrow_hashref($sql);
914
rows_in_range => $expl->{rows},
919
# Create a SQL statement from a query prototype by filling in placeholders.
924
# Required Arguments:
925
# database - Database name
927
# chunks - Arrayref of chunks from <calculate_chunks()>
928
# chunk_num - Index into chunks to use
929
# query - Query prototype returned by
930
# <TableChecksum::make_checksum_query()>
932
# Optional Arguments:
933
# index_hint - "FORCE INDEX (...)" clause
934
# where - Arrayref of WHERE clauses joined with AND
939
my ( $self, %args ) = @_;
940
foreach my $arg ( qw(database table chunks chunk_num query) ) {
941
die "I need a $arg argument" unless defined $args{$arg};
943
MKDEBUG && _d('Injecting chunk', $args{chunk_num});
944
my $query = $args{query};
945
my $comment = sprintf("/*%s.%s:%d/%d*/",
946
$args{database}, $args{table},
947
$args{chunk_num} + 1, scalar @{$args{chunks}});
948
$query =~ s!/\*PROGRESS_COMMENT\*/!$comment!;
949
my $where = "WHERE (" . $args{chunks}->[$args{chunk_num}] . ')';
950
if ( $args{where} && grep { $_ } @{$args{where}} ) {
952
. join(" AND ", map { "($_)" } grep { $_ } @{$args{where}} )
955
my $db_tbl = $self->{Quoter}->quote(@args{qw(database table)});
956
my $index_hint = $args{index_hint} || '';
958
MKDEBUG && _d('Parameters:',
959
Dumper({WHERE => $where, DB_TBL => $db_tbl, INDEX_HINT => $index_hint}));
960
$query =~ s!/\*WHERE\*/! $where!;
961
$query =~ s!/\*DB_TBL\*/!$db_tbl!;
962
$query =~ s!/\*INDEX_HINT\*/! $index_hint!;
963
$query =~ s!/\*CHUNK_NUM\*/! $args{chunk_num} AS chunk_num,!;
968
# #############################################################################
969
# MySQL value to Perl number conversion.
970
# #############################################################################
972
# Convert a MySQL column value to a Perl integer.
974
# * value scalar: MySQL value to convert
975
# * column_type scalar: MySQL column type of the value
977
# Returns an integer or undef if the value isn't convertible
978
# (e.g. date 0000-00-00 is not convertible).
979
sub value_to_number {
980
my ( $self, %args ) = @_;
981
my @required_args = qw(column_type dbh);
982
foreach my $arg ( @required_args ) {
983
die "I need a $arg argument" unless defined $args{$arg};
985
my $val = $args{value};
986
my ($col_type, $dbh) = @args{@required_args};
987
MKDEBUG && _d('Converting MySQL', $col_type, $val);
989
return unless defined $val; # value is NULL
991
# MySQL functions to convert a non-numeric value to a number
992
# so we can do basic math on it in Perl. Right now we just
993
# convert temporal values but later we'll need to convert char
995
my %mysql_conv_func_for = (
996
timestamp => 'UNIX_TIMESTAMP',
998
time => 'TIME_TO_SEC',
999
datetime => 'TO_DAYS',
1002
# Convert the value to a number that Perl can do arithmetic with.
1004
if ( $col_type =~ m/(?:int|year|float|double|decimal)$/ ) {
1005
# These types are already numbers.
1008
elsif ( $col_type =~ m/^(?:timestamp|date|time)$/ ) {
1009
# These are temporal values. Convert them using a MySQL func.
1010
my $func = $mysql_conv_func_for{$col_type};
1011
my $sql = "SELECT $func(?)";
1012
MKDEBUG && _d($dbh, $sql, $val);
1013
my $sth = $dbh->prepare($sql);
1014
$sth->execute($val);
1015
($num) = $sth->fetchrow_array();
1017
elsif ( $col_type eq 'datetime' ) {
1018
# This type is temporal, too, but needs special handling.
1019
# Newer versions of MySQL could use TIMESTAMPDIFF, but it's easier
1020
# to maintain just one kind of code, so I do it all with DATE_ADD().
1021
$num = $self->timestampdiff($dbh, $val);
1024
die "I don't know how to chunk $col_type\n";
1026
MKDEBUG && _d('Converts to', $num);
1030
# ###########################################################################
1032
# ###########################################################################
1034
my ( $self, $dbh, $start, $interval, $max ) = @_;
1035
my $end = min($max, $start + $interval);
1038
# "Remove" scientific notation so the regex below does not make
1039
# 6.123456e+18 into 6.12345.
1040
$start = sprintf('%.17f', $start) if $start =~ /e/;
1041
$end = sprintf('%.17f', $end) if $end =~ /e/;
1043
# Trim decimal places, if needed. This helps avoid issues with float
1044
# precision differing on different platforms.
1045
$start =~ s/\.(\d{5}).*$/.$1/;
1046
$end =~ s/\.(\d{5}).*$/.$1/;
1048
if ( $end > $start ) {
1049
return ( $start, $end );
1052
die "Chunk size is too small: $end !> $start\n";
1057
my ( $self, $dbh, $start, $interval, $max ) = @_;
1058
my $sql = "SELECT SEC_TO_TIME($start), SEC_TO_TIME(LEAST($max, $start + $interval))";
1059
MKDEBUG && _d($sql);
1060
return $dbh->selectrow_array($sql);
1064
my ( $self, $dbh, $start, $interval, $max ) = @_;
1065
my $sql = "SELECT FROM_DAYS($start), FROM_DAYS(LEAST($max, $start + $interval))";
1066
MKDEBUG && _d($sql);
1067
return $dbh->selectrow_array($sql);
1070
sub range_datetime {
1071
my ( $self, $dbh, $start, $interval, $max ) = @_;
1072
my $sql = "SELECT DATE_ADD('$self->{EPOCH}', INTERVAL $start SECOND), "
1073
. "DATE_ADD('$self->{EPOCH}', INTERVAL LEAST($max, $start + $interval) SECOND)";
1074
MKDEBUG && _d($sql);
1075
return $dbh->selectrow_array($sql);
1078
sub range_timestamp {
1079
my ( $self, $dbh, $start, $interval, $max ) = @_;
1080
my $sql = "SELECT FROM_UNIXTIME($start), FROM_UNIXTIME(LEAST($max, $start + $interval))";
1081
MKDEBUG && _d($sql);
1082
return $dbh->selectrow_array($sql);
1085
# Returns the number of seconds between EPOCH and the value, according to
1086
# the MySQL server. (The server can do no wrong). I believe this code is right
1087
# after looking at the source of sql/time.cc but I am paranoid and add in an
1088
# extra check just to make sure. Earlier versions overflow on large interval
1089
# values, such as on 3.23.58, '1970-01-01' - interval 58000000000 second is
1090
# 2037-06-25 11:29:04. I know of no workaround. TO_DAYS('0000-....') is NULL,
1091
# so we treat it as 0.
1093
my ( $self, $dbh, $time ) = @_;
1094
my $sql = "SELECT (COALESCE(TO_DAYS('$time'), 0) * 86400 + TIME_TO_SEC('$time')) "
1095
. "- TO_DAYS('$self->{EPOCH} 00:00:00') * 86400";
1096
MKDEBUG && _d($sql);
1097
my ( $diff ) = $dbh->selectrow_array($sql);
1098
$sql = "SELECT DATE_ADD('$self->{EPOCH}', INTERVAL $diff SECOND)";
1099
MKDEBUG && _d($sql);
1100
my ( $check ) = $dbh->selectrow_array($sql);
1102
Incorrect datetime math: given $time, calculated $diff but checked to $check.
1103
This could be due to a version of MySQL that overflows on large interval
1104
values to DATE_ADD(), or the given datetime is not a valid date. If not,
1105
please report this as a bug.
1107
unless $check eq $time;
1112
# #############################################################################
1113
# End point validation.
1114
# #############################################################################
1116
# These sub require val (or min and max) args which usually aren't NULL
1117
# but could be zero so the usual "die ... unless $args{$arg}" check does
1120
# Returns valid min and max values. A valid val evaluates to a non-NULL value.
1123
# * db_tbl scalar: quoted `db`.`tbl`
1124
# * col scalar: quoted `column`
1125
# * col_type scalar: column type of the value
1126
# * min scalar: any scalar value
1127
# * max scalar: any scalar value
1128
# Optional arguments:
1129
# * index_hint scalar: "FORCE INDEX (...)" hint
1130
# * where scalar: WHERE clause without "WHERE"
1131
# * tries scalar: fetch up to this many rows to find a valid value
1132
# * zero_chunk bool: do a separate chunk for zero values
1133
# Some column types can store invalid values, like most of the temporal
1134
# types. When evaluated, invalid values return NULL. If the value is
1135
# NULL to begin with, then it is not invalid because NULL is valid.
1136
# For example, TO_DAYS('2009-00-00') evalues to NULL because that date
1137
# is invalid, even though it's storable.
1138
sub get_valid_end_points {
1139
my ( $self, %args ) = @_;
1140
my @required_args = qw(dbh db_tbl col col_type);
1141
foreach my $arg ( @required_args ) {
1142
die "I need a $arg argument" unless $args{$arg};
1144
my ($dbh, $db_tbl, $col, $col_type) = @args{@required_args};
1145
my ($real_min, $real_max) = @args{qw(min max)};
1147
# Common error message format in case there's a problem with
1148
# finding a valid min or max value.
1149
my $err_fmt = "Error finding a valid %s value for table $db_tbl on "
1150
. "column $col. The real %s value %s is invalid and "
1151
. "no other valid values were found. Verify that the table "
1152
. "has at least one valid value for this column"
1153
. ($args{where} ? " where $args{where}." : ".");
1155
# Validate min value if it's not NULL. NULL is valid.
1156
my $valid_min = $real_min;
1157
if ( defined $valid_min ) {
1158
# Get a valid min end point.
1159
MKDEBUG && _d("Validating min end point:", $real_min);
1160
$valid_min = $self->_get_valid_end_point(
1165
die sprintf($err_fmt, 'minimum', 'minimum',
1166
(defined $real_min ? $real_min : "NULL"))
1167
unless defined $valid_min;
1170
# Validate max value if it's not NULL. NULL is valid.
1171
my $valid_max = $real_max;
1172
if ( defined $valid_max ) {
1173
# Get a valid max end point. So far I've not found a case where
1174
# the actual max val is invalid, but check anyway just in case.
1175
MKDEBUG && _d("Validating max end point:", $real_min);
1176
$valid_max = $self->_get_valid_end_point(
1181
die sprintf($err_fmt, 'maximum', 'maximum',
1182
(defined $real_max ? $real_max : "NULL"))
1183
unless defined $valid_max;
1186
return $valid_min, $valid_max;
1189
# Does the actual work for get_valid_end_points() for each end point.
1190
sub _get_valid_end_point {
1191
my ( $self, %args ) = @_;
1192
my @required_args = qw(dbh db_tbl col col_type);
1193
foreach my $arg ( @required_args ) {
1194
die "I need a $arg argument" unless $args{$arg};
1196
my ($dbh, $db_tbl, $col, $col_type) = @args{@required_args};
1197
my $val = $args{val};
1200
return $val unless defined $val;
1202
# Right now we only validate temporal types, but when we begin
1203
# chunking char and hex columns we'll need to validate those.
1204
# E.g. HEX('abcdefg') is invalid and we'll probably find some
1205
# combination of char val + charset/collation that's invalid.
1206
my $validate = $col_type =~ m/time|date/ ? \&_validate_temporal_value
1209
# If we cannot validate the value, assume it's valid.
1211
MKDEBUG && _d("No validator for", $col_type, "values");
1215
# Return the value if it's already valid.
1216
return $val if defined $validate->($dbh, $val);
1218
# The value is not valid so find the first one in the table that is.
1219
MKDEBUG && _d("Value is invalid, getting first valid value");
1220
$val = $self->get_first_valid_value(
1223
validate => $validate,
1231
# * db_tbl scalar: quoted `db`.`tbl`
1232
# * col scalar: quoted `column` name
1233
# * val scalar: the current value, may be real, maybe not
1234
# * validate coderef: returns a defined value if the given value is valid
1235
# * endpoint scalar: "min" or "max", i.e. find first endpoint() real val
1236
# Optional arguments:
1237
# * tries scalar: fetch up to this many rows to find a valid value
1238
# * index_hint scalar: "FORCE INDEX (...)" hint
1239
# * where scalar: WHERE clause without "WHERE"
1240
# Returns the first column value from the given db_tbl that does *not*
1241
# evaluate to NULL. This is used mostly to eliminate unreal temporal
1242
# values which MySQL allows to be stored, like "2010-00-00". Returns
1243
# undef if no real value is found.
1244
sub get_first_valid_value {
1245
my ( $self, %args ) = @_;
1246
my @required_args = qw(dbh db_tbl col validate endpoint);
1247
foreach my $arg ( @required_args ) {
1248
die "I need a $arg argument" unless $args{$arg};
1250
my ($dbh, $db_tbl, $col, $validate, $endpoint) = @args{@required_args};
1251
my $tries = defined $args{tries} ? $args{tries} : 5;
1252
my $val = $args{val};
1254
# NULL values are valid and shouldn't be passed to us.
1255
return unless defined $val;
1257
# Prep a sth for fetching the next col val.
1258
my $cmp = $endpoint =~ m/min/i ? '>'
1259
: $endpoint =~ m/max/i ? '<'
1260
: die "Invalid endpoint arg: $endpoint";
1261
my $sql = "SELECT $col FROM $db_tbl "
1262
. ($args{index_hint} ? "$args{index_hint} " : "")
1263
. "WHERE $col $cmp ? AND $col IS NOT NULL "
1264
. ($args{where} ? "AND ($args{where}) " : "")
1265
. "ORDER BY $col LIMIT 1";
1266
MKDEBUG && _d($dbh, $sql);
1267
my $sth = $dbh->prepare($sql);
1269
# Fetch the next col val from the db.tbl until we find a valid one
1270
# or run out of rows. Only try a limited number of next rows.
1271
my $last_val = $val;
1272
while ( $tries-- ) {
1273
$sth->execute($last_val);
1274
my ($next_val) = $sth->fetchrow_array();
1275
MKDEBUG && _d('Next value:', $next_val, '; tries left:', $tries);
1276
if ( !defined $next_val ) {
1277
MKDEBUG && _d('No more rows in table');
1280
if ( defined $validate->($dbh, $next_val) ) {
1281
MKDEBUG && _d('First valid value:', $next_val);
1285
$last_val = $next_val;
1288
$val = undef; # no valid value found
1293
# Evalutes any temporal value, returns NULL if it's invalid, else returns
1294
# a value (possibly zero). It's magical but tested. See also,
1295
# http://hackmysql.com/blog/2010/05/26/detecting-invalid-and-zero-temporal-values/
1296
sub _validate_temporal_value {
1297
my ( $dbh, $val ) = @_;
1298
my $sql = "SELECT IF(TIME_FORMAT(?,'%H:%i:%s')=?, TIME_TO_SEC(?), TO_DAYS(?))";
1301
MKDEBUG && _d($dbh, $sql, $val);
1302
my $sth = $dbh->prepare($sql);
1303
$sth->execute($val, $val, $val, $val);
1304
($res) = $sth->fetchrow_array();
1307
if ( $EVAL_ERROR ) {
1308
MKDEBUG && _d($EVAL_ERROR);
1313
sub get_nonzero_value {
1314
my ( $self, %args ) = @_;
1315
my @required_args = qw(dbh db_tbl col col_type);
1316
foreach my $arg ( @required_args ) {
1317
die "I need a $arg argument" unless $args{$arg};
1319
my ($dbh, $db_tbl, $col, $col_type) = @args{@required_args};
1320
my $tries = defined $args{tries} ? $args{tries} : 5;
1321
my $val = $args{val};
1323
# Right now we only need a special check for temporal values.
1324
# _validate_temporal_value() does double-duty for this. The
1325
# default anonymous sub handles ints.
1326
my $is_nonzero = $col_type =~ m/time|date/ ? \&_validate_temporal_value
1327
: sub { return $_[1]; };
1329
if ( !$is_nonzero->($dbh, $val) ) { # quasi-double-negative, sorry
1330
MKDEBUG && _d('Discarding zero value:', $val);
1331
my $sql = "SELECT $col FROM $db_tbl "
1332
. ($args{index_hint} ? "$args{index_hint} " : "")
1333
. "WHERE $col > ? AND $col IS NOT NULL "
1334
. ($args{where} ? "AND ($args{where}) " : '')
1335
. "ORDER BY $col LIMIT 1";
1336
MKDEBUG && _d($sql);
1337
my $sth = $dbh->prepare($sql);
1339
my $last_val = $val;
1340
while ( $tries-- ) {
1341
$sth->execute($last_val);
1342
my ($next_val) = $sth->fetchrow_array();
1343
if ( $is_nonzero->($dbh, $next_val) ) {
1344
MKDEBUG && _d('First non-zero value:', $next_val);
1348
$last_val = $next_val;
1351
$val = undef; # no non-zero value found
1358
# Count to any number in any base with the given symbols. E.g. if counting
1359
# to 10 in base 16 with symbols 0,1,2,3,4,5,6,7,8,9,a,b,c,d,e,f the result
1360
# is "a". This is trival for stuff like base 16 (hex), but far less trivial
1361
# for arbitrary bases with arbitrary symbols like base 25 with symbols
1362
# B,C,D,...X,Y,Z. For that, counting to 10 results in "L". The base and its
1363
# symbols are determined by the character column. Symbols can be non-ASCII.
1368
# Required Arguments:
1369
# count_to - Number to count to
1370
# base - Base of special system
1371
# symbols - Arrayref of symbols for "numbers" in special system
1374
# The "number" (symbol) in the special target base system
1376
my ( $self, %args ) = @_;
1377
my @required_args = qw(count_to base symbols);
1378
foreach my $arg ( @required_args ) {
1379
die "I need a $arg argument" unless defined $args{$arg};
1381
my ($n, $base, $symbols) = @args{@required_args};
1383
# Can't take log of zero and the zeroth symbol in any base is the
1384
# zeroth symbol in any other base.
1385
return $symbols->[0] if $n == 0;
1387
my $highest_power = floor(log($n)/log($base));
1388
if ( $highest_power == 0 ){
1389
return $symbols->[$n];
1393
for my $power ( 0..$highest_power ) {
1394
push @base_powers, ($base**$power) || 1;
1398
foreach my $base_power ( reverse @base_powers ) {
1399
my $multiples = floor($n / $base_power);
1400
push @base_multiples, $multiples;
1401
$n -= $multiples * $base_power;
1404
return join('', map { $symbols->[$_] } @base_multiples);
1408
my ($package, undef, $line) = caller 0;
1409
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
1410
map { defined $_ ? $_ : 'undef' }
1412
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
1417
# ###########################################################################
1418
# End TableChunker package
1419
# ###########################################################################