3
# This script is for testing Sereal decode speeds, with various
4
# generated test inputs (which are first encoded). Sample usages:
6
# decode.pl --build --output=data.srl
8
# will (1) build a "graph" (a hash of small strings, really,
9
# which can be seen as an adjacency list representation of
10
# a graph, the vertex and its neighbors) of 1e5 vertices
11
# (2) decode the encoded blob 5 times (the 'graph', 1e5, and 5
12
# being the defaults).
14
# Other input types (--type=T) are
15
# aoi (array of int) (value == key)
16
# aoir (array of int) (value == randomly shuffled key)
17
# aof (array of float) (rand())
18
# aos (array of string) (value eq key)
21
# hos (hash of string)
23
# The 'base' number of elements in each case is controlled by --elem=N.
24
# For the array and hash the number of elements is trivial, for the graph
25
# the total number of elements (in its hash-of-hashes) is O(N log N).
27
# The number of encode and decode repeats is controlled by --repeat_encode=N and --repeat_decode=N.
29
# The encode input needs to be built only once, the --output tells
30
# where to save the encoded blob. The encoded blob can be read back
31
# from the save file with --input, much faster, especially in the case
40
use Fcntl qw[O_RDONLY O_WRONLY O_CREAT O_TRUNC];
41
use List::Util qw[shuffle];
46
my @Opt = ('input=s', 'output=s', 'type=s', 'elem=f', 'build',
47
'repeat_encode=i', 'repeat_decode=i',
49
# If non-zero, will drop the minimum and maximum
50
# values before computing statistics IF the number
51
# of measurements is at least this limit. So with
52
# a value of 5 will leave 3 measurements. Lowers
53
# the stddev, should not affect avg/median (much).
54
# Helpful in reducing cache effects.
55
'min_max_drop_limit=i',
58
my %OptO = map { my ($n) = /^(\w+)/; $_ => \$Opt{$n} } @Opt;
59
my @OptU = map { "--$_" } @Opt;
61
GetOptions(%OptO) or die "GetOptions: @OptU\n";
70
if (defined $Opt{size}) {
71
eval 'use Devel::Size qw[total_size]';
73
die "$0: --size but Devel::Size=total_size not found\n";
77
if (defined $Opt{build}) {
78
die "$0: --input with --build makes no sense\n" if defined $Opt{input};
81
die "$0: --output without --build makes no sense\n" if defined $Opt{output};
82
die "$0: --elem without --build makes no sense\n" if defined $Opt{elem};
83
die "$0: Must specify either --build or --input\n" unless defined $Opt{input};
85
if (defined ($Opt{output})) {
86
die "$0: --input with --output makes no sense\n" if defined $Opt{input};
89
$Opt{type} //= 'graph';
90
$Opt{repeat_encode} //= 1;
91
$Opt{repeat_decode} //= 5;
92
$Opt{min_max_drop_limit} //= 0;
94
my %TYPE = map { $_ => 1 } qw[aoi aoir aof aos hoi hof hos graph];
96
die "$0: Unexpected --type=$Opt{type}\n$0: Expected --type=@{[join('|', sort keys %TYPE)]}\n"
97
unless exists $TYPE{$Opt{type}};
100
my $t = Time::HiRes::time();
101
my ($u, $s, $cu, $cs) = times();
112
die "Unexpected diff(@_)\n" unless ref $_[0] eq ref $_[1];
113
bless { map { $_ => ($_[0]->{$_} - $_[1]->{$_}) } keys %{$_[0]} }, ref $_[0];
115
# Accessor: returns the value stored under the object's 'wall' key.
sub Times::wall {
    my ($self) = @_;
    return $self->{wall};
}
116
# Accessor: returns the value stored under the object's 'usr' key.
sub Times::usr {
    my ($self) = @_;
    return $self->{usr};
}
117
# Accessor: returns the value stored under the object's 'sys' key.
sub Times::sys {
    my ($self) = @_;
    return $self->{sys};
}
118
# Accessor: returns the value stored under the object's 'cpu' key.
sub Times::cpu {
    my ($self) = @_;
    return $self->{cpu};
}
119
# times() can often sum just a tad higher than wallclock.
120
# Returns the CPU time as a percentage of the wallclock time.
#
# The result is capped at 100 whenever the cpu value exceeds the wall
# value. If no wallclock time elapsed at all (wall is zero and cpu does
# not exceed it), returns 0 instead of dividing by zero.
sub Times::pct {
    my ($self) = @_;
    my $cpu    = $self->cpu;
    my $wall   = $self->wall;

    return 100 if $cpu > $wall;

    # Guard: a zero-length interval would otherwise die with
    # "Illegal division by zero".
    return 0 unless $wall;

    return 100 * ($cpu / $wall);
}
124
my $t0 = Times->new();
125
my @res = $code->(@_);
126
my $t1 = Times->new();
127
my $dt = $t1->diff($t0);
132
# The caller is supposed to have done this sorting
133
# already, but let's be wasteful and paranoid.
134
my @v = sort { $a <=> $b } @_;
137
my $med = @v % 2 ? $v[@v/2] : ($v[@v/2-1] + $v[@v/2]) / 2;
145
$sqsum += ($avg - $t) ** 2;
147
my $stddev = sqrt($sqsum / @_);
148
return ( avg => $avg,
150
rstddev => $avg ? $stddev / $avg : undef,
151
min => $min, med => $med, max => $max );
156
for my $k (qw(wall cpu)) {
157
my @v = sort { $a <=> $b } map { $_->{$k} } @_;
158
if ($Opt{min_max_drop_limit} > 0 &&
159
@v >= $Opt{min_max_drop_limit}) {
160
print "$k: dropping min and max ($v[0] and $v[-1])\n";
164
$stats{$k} = { __stats(@v) };
169
if (defined $Opt{build}) {
170
print "building data\n";
172
if ($Opt{type} eq 'graph') {
173
print "building graph\n";
175
$E = int($V * log($V)/log(2));
176
printf("data of %d (%.1fM) vertices %d (%.1fM) edges\n",
177
$V, $V / MB, $E, $E / MB);
181
my $a = int(rand($V));
182
my $b = int(rand($V));
186
} elsif ($Opt{type} eq 'aoi') {
187
print "building aoi\n";
195
} elsif ($Opt{type} eq 'aoir') {
196
print "building aoir\n";
200
for my $i (shuffle 1..$E) {
204
} elsif ($Opt{type} eq 'aof') {
205
print "building aof\n";
213
} elsif ($Opt{type} eq 'aos') {
214
print "building aos\n";
219
push @$data, rand() . $$;
222
} elsif ($Opt{type} eq 'hoi') {
223
print "building hoi\n";
231
} elsif ($Opt{type} eq 'hof') {
232
print "building hof\n";
237
$data->{$i} = rand();
240
} elsif ($Opt{type} eq 'hos') {
241
print "building hos\n";
250
die "$0: Unexpected type '$Opt{type}'\n";
252
printf("build %.2f sec %.2f usr %.2f sys %.2f cpu %3d%% (%.1f elements/sec)\n",
253
$dt->wall, $dt->usr, $dt->sys, $dt->cpu, $dt->pct, $E / $dt->wall);
255
$dt = timeit(sub { $data_size = total_size($data);});
256
printf("data size %d bytes (%.1fMB) %.1f sec\n",
257
$data_size, $data_size / MB, $dt->wall);
260
my $encoder = Sereal::Encoder->new;
263
print "encoding data\n";
265
for my $i (1..$Opt{repeat_encode}) {
266
$dt = timeit(sub { $blob = $encoder->encode($data); });
267
$blob_size = length($blob);
268
printf("%d/%d: encode to %d bytes (%.1fMB) %.2f sec %.2f usr %.2f sys %.2f cpu %3d%% (%.1f MB/sec)\n",
269
$i, $Opt{repeat_encode}, $blob_size, $blob_size / MB, $dt->wall, $dt->usr, $dt->sys, $dt->cpu, $dt->pct,
270
$blob_size / (MB * $dt->wall));
274
my %stats = stats(@dt);
275
for my $k (qw(wall cpu)) {
276
my $avg = $stats{$k}{avg};
277
printf("encode %-4s avg %.2f sec (%.1f MB/sec) stddev %.2f sec (%.2f) min %.2f med %.2f max %.2f\n",
279
$avg, $avg ? $blob_size / (MB * $avg) : 0, $stats{$k}{stddev}, $avg ? $stats{$k}{rstddev} : 0,
280
$stats{$k}{min}, $stats{$k}{med}, $stats{$k}{max});
285
if (defined $Opt{output}) {
286
print "opening output\n";
288
sysopen($fh, $Opt{output}, O_WRONLY|O_CREAT|O_TRUNC)
289
or die qq[sysopen "$Opt{output}": $!\n];
290
print "writing blob\n";
294
# On a failed write, report the output path and the OS error.
or die qq[syswrite "$Opt{output}": $!\n] });
295
$blob_size = length($blob);
296
printf("wrote %d bytes (%.1f MB) %.2f sec %.2f usr %.2f sys %.2f cpu %3d%% (%.1f MB/sec)\n",
297
$blob_size, $blob_size / MB, $dt->wall, $dt->usr, $dt->sys, $dt->cpu, $dt->pct,
298
$blob_size / (MB * $dt->wall));
300
} elsif (defined $Opt{input}) {
301
print "opening input\n";
303
sysopen($fh, $Opt{input}, O_RDONLY) or die qq[sysopen "$Opt{input}": $!\n];
304
print "reading blob\n";
307
sysread($fh, $blob, -s $fh)
308
or die qq[sysread "$Opt{input}": $!\n];
310
$blob_size = length($blob);
311
printf("read %d bytes (%.1f MB) %.2f sec %.2f usr %.2f sys %.2f cpu %3d%% (%.1f MB/sec)\n",
312
$blob_size, $blob_size / MB, $dt->wall, $dt->usr, $dt->sys, $dt->cpu, $dt->pct,
313
$blob_size / (MB * $dt->wall));
316
my $decoder = Sereal::Decoder->new;
319
print "decoding blob\n";
320
$blob_size = length($blob);
322
for my $i (1..$Opt{repeat_decode}) {
323
$dt = timeit(sub { $data = $decoder->decode($blob); });
324
printf("%d/%d: decode from %d bytes (%.1fM) %.2f sec %.2f usr %.2f sys %.2f cpu %3d%% (%.1f MB/sec)\n",
325
$i, $Opt{repeat_decode}, $blob_size, $blob_size / MB,
326
$dt->wall, $dt->usr, $dt->sys, $dt->cpu, $dt->pct, $blob_size / (MB * $dt->wall));
329
if (ref $data eq 'HASH') {
330
printf("data is hashref of %d elements\n", scalar keys %{$data});
331
} elsif (ref $data eq 'ARRAY') {
332
# $data is an array reference in this branch, so label it as one.
printf("data is arrayref of %d elements\n", scalar @{$data});
333
} elsif (ref $data) {
334
printf("data is ref of %s\n", ref $data);
336
printf("data is of unexpected type\n");
339
my %stats = stats(@dt);
340
for my $k (qw(wall cpu)) {
341
my $avg = $stats{$k}{avg};
342
printf("decode %-4s avg %.2f sec (%.1f MB/sec) stddev %.2f sec (%.2f) min %.2f med %.2f max %.2f\n",
344
$avg, $avg ? $blob_size / (MB * $stats{$k}{avg}) : 0, $stats{$k}{stddev}, $avg ? $stats{$k}{rstddev} : 0,
345
$stats{$k}{min}, $stats{$k}{med}, $stats{$k}{max});
349
$dt = timeit(sub { $data_size = total_size($data); });
350
printf("data size %d bytes (%.1fMB) %.1f sec\n",
351
$data_size, $data_size / MB, $dt->wall);
356
if ($blob_size && $data_size) {
357
printf("data size / blob size %.2f\n", $data_size / $blob_size);