/* Speed measuring program.

Copyright 1999, 2000, 2001 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */

/* Usage message is in the code below, run with no arguments to print it.
See README for interesting applications.

To add a new routine foo(), create a speed_foo() function in the style of
the existing ones and add an entry in the routine[] array. Give the entry
FLAG_R if speed_foo() requires an "r" parameter, or FLAG_R_OPTIONAL if the
parameter is optional.

The routines don't have help messages or descriptions, but most have
suggestive names. See the source code for full details. */
42
#include <unistd.h> /* for getpid, R_OK */
45
#if TIME_WITH_SYS_TIME
46
# include <sys/time.h> /* for struct timeval */
50
# include <sys/time.h>
56
#if HAVE_SYS_RESOURCE_H
57
#include <sys/resource.h> /* for getrusage() */
63
#include "longlong.h" /* for the benefit of speed-many.c */
70
extern int optind, opterr;
74
/* Fallback strtoul() implemented with strtol().  The arguments and the whole
   expansion are parenthesized so the macro behaves like a function call in
   any expression context (for instance as the operand of sizeof).
   Caveat: strtol() saturates at LONG_MAX, so inputs above LONG_MAX are
   returned as LONG_MAX rather than their true unsigned value. */
#define strtoul(p,e,b)  ((unsigned long) strtol ((p), (e), (b)))
77
#ifdef SPEED_EXTRA_PROTOS
80
#ifdef SPEED_EXTRA_PROTOS2
85
#define MPN_FILL(ptr, size, n) \
88
ASSERT ((size) >= 0); \
89
for (__i = 0; __i < (size); __i++) \
93
/* Comparison mode, held in option_cmp.  CMP_DIFFERENCE is selected by -d
   (times shown as difference from the first routine) and CMP_DIFFPREV by -D
   (difference from the previous size shown).  At most one of -d, -D, -r may
   be given.
   NOTE(review): CMP_RATIO (selected by -r) is used further down but no
   #define for it is visible among these lines -- confirm it is defined. */
#define CMP_ABSOLUTE 1
95
#define CMP_DIFFERENCE 3
96
#define CMP_DIFFPREV 4
97
int option_cmp = CMP_ABSOLUTE;
99
/* Output unit, held in option_unit: seconds by default, CPU cycles with -c,
   cycles per limb with -C.  Only one of -c and -C may be given. */
#define UNIT_SECONDS 1
100
#define UNIT_CYCLES 2
101
#define UNIT_CYCLESPERLIMB 3
102
int option_unit = UNIT_SECONDS;
104
/* Kind of operand data produced by data_fill(), chosen with -a.
   NOTE(review): DATA_ZEROS, DATA_FFS and DATA_2FD are used further down but
   no #define for them is visible among these lines -- confirm. */
#define DATA_RANDOM 1
105
#define DATA_RANDOM2 2
109
int option_data = DATA_RANDOM;
111
/* Selects the divisor SIZE_TO_DIVISOR() applies for cycles-per-limb output:
   1 gives n*n, 2 gives n*(n+1)/2. */
int option_square = 0;
112
/* -f factor for stepping through sizes; must be > 1.0 when given, and 0.0
   means no factor is in use. */
double option_factor = 0.0;
113
/* -t step for stepping through sizes; must be >= 1. */
mp_size_t option_step = 1;
114
int option_gnuplot = 0;
115
/* Argument of -P; run_gnuplot() writes "<basename>.gnuplot" and
   "<basename>.data". */
char *option_gnuplot_basename;
116
/* Size ranges parsed from -s.  A single size is stored with start == end.
   size_num entries are in use out of size_allocnum allocated; the array is
   grown 10 entries at a time. */
struct size_array_t {
117
mp_size_t start, end;
118
} *size_array = NULL;
119
mp_size_t size_num = 0;
120
mp_size_t size_allocnum = 0;
121
/* Set by -u: print resource usage at the end of the run. */
int option_resource_usage = 0;
122
/* Seed handed to gmp_randseed_ui() before measuring; one option replaces it
   with time(NULL). */
long option_seed = 123456789;
124
struct speed_params sp;
126
#define COLUMN_WIDTH 13 /* for the free-form output */
128
#define FLAG_R (1<<0) /* require ".r" */
129
#define FLAG_R_OPTIONAL (1<<1) /* optional ".r" */
130
#define FLAG_RSIZE (1<<2)
131
#define FLAG_NODATA (1<<3) /* don't alloc xp, yp */
133
const struct routine_t {
136
speed_function_t fun;
141
{ "noop", speed_noop },
142
{ "noop_wxs", speed_noop_wxs },
143
{ "noop_wxys", speed_noop_wxys },
145
{ "mpn_add_n", speed_mpn_add_n },
146
{ "mpn_sub_n", speed_mpn_sub_n },
147
{ "mpn_add_n_self", speed_mpn_add_n_self },
148
{ "mpn_add_n_inplace", speed_mpn_add_n_inplace },
150
{ "mpn_addmul_1", speed_mpn_addmul_1, FLAG_R },
151
{ "mpn_submul_1", speed_mpn_submul_1, FLAG_R },
152
{ "mpn_mul_1", speed_mpn_mul_1, FLAG_R },
153
{ "mpn_mul_1_inplace", speed_mpn_mul_1_inplace, FLAG_R },
154
#if HAVE_NATIVE_mpn_mul_2
155
{ "mpn_mul_2", speed_mpn_mul_2 },
158
{ "mpn_divrem_1", speed_mpn_divrem_1, FLAG_R },
159
{ "mpn_divrem_1f", speed_mpn_divrem_1f, FLAG_R },
160
#if HAVE_NATIVE_mpn_divrem_1c
161
{ "mpn_divrem_1c", speed_mpn_divrem_1c, FLAG_R },
162
{ "mpn_divrem_1cf", speed_mpn_divrem_1cf,FLAG_R },
164
{ "mpn_mod_1", speed_mpn_mod_1, FLAG_R },
165
#if HAVE_NATIVE_mpn_mod_1c
166
{ "mpn_mod_1c", speed_mpn_mod_1c, FLAG_R },
168
{ "mpn_preinv_mod_1", speed_mpn_preinv_mod_1, FLAG_R },
170
{ "mpn_divrem_1_div", speed_mpn_divrem_1_div, FLAG_R },
171
{ "mpn_divrem_1_inv", speed_mpn_divrem_1_inv, FLAG_R },
172
{ "mpn_divrem_1f_div", speed_mpn_divrem_1f_div, FLAG_R },
173
{ "mpn_divrem_1f_inv", speed_mpn_divrem_1f_inv, FLAG_R },
174
{ "mpn_mod_1_div", speed_mpn_mod_1_div, FLAG_R },
175
{ "mpn_mod_1_inv", speed_mpn_mod_1_inv, FLAG_R },
177
{ "mpn_divrem_2", speed_mpn_divrem_2, },
178
{ "mpn_divrem_2_div", speed_mpn_divrem_2_div, },
179
{ "mpn_divrem_2_inv", speed_mpn_divrem_2_inv, },
181
{ "mpn_divexact_1", speed_mpn_divexact_1, FLAG_R },
182
{ "mpn_divexact_by3", speed_mpn_divexact_by3 },
184
#if HAVE_NATIVE_mpn_modexact_1c_odd
185
{ "mpn_modexact_1_odd", speed_mpn_modexact_1_odd, FLAG_R },
187
{ "mpn_modexact_1c_odd", speed_mpn_modexact_1c_odd, FLAG_R },
189
{ "mpn_dc_tdiv_qr", speed_mpn_dc_tdiv_qr },
190
{ "mpn_dc_divrem_n", speed_mpn_dc_divrem_n },
191
{ "mpn_dc_divrem_sb", speed_mpn_dc_divrem_sb },
192
{ "mpn_dc_divrem_sb_div", speed_mpn_dc_divrem_sb_div },
193
{ "mpn_dc_divrem_sb_inv", speed_mpn_dc_divrem_sb_inv },
195
{ "mpn_sb_divrem_m3", speed_mpn_sb_divrem_m3 },
196
{ "mpn_sb_divrem_m3_div", speed_mpn_sb_divrem_m3_div },
197
{ "mpn_sb_divrem_m3_inv", speed_mpn_sb_divrem_m3_inv },
199
{ "mpn_lshift", speed_mpn_lshift, FLAG_R },
200
{ "mpn_rshift", speed_mpn_rshift, FLAG_R },
202
{ "mpn_and_n", speed_mpn_and_n },
203
{ "mpn_andn_n", speed_mpn_andn_n },
204
{ "mpn_nand_n", speed_mpn_nand_n },
205
{ "mpn_ior_n", speed_mpn_ior_n },
206
{ "mpn_iorn_n", speed_mpn_iorn_n },
207
{ "mpn_nior_n", speed_mpn_nior_n },
208
{ "mpn_xor_n", speed_mpn_xor_n },
209
{ "mpn_xnor_n", speed_mpn_xnor_n },
210
{ "mpn_com_n", speed_mpn_com_n },
212
{ "mpn_popcount", speed_mpn_popcount },
213
{ "mpn_hamdist", speed_mpn_hamdist },
215
{ "mpn_gcd_1", speed_mpn_gcd_1, FLAG_R_OPTIONAL },
216
{ "mpn_gcd_1N", speed_mpn_gcd_1N, FLAG_R_OPTIONAL },
218
{ "mpn_gcd", speed_mpn_gcd },
219
{ "mpn_gcd_binary", speed_mpn_gcd_binary },
220
{ "find_a", speed_find_a, FLAG_NODATA },
221
#if HAVE_NATIVE_mpn_gcd_finda
222
{ "mpn_gcd_finda", speed_mpn_gcd_finda, FLAG_NODATA },
225
{ "mpn_gcdext", speed_mpn_gcdext },
226
{ "mpn_gcdext_single", speed_mpn_gcdext_single },
227
{ "mpn_gcdext_double", speed_mpn_gcdext_double },
228
{ "mpn_gcdext_one_single", speed_mpn_gcdext_one_single },
229
{ "mpn_gcdext_one_double", speed_mpn_gcdext_one_double },
231
{ "mpz_jacobi", speed_mpz_jacobi },
232
{ "mpn_jacobi_base", speed_mpn_jacobi_base },
234
{ "mpn_mul_basecase", speed_mpn_mul_basecase, FLAG_R_OPTIONAL },
235
{ "mpn_sqr_basecase", speed_mpn_sqr_basecase },
236
#if HAVE_NATIVE_mpn_sqr_diagonal
237
{ "mpn_sqr_diagonal", speed_mpn_sqr_diagonal },
240
{ "mpn_mul_n", speed_mpn_mul_n },
241
{ "mpn_sqr_n", speed_mpn_sqr_n },
243
{ "mpn_kara_mul_n", speed_mpn_kara_mul_n },
244
{ "mpn_kara_sqr_n", speed_mpn_kara_sqr_n },
245
{ "mpn_toom3_mul_n", speed_mpn_toom3_mul_n },
246
{ "mpn_toom3_sqr_n", speed_mpn_toom3_sqr_n },
247
{ "mpn_mul_fft_full", speed_mpn_mul_fft_full },
248
{ "mpn_mul_fft_full_sqr", speed_mpn_mul_fft_full_sqr },
250
{ "mpn_mul_fft", speed_mpn_mul_fft, FLAG_R_OPTIONAL },
251
{ "mpn_mul_fft_sqr", speed_mpn_mul_fft_sqr, FLAG_R_OPTIONAL },
253
{ "mpn_toom3_mul_n_mpn", speed_mpn_toom3_mul_n_mpn },
254
{ "mpn_toom3_mul_n_open", speed_mpn_toom3_mul_n_open },
255
{ "mpn_toom3_sqr_n_mpn", speed_mpn_toom3_sqr_n_mpn },
256
{ "mpn_toom3_sqr_n_open", speed_mpn_toom3_sqr_n_open },
258
{ "mpn_get_str", speed_mpn_get_str, FLAG_R_OPTIONAL },
259
{ "mpn_set_str", speed_mpn_set_str, FLAG_R_OPTIONAL },
261
{ "mpn_sqrtrem", speed_mpn_sqrtrem },
263
{ "mpn_fib2_ui", speed_mpn_fib2_ui, FLAG_NODATA },
264
{ "mpz_fib_ui", speed_mpz_fib_ui, FLAG_NODATA },
265
{ "mpz_fib2_ui", speed_mpz_fib2_ui, FLAG_NODATA },
266
{ "mpz_lucnum_ui", speed_mpz_lucnum_ui, FLAG_NODATA },
267
{ "mpz_lucnum2_ui", speed_mpz_lucnum2_ui, FLAG_NODATA },
269
{ "mpz_add", speed_mpz_add },
270
{ "mpz_bin_uiui", speed_mpz_bin_uiui, FLAG_NODATA | FLAG_R_OPTIONAL },
271
{ "mpz_fac_ui", speed_mpz_fac_ui, FLAG_NODATA },
272
{ "mpz_powm", speed_mpz_powm },
273
{ "mpz_powm_mod", speed_mpz_powm_mod },
274
{ "mpz_powm_redc", speed_mpz_powm_redc },
275
{ "mpz_powm_ui", speed_mpz_powm_ui },
277
{ "mpz_mod", speed_mpz_mod },
278
{ "redc", speed_redc },
280
{ "MPN_COPY", speed_MPN_COPY },
281
{ "MPN_COPY_INCR", speed_MPN_COPY_INCR },
282
{ "MPN_COPY_DECR", speed_MPN_COPY_DECR },
283
{ "memcpy", speed_memcpy },
284
#if HAVE_NATIVE_mpn_copyi
285
{ "mpn_copyi", speed_mpn_copyi },
287
#if HAVE_NATIVE_mpn_copyd
288
{ "mpn_copyd", speed_mpn_copyd },
291
{ "MPN_ZERO", speed_MPN_ZERO },
293
{ "modlimb_invert", speed_modlimb_invert, FLAG_NODATA },
294
{ "modlimb_invert_mul1", speed_modlimb_invert_mul1, FLAG_NODATA },
295
{ "modlimb_invert_loop", speed_modlimb_invert_loop, FLAG_NODATA },
296
{ "modlimb_invert_cond", speed_modlimb_invert_cond, FLAG_NODATA },
297
{ "modlimb_invert_arith", speed_modlimb_invert_arith, FLAG_NODATA },
299
{ "malloc_free", speed_malloc_free },
300
{ "malloc_realloc_free", speed_malloc_realloc_free },
301
{ "gmp_allocate_free", speed_gmp_allocate_free },
302
{ "gmp_allocate_reallocate_free", speed_gmp_allocate_reallocate_free },
303
{ "mpz_init_clear", speed_mpz_init_clear },
304
{ "mpq_init_clear", speed_mpq_init_clear },
305
{ "mpf_init_clear", speed_mpf_init_clear },
306
{ "mpz_init_realloc_clear", speed_mpz_init_realloc_clear },
308
{ "umul_ppmm", speed_umul_ppmm, FLAG_R_OPTIONAL },
309
#if HAVE_NATIVE_mpn_umul_ppmm
310
{ "mpn_umul_ppmm", speed_mpn_umul_ppmm, FLAG_R_OPTIONAL },
313
{ "count_leading_zeros", speed_count_leading_zeros, FLAG_NODATA | FLAG_R_OPTIONAL },
314
{ "count_trailing_zeros", speed_count_trailing_zeros, FLAG_NODATA | FLAG_R_OPTIONAL },
316
{ "udiv_qrnnd", speed_udiv_qrnnd, FLAG_R_OPTIONAL },
317
{ "udiv_qrnnd_preinv", speed_udiv_qrnnd_preinv, FLAG_R_OPTIONAL },
318
{ "udiv_qrnnd_preinv2norm", speed_udiv_qrnnd_preinv2norm, FLAG_R_OPTIONAL },
319
{ "udiv_qrnnd_c", speed_udiv_qrnnd_c, FLAG_R_OPTIONAL },
320
#if HAVE_NATIVE_mpn_udiv_qrnnd
321
{ "mpn_udiv_qrnnd", speed_mpn_udiv_qrnnd, FLAG_R_OPTIONAL },
323
{ "invert_limb", speed_invert_limb, FLAG_R_OPTIONAL },
325
{ "operator_div", speed_operator_div, FLAG_R_OPTIONAL },
326
{ "operator_mod", speed_operator_mod, FLAG_R_OPTIONAL },
328
#ifdef SPEED_EXTRA_ROUTINES
331
#ifdef SPEED_EXTRA_ROUTINES2
332
SPEED_EXTRA_ROUTINES2
338
const struct routine_t *p;
346
struct choice_t *choice;
351
data_fill (mp_ptr ptr, mp_size_t size)
353
switch (option_data) {
355
mpn_random (ptr, size);
358
mpn_random2 (ptr, size);
361
MPN_ZERO (ptr, size);
364
MPN_FILL (ptr, size, MP_LIMB_T_MAX);
367
MPN_FILL (ptr, size, MP_LIMB_T_MAX);
376
/* The code here handling the various combinations of output options isn't
377
too attractive, but it works and is fairly clean. */
379
#define SIZE_TO_DIVISOR(n) \
380
(option_square == 1 ? (n)*(n) \
381
: option_square == 2 ? (n)*((n)+1)/2 \
385
run_one (FILE *fp, struct speed_params *s, mp_size_t prev_size)
387
const char *first_open_fastest, *first_open_notfastest, *first_close;
388
int i, fastest, want_data;
394
/* allocate data, unless all routines are NODATA */
396
for (i = 0; i < num_choices; i++)
397
want_data |= ((choice[i].p->flag & FLAG_NODATA) == 0);
401
sp.xp = SPEED_TMP_ALLOC_LIMBS (s->size, s->align_xp);
402
sp.yp = SPEED_TMP_ALLOC_LIMBS (s->size, s->align_yp);
404
data_fill (s->xp, s->size);
405
data_fill (s->yp, s->size);
413
if (prev_size == -1 && option_cmp == CMP_DIFFPREV)
415
first_open_fastest = "(#";
416
first_open_notfastest = " (";
421
first_open_fastest = "#";
422
first_open_notfastest = " ";
428
for (i = 0; i < num_choices; i++)
431
choice[i].time = speed_measure (choice[i].p->fun, s);
432
choice[i].no_time = (choice[i].time == -1.0);
433
choice[i].time *= choice[i].scale;
435
/* Apply the effect of CMP_DIFFPREV, but the new choice[i].prev_time
436
is before any differences. */
440
if (t != -1.0 && option_cmp == CMP_DIFFPREV && prev_size != -1)
442
if (choice[i].prev_time == -1.0)
443
choice[i].no_time = 1;
445
choice[i].time = choice[i].time - choice[i].prev_time;
447
choice[i].prev_time = t;
450
if (choice[i].no_time)
453
/* Look for the fastest after CMP_DIFFPREV has been applied, but
454
before CMP_RATIO or CMP_DIFFERENCE. There's only a fastest shown
455
if there's more than one routine. */
456
if (num_choices > 1 && (fastest == -1 || choice[i].time < fastest_time))
459
fastest_time = choice[i].time;
462
if (option_cmp == CMP_DIFFPREV)
464
/* Conversion for UNIT_CYCLESPERLIMB differs in CMP_DIFFPREV. */
465
if (option_unit == UNIT_CYCLES)
466
choice[i].time /= speed_cycletime;
467
else if (option_unit == UNIT_CYCLESPERLIMB)
470
choice[i].time /= speed_cycletime;
472
choice[i].time /= (speed_cycletime
473
* (SIZE_TO_DIVISOR(s->size)
474
- SIZE_TO_DIVISOR(prev_size)));
479
if (option_unit == UNIT_CYCLES)
480
choice[i].time /= speed_cycletime;
481
else if (option_unit == UNIT_CYCLESPERLIMB)
482
choice[i].time /= (speed_cycletime * SIZE_TO_DIVISOR(s->size));
484
if (option_cmp == CMP_RATIO && i > 0)
486
/* A ratio isn't affected by the units chosen. */
487
if (choice[0].no_time || choice[0].time == 0.0)
488
choice[i].no_time = 1;
490
choice[i].time /= choice[0].time;
492
else if (option_cmp == CMP_DIFFERENCE && i > 0)
494
if (choice[0].no_time)
496
choice[i].no_time = 1;
499
choice[i].time -= choice[0].time;
506
/* In CMP_DIFFPREV, don't print anything for the first size, start
with the second where an actual difference is available.

In CMP_RATIO, print the first column as 1.0.

The 9 decimals printed are far more than the actual precision expected of
the measurements. */
514
if (! (option_cmp == CMP_DIFFPREV && prev_size == -1))
516
fprintf (fp, "%-6ld ", s->size);
517
for (i = 0; i < num_choices; i++)
518
fprintf (fp, " %.9e",
519
choice[i].no_time ? 0.0
520
: (option_cmp == CMP_RATIO && i == 0) ? 1.0
527
fprintf (fp, "%-6ld ", s->size);
528
for (i = 0; i < num_choices; i++)
533
if (choice[i].no_time)
534
decimals = 0, choice[i].time = 0.0;
535
else if (option_unit == UNIT_CYCLESPERLIMB
536
|| (option_cmp == CMP_RATIO && i > 0))
538
else if (option_unit == UNIT_CYCLES)
543
sprintf (buf, "%s%.*f%s",
544
i == fastest ? first_open_fastest : first_open_notfastest,
545
decimals, choice[i].time, first_close);
546
fprintf (fp, " %*s", COLUMN_WIDTH, buf);
562
sp.xp_block = SPEED_TMP_ALLOC_LIMBS (SPEED_BLOCK_SIZE, sp.align_xp);
563
sp.yp_block = SPEED_TMP_ALLOC_LIMBS (SPEED_BLOCK_SIZE, sp.align_yp);
565
data_fill (sp.xp_block, SPEED_BLOCK_SIZE);
566
data_fill (sp.yp_block, SPEED_BLOCK_SIZE);
568
for (i = 0; i < size_num; i++)
570
sp.size = size_array[i].start;
576
if (option_data == DATA_2FD && sp.size >= 2)
577
sp.xp[sp.size-1] = 2;
579
run_one (fp, &sp, prev_size);
582
if (option_data == DATA_2FD && sp.size >= 2)
583
sp.xp[sp.size-1] = MP_LIMB_T_MAX;
585
if (option_factor != 0.0)
587
step = (mp_size_t) (sp.size * option_factor - sp.size);
593
if (step < option_step)
597
if (sp.size > size_array[i].end)
607
fopen_for_write (const char *filename)
610
if ((fp = fopen (filename, "w")) == NULL)
612
fprintf (stderr, "Cannot create %s\n", filename);
619
fclose_written (FILE *fp, const char *filename)
628
fprintf (stderr, "Error writing %s\n", filename);
635
run_gnuplot (int argc, char *argv[])
642
plot_filename = (char *) (*__gmp_allocate_func)
643
(strlen (option_gnuplot_basename) + 20);
644
data_filename = (char *) (*__gmp_allocate_func)
645
(strlen (option_gnuplot_basename) + 20);
647
sprintf (plot_filename, "%s.gnuplot", option_gnuplot_basename);
648
sprintf (data_filename, "%s.data", option_gnuplot_basename);
650
fp = fopen_for_write (plot_filename);
652
fprintf (fp, "# Generated with:\n");
654
for (i = 0; i < argc; i++)
655
fprintf (fp, " %s", argv[i]);
659
/* Putting the key at the top left is usually good, and you can change it
660
interactively if it's not. */
661
fprintf (fp, "set key left\n");
663
/* designed to make it possible to see crossovers easily */
664
fprintf (fp, "set data style linespoints\n");
666
fprintf (fp, "plot ");
667
for (i = 0; i < num_choices; i++)
669
fprintf (fp, " \"%s\" using 1:%d", data_filename, i+2);
670
fprintf (fp, " title \"%s\"", choice[i].name);
672
if (i != num_choices-1)
673
fprintf (fp, ", \\");
677
fprintf (fp, "load \"-\"\n");
678
fclose_written (fp, plot_filename);
680
fp = fopen_for_write (data_filename);
682
/* Unbuffered so you can see where the program was up to if it crashes or
687
fclose_written (fp, data_filename);
691
/* Return a long with the n least significant bits set, for
   0 <= n <= BITS_PER_MP_LIMB.

   n == BITS_PER_MP_LIMB yields all ones (-1L) and n == 0 yields 0 without
   shifting.  For the values in between the shift is performed on an
   unsigned long and the result converted back: "1L << n" overflows a signed
   long (undefined behaviour) once n reaches the sign bit position, whereas
   (1UL << n) - 1 is well defined and always fits in a long for such n.

   NOTE(review): this still assumes a long is at least BITS_PER_MP_LIMB bits
   wide, as the original did -- confirm for builds where a limb is wider
   than a long. */
#define LIMB_ONES(n)                            \
  ((n) == BITS_PER_MP_LIMB ? -1L                \
   : (n) == 0 ? 0L                              \
   : (long) ((1UL << (n)) - 1))
697
r_string (const char *s)
699
const char *s_orig = s;
708
set = mpz_set_str (z, s, 0);
710
l = (siz == 0 ? 0 : siz > 0 ? PTR(z)[0] : -PTR(z)[0]);
714
if (siz > 1 || siz < -1)
715
printf ("Warning, r parameter %s truncated to %d bits\n",
716
s_orig, BITS_PER_MP_LIMB);
721
if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X'))
722
n = strtoul (s+2, (char **) &s, 16);
724
n = strtol (s, (char **) &s, 10);
726
if (strcmp (s, "bits") == 0)
729
if (n > BITS_PER_MP_LIMB)
731
fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
732
n, BITS_PER_MP_LIMB);
736
return (l | (1 << (n-1))) & LIMB_ONES(n);
738
else if (strcmp (s, "ones") == 0)
740
if (n > BITS_PER_MP_LIMB)
742
fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
743
n, BITS_PER_MP_LIMB);
746
return LIMB_ONES (n);
750
fprintf (stderr, "invalid r parameter: %s\n", s_orig);
759
routine_find (struct choice_t *c, const char *s_orig)
766
s = strchr (s_orig, '*');
769
c->scale = atof(s_orig);
778
for (i = 0; i < numberof (routine); i++)
780
nlen = strlen (routine[i].name);
781
if (memcmp (s, routine[i].name, nlen) != 0)
786
/* match, with a .r parameter */
788
if (! (routine[i].flag & (FLAG_R|FLAG_R_OPTIONAL)))
791
"Choice %s bad: doesn't take a \".<r>\" parameter\n",
797
c->r = r_string (s + nlen + 1);
803
/* match, with no parameter */
805
if (routine[i].flag & FLAG_R)
808
"Choice %s bad: needs a \".<r>\" parameter\n",
819
fprintf (stderr, "Choice %s unrecognised\n", s_orig);
832
Usage: speed [-options] -s size <routine>...\n\
833
Measure the speed of some routines.\n\
834
Times are in seconds, accuracy is shown.\n\
836
-p num set precision as number of time units each routine must run\n\
837
-s size[-end][,size[-end]]... sizes to measure\n\
838
single sizes or ranges, sep with comma or use multiple -s\n\
839
-t step step through sizes by given amount\n\
840
-f factor step through sizes by given factor (eg. 1.05)\n\
841
-r show times as ratios of the first routine\n\
842
-d show times as difference from the first routine\n\
843
-D show times as difference from previous size shown\n\
844
-c show times in CPU cycles\n\
845
-C show times in cycles per limb\n\
846
-u print resource usage (memory) at end\n\
847
-P name output plot files \"name.gnuplot\" and \"name.data\"\n\
848
-a <type> use given data: random(default), random2, zeros, ffs\n\
849
-x, -y, -w, -W <align> specify data alignments, sources and dests\n\
850
-o addrs print addresses of data blocks\n\
852
If both -t and -f are used, it means step by the factor or the step, whichever\n\
854
If both -C and -D are used, it means cycles per however many limbs between a\n\
855
size and the previous size.\n\
857
After running with -P, plots can be viewed with Gnuplot or Quickplot.\n\
858
\"gnuplot name.gnuplot\" (use \"set logscale xy; replot\" at the prompt for\n\
860
\"quickplot -s name.data\" (has interactive zooming, and note -s is important\n\
861
when viewing more than one routine, it means same axis scales for all data).\n\
863
The available routines are as follows.\n\
867
for (i = 0; i < numberof (routine); i++)
869
if (routine[i].flag & FLAG_R)
870
printf ("\t%s.r\n", routine[i].name);
871
else if (routine[i].flag & FLAG_R_OPTIONAL)
872
printf ("\t%s (optional .r)\n", routine[i].name);
874
printf ("\t%s\n", routine[i].name);
878
Routines with a \".r\" need an extra parameter, for example mpn_lshift.6\n\
879
r should be in decimal, or use 0xN for hexadecimal.\n\
880
Special forms for r are Nbits for a random N bit number, and Nones for N one\n\
883
Times for sizes out of the range accepted by a routine are shown as 0.\n\
884
The fastest routine at each size is marked with a # (free form output only).\n\
888
Gnuplot home page http://www.cs.dartmouth.edu/gnuplot_info.html\n\
889
Quickplot home page http://www.kachinatech.com/~quickplot\n\
890
", speed_time_string);
894
main (int argc, char *argv[])
899
/* Unbuffered so output goes straight out when directed to a pipe or file
900
and isn't lost on killing the program half way. */
901
setbuf (stdout, NULL);
905
opt = getopt(argc, argv, "a:CcDdEFf:o:p:P:rRs:t:ux:y:w:W:z");
911
if (strcmp (optarg, "random") == 0) option_data = DATA_RANDOM;
912
else if (strcmp (optarg, "random2") == 0) option_data = DATA_RANDOM2;
913
else if (strcmp (optarg, "zeros") == 0) option_data = DATA_ZEROS;
914
else if (strcmp (optarg, "ffs") == 0) option_data = DATA_FFS;
915
else if (strcmp (optarg, "2fd") == 0) option_data = DATA_2FD;
918
fprintf (stderr, "unrecognised data option: %s\n", optarg);
923
if (option_unit != UNIT_SECONDS) goto bad_unit;
924
option_unit = UNIT_CYCLESPERLIMB;
927
if (option_unit != UNIT_SECONDS)
930
fprintf (stderr, "cannot use more than one of -c, -C\n");
933
option_unit = UNIT_CYCLES;
936
if (option_cmp != CMP_ABSOLUTE) goto bad_cmp;
937
option_cmp = CMP_DIFFPREV;
940
if (option_cmp != CMP_ABSOLUTE)
943
fprintf (stderr, "cannot use more than one of -d, -D, -r\n");
946
option_cmp = CMP_DIFFERENCE;
955
option_factor = atof (optarg);
956
if (option_factor <= 1.0)
958
fprintf (stderr, "-f factor must be > 1.0\n");
963
speed_option_set (optarg);
967
option_gnuplot_basename = optarg;
970
speed_precision = atoi (optarg);
973
option_seed = time (NULL);
976
if (option_cmp != CMP_ABSOLUTE)
978
option_cmp = CMP_RATIO;
983
for (s = strtok (optarg, ","); s != NULL; s = strtok (NULL, ","))
985
if (size_num == size_allocnum)
987
size_array = (struct size_array_t *)
988
__gmp_allocate_or_reallocate
990
size_allocnum * sizeof(size_array[0]),
991
(size_allocnum+10) * sizeof(size_array[0]));
994
if (sscanf (s, "%ld-%ld",
995
&size_array[size_num].start,
996
&size_array[size_num].end) != 2)
998
size_array[size_num].start = size_array[size_num].end
1002
if (size_array[size_num].start < 0
1003
|| size_array[size_num].end < 0
1004
|| size_array[size_num].start > size_array[size_num].end)
1006
fprintf (stderr, "invalid size parameter: %s\n", s);
1015
option_step = atol (optarg);
1016
if (option_step < 1)
1018
fprintf (stderr, "-t step must be >= 1\n");
1023
option_resource_usage = 1;
1029
sp.align_xp = atol (optarg);
1032
sp.align_yp = atol (optarg);
1035
sp.align_wp = atol (optarg);
1038
sp.align_wp2 = atol (optarg);
1053
fprintf (stderr, "-s <size> must be specified\n");
1057
gmp_randseed_ui (RANDS, option_seed);
1059
choice = (struct choice_t *) (*__gmp_allocate_func)
1060
((argc - optind) * sizeof(choice[0]));
1061
for ( ; optind < argc; optind++)
1064
routine_find (&c, argv[optind]);
1065
choice[num_choices] = c;
1069
if ((option_cmp == CMP_RATIO || option_cmp == CMP_DIFFERENCE) &&
1072
fprintf (stderr, "WARNING, -d or -r does nothing when only one routine requested\n");
1076
if (option_unit == UNIT_CYCLES || option_unit == UNIT_CYCLESPERLIMB)
1077
speed_cycletime_need_cycles ();
1079
speed_cycletime_need_seconds ();
1083
run_gnuplot (argc, argv);
1087
if (option_unit == UNIT_SECONDS)
1088
printf ("overhead %.9f secs", speed_measure (speed_noop, NULL));
1090
printf ("overhead %.2f cycles",
1091
speed_measure (speed_noop, NULL) / speed_cycletime);
1092
printf (", precision %d units of %.2e secs",
1093
speed_precision, speed_unittime);
1095
if (speed_cycletime == 1.0)
1096
printf (", CPU freq unknown\n");
1098
printf (", CPU freq %.2f MHz\n", 1e-6/speed_cycletime);
1101
for (i = 0; i < num_choices; i++)
1102
printf (" %*s", COLUMN_WIDTH, choice[i].name);
1108
if (option_resource_usage)
1112
/* This doesn't give data sizes on linux 2.0.x, only utime. */
1114
if (getrusage (RUSAGE_SELF, &r) != 0)
1115
perror ("getrusage");
1117
printf ("getrusage(): utime %ld.%06ld data %ld stack %ld maxresident %ld\n",
1118
r.ru_utime.tv_sec, r.ru_utime.tv_usec,
1119
r.ru_idrss, r.ru_isrss, r.ru_ixrss);
1122
printf ("getrusage() not available\n");
1128
sprintf (buf, "/proc/%d/status", getpid());
1129
if (access (buf, R_OK) == 0)
1131
sprintf (buf, "cat /proc/%d/status", getpid());