1
/* Run some tests on various mpn routines.
3
THIS IS A TEST PROGRAM USED ONLY FOR DEVELOPMENT. IT'S ALMOST CERTAIN TO
4
BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE VERSIONS OF GMP.
6
Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
8
This file is part of the GNU MP Library.
10
The GNU MP Library is free software; you can redistribute it and/or modify
11
it under the terms of the GNU Lesser General Public License as published by
12
the Free Software Foundation; either version 2.1 of the License, or (at your
13
option) any later version.
15
The GNU MP Library is distributed in the hope that it will be useful, but
16
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
17
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
18
License for more details.
20
You should have received a copy of the GNU Lesser General Public License
21
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
22
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
23
MA 02111-1307, USA. */
26
/* Usage: try [options] <function>...
28
For example, "./try mpn_add_n" to run tests of that function.
30
Combinations of alignments and overlaps are tested, with redzones above
31
or below the destinations, and with the sources write-protected.
33
The number of tests performed becomes ridiculously large with all the
34
combinations, and for that reason this can't be a part of a "make check",
35
it's meant only for development. The code isn't very pretty either.
37
During development it can help to disable the redzones, since seeing the
38
rest of the destination written can show where the wrong part is, or if
39
the dst pointers are off by 1 or whatever. The magic DEADVAL initial
40
fill (see below) will show locations never written.
42
The -s option can be used to test only certain size operands, which is
43
useful if some new code doesn't yet support say sizes less than the
44
unrolling, or whatever.
46
When a problem occurs it'll of course be necessary to run the program
47
under gdb to find out quite where, how and why it's going wrong. Disable
48
the spinner with the -W option when doing this, or single stepping won't
49
work. Using the "-1" option to run with simple data can be useful.
51
New functions to test can be added in try_array[]. If a new TYPE is
52
required then add it to the existing constants, set up its parameters in
53
param_init(), and add it to the call() function. Extra parameter fields
54
can be added if necessary, or further interpretations given to existing
60
umul_ppmm support is not very good, lots of source data is generated
61
whereas only two limbs are needed.
66
Make a little scheme for interpreting the "SIZE" selections uniformly.
68
Make tr->size==SIZE_2 work, for the benefit of find_a which wants just 2
69
source limbs. Possibly increase the default repetitions in that case.
71
Automatically detect gdb and disable the spinner (use -W for now).
73
Make a way to re-run a failing case in the debugger. Have an option to
74
snapshot each test case before it's run so the data is available if a
75
segv occurs. (This should be more reliable than the current print_all()
76
in the signal handler.)
78
When alignment means a dst isn't hard against the redzone, check the
79
space in between remains unchanged.
81
See if the 80x86 debug registers can do redzones on byte boundaries.
83
When a source overlaps a destination, don't run both s[i].high 0 and 1,
84
as s[i].high has no effect. Maybe encode s[i].high into overlap->s[i].
86
When partial overlaps aren't done, don't loop over source alignments
89
Try to make the looping code a bit less horrible. Right now it's pretty
90
hard to see what iterations are actually done.
92
When there's no overlap, run with both src>dst and src<dst. A subtle
93
calling-conventions violation occurred in a P6 copy which depended on the
94
relative location of src and dst.
99
/* always do assertion checking */
100
#define WANT_ASSERT 1
117
#include <sys/mman.h>
121
#include "gmp-impl.h"
122
#include "longlong.h"
127
#define ASSERT_CARRY(expr) ASSERT_ALWAYS ((expr) != 0)
129
#define ASSERT_CARRY(expr) (expr)
133
#if !HAVE_DECL_OPTARG
135
extern int optind, opterr;
138
/* Rumour has it some systems lack a define of PROT_NONE. */
143
/* Dummy defines for when mprotect doesn't exist. */
151
/* _SC_PAGESIZE is standard, but hpux 9 and possibly other systems have
152
_SC_PAGE_SIZE instead. */
153
#if defined (_SC_PAGE_SIZE) && ! defined (_SC_PAGESIZE)
154
#define _SC_PAGESIZE _SC_PAGE_SIZE
166
#define DEFAULT_REPETITIONS 10
168
int option_repetitions = DEFAULT_REPETITIONS;
169
int option_spinner = 1;
170
int option_redzones = 1;
171
int option_firstsize = 0;
172
int option_lastsize = 500;
173
int option_firstsize2 = 0;
177
#define CARRY_RANDOMS 5
178
#define MULTIPLIER_RANDOMS 5
179
#define DIVISOR_RANDOMS 5
180
#define FRACTION_COUNT 4
182
int option_print = 0;
189
int option_data = DATA_TRAND;
193
#define PAGESIZE_LIMBS (pagesize / BYTES_PER_MP_LIMB)
195
/* must be a multiple of the page size */
196
#define REDZONE_BYTES (pagesize * 16)
197
#define REDZONE_LIMBS (REDZONE_BYTES / BYTES_PER_MP_LIMB)
200
/* Three-way maximum, built from two nested uses of MAX (defined elsewhere,
   presumably in gmp-impl.h -- confirm).  Arguments are passed straight
   through to MAX, so any multiple-evaluation caveats of MAX apply here. */
#define MAX3(x,y,z) (MAX (x, MAX (y, z)))
202
#if BITS_PER_MP_LIMB == 32
203
#define DEADVAL CNST_LIMB(0xDEADBEEF)
205
#define DEADVAL CNST_LIMB(0xDEADBEEFBADDCAFE)
215
#define TRAP_NOWHERE 0
218
#define TRAP_SETUPS 3
219
int trap_location = TRAP_NOWHERE;
222
#define NUM_SOURCES 2
226
struct region_t region;
232
struct source_t s[NUM_SOURCES];
240
struct dest_t d[NUM_DESTS];
242
struct source_each_t {
247
struct region_t region;
256
mp_limb_t multiplier;
260
struct dest_each_t d[NUM_DESTS];
261
struct source_each_t s[NUM_SOURCES];
265
struct each_t ref = { "Ref" };
266
struct each_t fun = { "Fun" };
268
/* Size in limbs to use for source operand number n: source 1 uses size2
   when the current test type has a non-zero tr->size2, every other case
   uses size.  Relies on tr, size and size2 being in scope at the point of
   use. */
#define SRC_SIZE(n) ((n) == 1 && tr->size2 ? size2 : size)
270
void validate_fail _PROTO ((void));
278
typedef mp_limb_t (*tryfun_t) _PROTO ((ANYARGS));
287
#define SIZE_ALLOW_ZERO 2
288
#define SIZE_1 3 /* 1 limb */
289
#define SIZE_2 4 /* 2 limbs */
290
#define SIZE_3 5 /* 3 limbs */
291
#define SIZE_FRACTION 6 /* size2 is fraction for divrem etc */
293
#define SIZE_PLUS_1 8
296
#define SIZE_DIFF_PLUS_1 11
297
#define SIZE_RETVAL 12
298
#define SIZE_CEIL_HALF 13
299
#define SIZE_GET_STR 14
308
#define CARRY_BIT 1 /* single bit 0 or 1 */
309
#define CARRY_3 2 /* 0, 1, 2 */
310
#define CARRY_4 3 /* 0 to 3 */
311
#define CARRY_LIMB 4 /* any limb value */
312
#define CARRY_DIVISOR 5 /* carry<divisor */
315
/* a fudge to tell the output when to print negatives */
321
#define DIVISOR_LIMB 1
322
#define DIVISOR_NORM 2
323
#define DIVISOR_ODD 3
326
#define DATA_NON_ZERO 1
328
#define DATA_SRC1_ODD 3
329
#define DATA_SRC1_HIGHBIT 4
330
#define DATA_MULTIPLE_DIVISOR 5
331
#define DATA_UDIV_QRNND 6
334
/* Default is allow full overlap. */
335
#define OVERLAP_NONE 1
336
#define OVERLAP_LOW_TO_HIGH 2
337
#define OVERLAP_HIGH_TO_LOW 3
338
#define OVERLAP_NOT_SRCS 4
339
#define OVERLAP_NOT_SRC2 8
343
const char *reference_name;
345
void (*validate) _PROTO ((void));
346
const char *validate_name;
353
validate_mod_34lsub1 (void)
355
/* The modulus 2^(3*(BITS_PER_MP_LIMB/4)) - 1.  Both the function's result
   and the reference result are reduced by this value before being
   compared, so only their residues need to agree. */
#define CNST_34LSUB1 ((CNST_LIMB(1) << (3 * (BITS_PER_MP_LIMB / 4))) - 1)
357
mp_srcptr ptr = s[0].p;
359
mp_limb_t got, got_mod, want, want_mod;
364
got_mod = got % CNST_34LSUB1;
366
want = refmpn_mod_34lsub1 (ptr, size);
367
want_mod = want % CNST_34LSUB1;
369
if (got_mod != want_mod)
371
printf ("got 0x%lX reduced from 0x%lX\n", got_mod, got);
372
printf ("want 0x%lX reduced from 0x%lX\n", want_mod, want);
381
validate_divexact_1 (void)
383
mp_srcptr src = s[0].p;
384
mp_srcptr dst = fun.d[0].p;
390
mp_ptr tp = refmpn_malloc_limbs (size);
393
rem = refmpn_divrem_1 (tp, 0, src, size, divisor);
396
printf ("Remainder a%%d == 0x%lX, mpn_divexact_1 undefined\n", rem);
399
if (! refmpn_equal_anynail (tp, dst, size))
401
printf ("Quotient a/d wrong\n");
402
mpn_trace ("fun ", dst, size);
403
mpn_trace ("want", tp, size);
415
validate_modexact_1c_odd (void)
417
mp_srcptr ptr = s[0].p;
418
mp_limb_t r = fun.retval;
422
ASSERT (divisor & 1);
428
printf ("Don't have r < divisor\n");
432
else /* carry >= divisor */
434
if (! (r <= divisor))
436
printf ("Don't have r <= divisor\n");
442
mp_limb_t c = carry % divisor;
443
mp_ptr tp = refmpn_malloc_limbs (size+1);
446
for (k = size-1; k <= size; k++)
448
/* set {tp,size+1} to r*b^k + a - c */
449
refmpn_copyi (tp, ptr, size);
451
ASSERT_NOCARRY (refmpn_add_1 (tp+k, tp+k, size+1-k, r));
452
if (refmpn_sub_1 (tp, tp, size+1, c))
453
ASSERT_CARRY (mpn_add_1 (tp, tp, size+1, divisor));
455
if (refmpn_mod_1 (tp, size+1, divisor) == 0)
458
printf ("Remainder matches neither r*b^(size-1) nor r*b^size\n");
470
validate_modexact_1_odd (void)
473
validate_modexact_1c_odd ();
478
validate_sqrtrem (void)
480
mp_srcptr orig_ptr = s[0].p;
481
mp_size_t orig_size = size;
482
mp_size_t root_size = (size+1)/2;
483
mp_srcptr root_ptr = fun.d[0].p;
484
mp_size_t rem_size = fun.retval;
485
mp_srcptr rem_ptr = fun.d[1].p;
486
mp_size_t prod_size = 2*root_size;
490
if (rem_size < 0 || rem_size > size)
492
printf ("Bad remainder size retval %ld\n", rem_size);
496
p = refmpn_malloc_limbs (prod_size);
498
p[root_size] = refmpn_lshift (p, root_ptr, root_size, 1);
499
if (refmpn_cmp_twosizes (p,root_size+1, rem_ptr,rem_size) < 0)
501
printf ("Remainder bigger than 2*root\n");
505
refmpn_sqr (p, root_ptr, root_size);
507
refmpn_add (p, p, prod_size, rem_ptr, rem_size);
508
if (refmpn_cmp_twosizes (p,prod_size, orig_ptr,orig_size) != 0)
510
printf ("root^2+rem != original\n");
511
mpn_trace ("prod", p, prod_size);
521
/* These types are indexes into the param[] array and are arbitrary so long
522
as they're all distinct and within the size of param[]. Renumber
523
whenever necessary or desired. */
527
#define TYPE_ADD_NC 3
530
#define TYPE_SUB_NC 6
533
#define TYPE_MUL_1C 8
537
#define TYPE_ADDMUL_1 10
538
#define TYPE_ADDMUL_1C 11
539
#define TYPE_SUBMUL_1 12
540
#define TYPE_SUBMUL_1C 13
542
#define TYPE_ADDSUB_N 14
543
#define TYPE_ADDSUB_NC 15
545
#define TYPE_RSHIFT 16
546
#define TYPE_LSHIFT 17
549
#define TYPE_COPYI 21
550
#define TYPE_COPYD 22
551
#define TYPE_COM_N 23
553
#define TYPE_MOD_1 25
554
#define TYPE_MOD_1C 26
555
#define TYPE_DIVMOD_1 27
556
#define TYPE_DIVMOD_1C 28
557
#define TYPE_DIVREM_1 29
558
#define TYPE_DIVREM_1C 30
559
#define TYPE_PREINV_DIVREM_1 31
560
#define TYPE_PREINV_MOD_1 32
561
#define TYPE_MOD_34LSUB1 33
562
#define TYPE_UDIV_QRNND 34
564
#define TYPE_DIVEXACT_1 35
565
#define TYPE_DIVEXACT_BY3 36
566
#define TYPE_DIVEXACT_BY3C 37
568
#define TYPE_MODEXACT_1_ODD 38
569
#define TYPE_MODEXACT_1C_ODD 39
572
#define TYPE_GCD_1 41
573
#define TYPE_GCD_FINDA 42
574
#define TYPE_MPZ_JACOBI 43
575
#define TYPE_MPZ_KRONECKER 44
576
#define TYPE_MPZ_KRONECKER_UI 45
577
#define TYPE_MPZ_KRONECKER_SI 46
578
#define TYPE_MPZ_UI_KRONECKER 47
579
#define TYPE_MPZ_SI_KRONECKER 48
581
#define TYPE_AND_N 50
582
#define TYPE_NAND_N 51
583
#define TYPE_ANDN_N 52
584
#define TYPE_IOR_N 53
585
#define TYPE_IORN_N 54
586
#define TYPE_NIOR_N 55
587
#define TYPE_XOR_N 56
588
#define TYPE_XNOR_N 57
590
#define TYPE_POPCOUNT 58
591
#define TYPE_HAMDIST 59
593
#define TYPE_MUL_BASECASE 60
594
#define TYPE_MUL_N 61
596
#define TYPE_UMUL_PPMM 63
597
#define TYPE_UMUL_PPMM_R 64
599
#define TYPE_SB_DIVREM_MN 70
600
#define TYPE_TDIV_QR 71
602
#define TYPE_SQRTREM 80
604
#define TYPE_GET_STR 82
606
#define TYPE_EXTRA 90
608
struct try_t param[150];
616
#define COPY(index) memcpy (p, ¶m[index], sizeof (*p))
619
#define REFERENCE(fun) \
620
p->reference = (tryfun_t) fun; \
621
p->reference_name = #fun
622
#define VALIDATE(fun) \
624
p->validate_name = #fun
626
#define REFERENCE(fun) \
627
p->reference = (tryfun_t) fun; \
628
p->reference_name = "fun"
629
#define VALIDATE(fun) \
631
p->validate_name = "fun"
635
p = ¶m[TYPE_ADD_N];
640
REFERENCE (refmpn_add_n);
642
p = ¶m[TYPE_ADD_NC];
644
p->carry = CARRY_BIT;
645
REFERENCE (refmpn_add_nc);
647
p = ¶m[TYPE_SUB_N];
649
REFERENCE (refmpn_sub_n);
651
p = ¶m[TYPE_SUB_NC];
653
REFERENCE (refmpn_sub_nc);
655
p = ¶m[TYPE_ADD];
657
p->size = SIZE_ALLOW_ZERO;
659
REFERENCE (refmpn_add);
661
p = ¶m[TYPE_SUB];
663
REFERENCE (refmpn_sub);
666
p = ¶m[TYPE_MUL_1];
671
p->overlap = OVERLAP_LOW_TO_HIGH;
672
REFERENCE (refmpn_mul_1);
674
p = ¶m[TYPE_MUL_1C];
676
p->carry = CARRY_LIMB;
677
REFERENCE (refmpn_mul_1c);
680
p = ¶m[TYPE_MUL_2];
683
p->dst_size[0] = SIZE_PLUS_1;
687
p->overlap = OVERLAP_NOT_SRC2;
688
REFERENCE (refmpn_mul_2);
691
p = ¶m[TYPE_ADDMUL_1];
696
p->dst0_from_src1 = 1;
697
REFERENCE (refmpn_addmul_1);
699
p = ¶m[TYPE_ADDMUL_1C];
700
COPY (TYPE_ADDMUL_1);
701
p->carry = CARRY_LIMB;
702
REFERENCE (refmpn_addmul_1c);
704
p = ¶m[TYPE_SUBMUL_1];
705
COPY (TYPE_ADDMUL_1);
706
REFERENCE (refmpn_submul_1);
708
p = ¶m[TYPE_SUBMUL_1C];
709
COPY (TYPE_ADDMUL_1C);
710
REFERENCE (refmpn_submul_1c);
713
p = ¶m[TYPE_AND_N];
717
REFERENCE (refmpn_and_n);
719
p = ¶m[TYPE_ANDN_N];
721
REFERENCE (refmpn_andn_n);
723
p = ¶m[TYPE_NAND_N];
725
REFERENCE (refmpn_nand_n);
727
p = ¶m[TYPE_IOR_N];
729
REFERENCE (refmpn_ior_n);
731
p = ¶m[TYPE_IORN_N];
733
REFERENCE (refmpn_iorn_n);
735
p = ¶m[TYPE_NIOR_N];
737
REFERENCE (refmpn_nior_n);
739
p = ¶m[TYPE_XOR_N];
741
REFERENCE (refmpn_xor_n);
743
p = ¶m[TYPE_XNOR_N];
745
REFERENCE (refmpn_xnor_n);
748
p = ¶m[TYPE_ADDSUB_N];
754
REFERENCE (refmpn_addsub_n);
756
p = ¶m[TYPE_ADDSUB_NC];
757
COPY (TYPE_ADDSUB_N);
759
REFERENCE (refmpn_addsub_nc);
762
p = ¶m[TYPE_COPY];
765
p->overlap = OVERLAP_NONE;
766
p->size = SIZE_ALLOW_ZERO;
767
REFERENCE (refmpn_copy);
769
p = ¶m[TYPE_COPYI];
772
p->overlap = OVERLAP_LOW_TO_HIGH;
773
p->size = SIZE_ALLOW_ZERO;
774
REFERENCE (refmpn_copyi);
776
p = ¶m[TYPE_COPYD];
779
p->overlap = OVERLAP_HIGH_TO_LOW;
780
p->size = SIZE_ALLOW_ZERO;
781
REFERENCE (refmpn_copyd);
783
p = ¶m[TYPE_COM_N];
786
REFERENCE (refmpn_com_n);
789
p = ¶m[TYPE_MOD_1];
792
p->size = SIZE_ALLOW_ZERO;
793
p->divisor = DIVISOR_LIMB;
794
REFERENCE (refmpn_mod_1);
796
p = ¶m[TYPE_MOD_1C];
798
p->carry = CARRY_DIVISOR;
799
REFERENCE (refmpn_mod_1c);
801
p = ¶m[TYPE_DIVMOD_1];
804
REFERENCE (refmpn_divmod_1);
806
p = ¶m[TYPE_DIVMOD_1C];
807
COPY (TYPE_DIVMOD_1);
808
p->carry = CARRY_DIVISOR;
809
REFERENCE (refmpn_divmod_1c);
811
p = ¶m[TYPE_DIVREM_1];
812
COPY (TYPE_DIVMOD_1);
813
p->size2 = SIZE_FRACTION;
814
p->dst_size[0] = SIZE_SUM;
815
REFERENCE (refmpn_divrem_1);
817
p = ¶m[TYPE_DIVREM_1C];
818
COPY (TYPE_DIVREM_1);
819
p->carry = CARRY_DIVISOR;
820
REFERENCE (refmpn_divrem_1c);
822
p = ¶m[TYPE_PREINV_DIVREM_1];
823
COPY (TYPE_DIVREM_1);
824
p->size = SIZE_YES; /* ie. no size==0 */
825
REFERENCE (refmpn_preinv_divrem_1);
827
p = ¶m[TYPE_PREINV_MOD_1];
830
p->divisor = DIVISOR_NORM;
831
REFERENCE (refmpn_preinv_mod_1);
833
p = ¶m[TYPE_MOD_34LSUB1];
836
VALIDATE (validate_mod_34lsub1);
838
p = ¶m[TYPE_UDIV_QRNND];
842
p->dst_size[0] = SIZE_1;
843
p->divisor = DIVISOR_LIMB;
844
p->data = DATA_UDIV_QRNND;
845
p->overlap = OVERLAP_NONE;
846
REFERENCE (refmpn_udiv_qrnnd);
849
p = ¶m[TYPE_DIVEXACT_1];
853
p->data = DATA_MULTIPLE_DIVISOR;
854
VALIDATE (validate_divexact_1);
855
REFERENCE (refmpn_divmod_1);
858
p = ¶m[TYPE_DIVEXACT_BY3];
862
REFERENCE (refmpn_divexact_by3);
864
p = ¶m[TYPE_DIVEXACT_BY3C];
865
COPY (TYPE_DIVEXACT_BY3);
867
REFERENCE (refmpn_divexact_by3c);
870
p = ¶m[TYPE_MODEXACT_1_ODD];
873
p->divisor = DIVISOR_ODD;
874
VALIDATE (validate_modexact_1_odd);
876
p = ¶m[TYPE_MODEXACT_1C_ODD];
877
COPY (TYPE_MODEXACT_1_ODD);
878
p->carry = CARRY_LIMB;
879
VALIDATE (validate_modexact_1c_odd);
882
p = ¶m[TYPE_GCD_1];
885
p->data = DATA_NON_ZERO;
886
p->divisor = DIVISOR_LIMB;
887
REFERENCE (refmpn_gcd_1);
889
p = ¶m[TYPE_GCD];
895
p->dst_size[0] = SIZE_RETVAL;
896
p->overlap = OVERLAP_NOT_SRCS;
898
REFERENCE (refmpn_gcd);
901
p = ¶m[TYPE_GCD_FINDA];
904
REFERENCE (refmpn_gcd_finda);
907
p = ¶m[TYPE_MPZ_JACOBI];
910
p->size = SIZE_ALLOW_ZERO;
915
REFERENCE (refmpz_jacobi);
917
p = ¶m[TYPE_MPZ_KRONECKER];
918
COPY (TYPE_MPZ_JACOBI);
919
REFERENCE (refmpz_kronecker);
922
p = ¶m[TYPE_MPZ_KRONECKER_UI];
925
p->size = SIZE_ALLOW_ZERO;
927
p->carry = CARRY_BIT;
928
REFERENCE (refmpz_kronecker_ui);
930
p = ¶m[TYPE_MPZ_KRONECKER_SI];
931
COPY (TYPE_MPZ_KRONECKER_UI);
932
REFERENCE (refmpz_kronecker_si);
934
p = ¶m[TYPE_MPZ_UI_KRONECKER];
935
COPY (TYPE_MPZ_KRONECKER_UI);
936
REFERENCE (refmpz_ui_kronecker);
938
p = ¶m[TYPE_MPZ_SI_KRONECKER];
939
COPY (TYPE_MPZ_KRONECKER_UI);
940
REFERENCE (refmpz_si_kronecker);
943
p = ¶m[TYPE_SQR];
946
p->dst_size[0] = SIZE_SUM;
947
p->overlap = OVERLAP_NONE;
948
REFERENCE (refmpn_sqr);
950
p = ¶m[TYPE_MUL_N];
953
REFERENCE (refmpn_mul_n);
955
p = ¶m[TYPE_MUL_BASECASE];
958
REFERENCE (refmpn_mul_basecase);
960
p = ¶m[TYPE_UMUL_PPMM];
964
p->dst_size[0] = SIZE_1;
965
p->overlap = OVERLAP_NONE;
966
REFERENCE (refmpn_umul_ppmm);
968
p = ¶m[TYPE_UMUL_PPMM_R];
969
COPY (TYPE_UMUL_PPMM);
970
REFERENCE (refmpn_umul_ppmm_r);
973
p = ¶m[TYPE_RSHIFT];
978
p->overlap = OVERLAP_LOW_TO_HIGH;
979
REFERENCE (refmpn_rshift);
981
p = ¶m[TYPE_LSHIFT];
983
p->overlap = OVERLAP_HIGH_TO_LOW;
984
REFERENCE (refmpn_lshift);
987
p = ¶m[TYPE_POPCOUNT];
990
REFERENCE (refmpn_popcount);
992
p = ¶m[TYPE_HAMDIST];
993
COPY (TYPE_POPCOUNT);
995
REFERENCE (refmpn_hamdist);
998
p = ¶m[TYPE_SB_DIVREM_MN];
1004
p->data = DATA_SRC1_HIGHBIT;
1006
p->dst_size[0] = SIZE_DIFF;
1007
p->overlap = OVERLAP_NONE;
1008
REFERENCE (refmpn_sb_divrem_mn);
1010
p = ¶m[TYPE_TDIV_QR];
1016
p->dst_size[0] = SIZE_DIFF_PLUS_1;
1017
p->dst_size[1] = SIZE_SIZE2;
1018
p->overlap = OVERLAP_NONE;
1019
REFERENCE (refmpn_tdiv_qr);
1021
p = ¶m[TYPE_SQRTREM];
1026
p->dst_size[0] = SIZE_CEIL_HALF;
1027
p->dst_size[1] = SIZE_RETVAL;
1028
p->overlap = OVERLAP_NONE;
1029
VALIDATE (validate_sqrtrem);
1031
p = ¶m[TYPE_ZERO];
1033
p->size = SIZE_ALLOW_ZERO;
1034
REFERENCE (refmpn_zero);
1036
p = ¶m[TYPE_GET_STR];
1038
p->size = SIZE_ALLOW_ZERO;
1041
p->dst_size[0] = SIZE_GET_STR;
1042
p->dst_bytes[0] = 1;
1043
p->overlap = OVERLAP_NONE;
1044
REFERENCE (refmpn_get_str);
1046
#ifdef EXTRA_PARAM_INIT
1052
/* The following are macros if there's no native versions, so wrap them in
1053
functions that can be in try_array[]. */
1056
MPN_COPY_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1057
{ MPN_COPY (rp, sp, size); }
1060
MPN_COPY_INCR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1061
{ MPN_COPY_INCR (rp, sp, size); }
1064
MPN_COPY_DECR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1065
{ MPN_COPY_DECR (rp, sp, size); }
1068
__GMPN_COPY_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1069
{ __GMPN_COPY (rp, sp, size); }
1071
#ifdef __GMPN_COPY_INCR
1073
__GMPN_COPY_INCR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1074
{ __GMPN_COPY_INCR (rp, sp, size); }
1078
mpn_com_n_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1079
{ mpn_com_n (rp, sp, size); }
1082
mpn_and_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1083
{ mpn_and_n (rp, s1, s2, size); }
1086
mpn_andn_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1087
{ mpn_andn_n (rp, s1, s2, size); }
1090
mpn_nand_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1091
{ mpn_nand_n (rp, s1, s2, size); }
1094
mpn_ior_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1095
{ mpn_ior_n (rp, s1, s2, size); }
1098
mpn_iorn_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1099
{ mpn_iorn_n (rp, s1, s2, size); }
1102
mpn_nior_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1103
{ mpn_nior_n (rp, s1, s2, size); }
1106
mpn_xor_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1107
{ mpn_xor_n (rp, s1, s2, size); }
1110
mpn_xnor_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1111
{ mpn_xnor_n (rp, s1, s2, size); }
1114
udiv_qrnnd_fun (mp_limb_t *remptr, mp_limb_t n1, mp_limb_t n0, mp_limb_t d)
1117
udiv_qrnnd (q, *remptr, n1, n0, d);
1122
mpn_divexact_by3_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1124
return mpn_divexact_by3 (rp, sp, size);
1128
mpn_modexact_1_odd_fun (mp_srcptr ptr, mp_size_t size, mp_limb_t divisor)
1130
return mpn_modexact_1_odd (ptr, size, divisor);
1134
mpn_kara_mul_n_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size)
1139
tspace = TMP_ALLOC_LIMBS (MPN_KARA_MUL_N_TSIZE (size));
1140
mpn_kara_mul_n (dst, src1, src2, size, tspace);
1143
mpn_kara_sqr_n_fun (mp_ptr dst, mp_srcptr src, mp_size_t size)
1148
tspace = TMP_ALLOC_LIMBS (MPN_KARA_SQR_N_TSIZE (size));
1149
mpn_kara_sqr_n (dst, src, size, tspace);
1153
mpn_toom3_mul_n_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size)
1158
tspace = TMP_ALLOC_LIMBS (MPN_TOOM3_MUL_N_TSIZE (size));
1159
mpn_toom3_mul_n (dst, src1, src2, size, tspace);
1162
mpn_toom3_sqr_n_fun (mp_ptr dst, mp_srcptr src, mp_size_t size)
1167
tspace = TMP_ALLOC_LIMBS (MPN_TOOM3_SQR_N_TSIZE (size));
1168
mpn_toom3_sqr_n (dst, src, size, tspace);
1172
umul_ppmm_fun (mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2)
1175
umul_ppmm (high, *lowptr, m1, m2);
1179
mpn_umul_ppmm_fun (mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2)
1182
umul_ppmm (high, *lowptr, m1, m2);
1187
MPN_ZERO_fun (mp_ptr ptr, mp_size_t size)
1188
{ MPN_ZERO (ptr, size); }
1199
#define TRY(fun) #fun, (tryfun_t) fun
1200
#define TRY_FUNFUN(fun) #fun, (tryfun_t) fun##_fun
1202
#define TRY(fun) "fun", (tryfun_t) fun
1203
#define TRY_FUNFUN(fun) "fun", (tryfun_t) fun/**/_fun
1206
const struct choice_t choice_array[] = {
1207
{ TRY(mpn_add), TYPE_ADD },
1208
{ TRY(mpn_sub), TYPE_SUB },
1210
{ TRY(mpn_add_n), TYPE_ADD_N },
1211
{ TRY(mpn_sub_n), TYPE_SUB_N },
1213
#if HAVE_NATIVE_mpn_add_nc
1214
{ TRY(mpn_add_nc), TYPE_ADD_NC },
1216
#if HAVE_NATIVE_mpn_sub_nc
1217
{ TRY(mpn_sub_nc), TYPE_SUB_NC },
1220
{ TRY(mpn_addmul_1), TYPE_ADDMUL_1 },
1221
{ TRY(mpn_submul_1), TYPE_SUBMUL_1 },
1222
#if HAVE_NATIVE_mpn_addmul_1c
1223
{ TRY(mpn_addmul_1c), TYPE_ADDMUL_1C },
1225
#if HAVE_NATIVE_mpn_submul_1c
1226
{ TRY(mpn_submul_1c), TYPE_SUBMUL_1C },
1229
{ TRY_FUNFUN(mpn_com_n), TYPE_COM_N },
1231
{ TRY_FUNFUN(MPN_COPY), TYPE_COPY },
1232
{ TRY_FUNFUN(MPN_COPY_INCR), TYPE_COPYI },
1233
{ TRY_FUNFUN(MPN_COPY_DECR), TYPE_COPYD },
1235
{ TRY_FUNFUN(__GMPN_COPY), TYPE_COPY },
1236
#ifdef __GMPN_COPY_INCR
1237
{ TRY_FUNFUN(__GMPN_COPY_INCR), TYPE_COPYI },
1240
{ TRY_FUNFUN(mpn_and_n), TYPE_AND_N },
1241
{ TRY_FUNFUN(mpn_andn_n), TYPE_ANDN_N },
1242
{ TRY_FUNFUN(mpn_nand_n), TYPE_NAND_N },
1243
{ TRY_FUNFUN(mpn_ior_n), TYPE_IOR_N },
1244
{ TRY_FUNFUN(mpn_iorn_n), TYPE_IORN_N },
1245
{ TRY_FUNFUN(mpn_nior_n), TYPE_NIOR_N },
1246
{ TRY_FUNFUN(mpn_xor_n), TYPE_XOR_N },
1247
{ TRY_FUNFUN(mpn_xnor_n), TYPE_XNOR_N },
1249
{ TRY(mpn_divrem_1), TYPE_DIVREM_1 },
1250
#if USE_PREINV_DIVREM_1
1251
{ TRY(mpn_preinv_divrem_1), TYPE_PREINV_DIVREM_1 },
1253
{ TRY(mpn_mod_1), TYPE_MOD_1 },
1254
{ TRY(mpn_preinv_mod_1), TYPE_PREINV_MOD_1 },
1255
#if HAVE_NATIVE_mpn_divrem_1c
1256
{ TRY(mpn_divrem_1c), TYPE_DIVREM_1C },
1258
#if HAVE_NATIVE_mpn_mod_1c
1259
{ TRY(mpn_mod_1c), TYPE_MOD_1C },
1261
{ TRY(mpn_mod_34lsub1), TYPE_MOD_34LSUB1 },
1262
{ TRY_FUNFUN(udiv_qrnnd), TYPE_UDIV_QRNND, 2 },
1264
{ TRY(mpn_divexact_1), TYPE_DIVEXACT_1 },
1265
{ TRY_FUNFUN(mpn_divexact_by3), TYPE_DIVEXACT_BY3 },
1266
{ TRY(mpn_divexact_by3c), TYPE_DIVEXACT_BY3C },
1268
{ TRY_FUNFUN(mpn_modexact_1_odd), TYPE_MODEXACT_1_ODD },
1269
{ TRY(mpn_modexact_1c_odd), TYPE_MODEXACT_1C_ODD },
1272
{ TRY(mpn_sb_divrem_mn), TYPE_SB_DIVREM_MN, 3},
1273
{ TRY(mpn_tdiv_qr), TYPE_TDIV_QR },
1275
{ TRY(mpn_mul_1), TYPE_MUL_1 },
1276
#if HAVE_NATIVE_mpn_mul_1c
1277
{ TRY(mpn_mul_1c), TYPE_MUL_1C },
1279
#if HAVE_NATIVE_mpn_mul_2
1280
{ TRY(mpn_mul_2), TYPE_MUL_2 },
1283
{ TRY(mpn_rshift), TYPE_RSHIFT },
1284
{ TRY(mpn_lshift), TYPE_LSHIFT },
1287
{ TRY(mpn_mul_basecase), TYPE_MUL_BASECASE },
1288
{ TRY(mpn_sqr_basecase), TYPE_SQR },
1290
{ TRY(mpn_mul), TYPE_MUL_BASECASE },
1291
{ TRY(mpn_mul_n), TYPE_MUL_N },
1292
{ TRY(mpn_sqr_n), TYPE_SQR },
1294
{ TRY_FUNFUN(umul_ppmm), TYPE_UMUL_PPMM, 2 },
1296
{ TRY_FUNFUN(mpn_kara_mul_n), TYPE_MUL_N, MPN_KARA_MUL_N_MINSIZE },
1297
{ TRY_FUNFUN(mpn_kara_sqr_n), TYPE_SQR, MPN_KARA_SQR_N_MINSIZE },
1298
{ TRY_FUNFUN(mpn_toom3_mul_n), TYPE_MUL_N, MPN_TOOM3_MUL_N_MINSIZE },
1299
{ TRY_FUNFUN(mpn_toom3_sqr_n), TYPE_SQR, MPN_TOOM3_SQR_N_MINSIZE },
1301
{ TRY(mpn_gcd_1), TYPE_GCD_1 },
1302
{ TRY(mpn_gcd), TYPE_GCD },
1303
#if HAVE_NATIVE_mpn_gcd_finda
1304
{ TRY(mpn_gcd_finda), TYPE_GCD_FINDA },
1306
{ TRY(mpz_jacobi), TYPE_MPZ_JACOBI },
1307
{ TRY(mpz_kronecker_ui), TYPE_MPZ_KRONECKER_UI },
1308
{ TRY(mpz_kronecker_si), TYPE_MPZ_KRONECKER_SI },
1309
{ TRY(mpz_ui_kronecker), TYPE_MPZ_UI_KRONECKER },
1310
{ TRY(mpz_si_kronecker), TYPE_MPZ_SI_KRONECKER },
1312
{ TRY(mpn_popcount), TYPE_POPCOUNT },
1313
{ TRY(mpn_hamdist), TYPE_HAMDIST },
1315
{ TRY(mpn_sqrtrem), TYPE_SQRTREM },
1317
{ TRY_FUNFUN(MPN_ZERO), TYPE_ZERO },
1319
{ TRY(mpn_get_str), TYPE_GET_STR },
1321
#ifdef EXTRA_ROUTINES
1326
const struct choice_t *choice = NULL;
1330
mprotect_maybe (void *addr, size_t len, int prot)
1332
if (!option_redzones)
1336
if (mprotect (addr, len, prot) != 0)
1338
/* len is a size_t, which need not be the same width as the unsigned int
   that %X expects; convert explicitly and print it with %lX so the
   arguments always match the format.  prot is an int, cast to unsigned
   for the same reason. */
fprintf (stderr, "Cannot mprotect %p 0x%lX 0x%X\n",
         addr, (unsigned long) len, (unsigned) prot);
1343
static int warned = 0;
1347
"mprotect not available, bounds testing not performed\n");
1354
/* round "a" up to a multiple of "m" */
1356
round_up_multiple (size_t a, size_t m)
1368
/* On some systems it seems that only an mmap'ed region can be mprotect'ed,
1369
for instance HP-UX 10.
1371
mmap will almost certainly return a pointer already aligned to a page
1372
boundary, but it's easy enough to share the alignment handling with the
1376
malloc_region (struct region_t *r, mp_size_t n)
1381
ASSERT ((pagesize % BYTES_PER_MP_LIMB) == 0);
1383
n = round_up_multiple (n, PAGESIZE_LIMBS);
1386
nbytes = n*BYTES_PER_MP_LIMB + 2*REDZONE_BYTES + pagesize;
1388
#if defined (MAP_ANONYMOUS) && ! defined (MAP_ANON)
1389
#define MAP_ANON MAP_ANONYMOUS
1392
#if HAVE_MMAP && defined (MAP_ANON)
1393
/* note must pass fd=-1 for MAP_ANON on BSD */
1394
p = mmap (NULL, nbytes, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
1395
if (p == (void *) -1)
1397
fprintf (stderr, "Cannot mmap %#x anon bytes\n", nbytes);
1401
p = malloc (nbytes);
1402
ASSERT_ALWAYS (p != NULL);
1405
p = align_pointer (p, pagesize);
1407
mprotect_maybe (p, REDZONE_BYTES, PROT_NONE);
1411
mprotect_maybe (p + n, REDZONE_BYTES, PROT_NONE);
1415
mprotect_region (const struct region_t *r, int prot)
1417
mprotect_maybe (r->ptr, r->size, prot);
1421
/* First four entries must be 0,1,2,3 for the benefit of CARRY_BIT, CARRY_3,
1423
mp_limb_t carry_array[] = {
1432
#define CARRY_COUNT \
1433
((tr->carry == CARRY_BIT) ? 2 \
1434
: tr->carry == CARRY_3 ? 3 \
1435
: tr->carry == CARRY_4 ? 4 \
1436
: (tr->carry == CARRY_LIMB || tr->carry == CARRY_DIVISOR) \
1437
? numberof(carry_array) + CARRY_RANDOMS \
1440
#define MPN_RANDOM_ALT(index,dst,size) \
1441
(((index) & 1) ? refmpn_random (dst, size) : refmpn_random2 (dst, size))
1443
/* The dummy value after MPN_RANDOM_ALT ensures both sides of the ":" have
1445
#define CARRY_ITERATION \
1446
for (carry_index = 0; \
1447
(carry_index < numberof (carry_array) \
1448
? (carry = carry_array[carry_index]) \
1449
: (MPN_RANDOM_ALT (carry_index, &carry, 1), (mp_limb_t) 0)), \
1450
(tr->carry == CARRY_DIVISOR ? carry %= divisor : 0), \
1451
carry_index < CARRY_COUNT; \
1455
mp_limb_t multiplier_array[] = {
1463
int multiplier_index;
1465
mp_limb_t divisor_array[] = {
1470
GMP_NUMB_HIGHBIT + 1,
1478
/* The dummy value after MPN_RANDOM_ALT ensures both sides of the ":" have
1480
#define ARRAY_ITERATION(var, index, limit, array, randoms, cond) \
1482
(index < numberof (array) \
1483
? (var = array[index]) \
1484
: (MPN_RANDOM_ALT (index, &var, 1), (mp_limb_t) 0)), \
1488
#define MULTIPLIER_COUNT \
1490
? numberof (multiplier_array) + MULTIPLIER_RANDOMS \
1493
#define MULTIPLIER_ITERATION \
1494
ARRAY_ITERATION(multiplier, multiplier_index, MULTIPLIER_COUNT, \
1495
multiplier_array, MULTIPLIER_RANDOMS, TRY_MULTIPLIER)
1497
#define DIVISOR_COUNT \
1499
? numberof (divisor_array) + DIVISOR_RANDOMS \
1502
#define DIVISOR_ITERATION \
1503
ARRAY_ITERATION(divisor, divisor_index, DIVISOR_COUNT, divisor_array, \
1504
DIVISOR_RANDOMS, TRY_DIVISOR)
1507
/* overlap_array[].s[i] is where s[i] should be, 0 or 1 means overlapping
1508
d[0] or d[1] respectively, -1 means a separate (write-protected)
1513
} overlap_array[] = {
1525
struct overlap_t *overlap, *overlap_limit;
1527
#define OVERLAP_COUNT \
1528
(tr->overlap & OVERLAP_NONE ? 1 \
1529
: tr->overlap & OVERLAP_NOT_SRCS ? 3 \
1530
: tr->overlap & OVERLAP_NOT_SRC2 ? 2 \
1536
#define OVERLAP_ITERATION \
1537
for (overlap = &overlap_array[0], \
1538
overlap_limit = &overlap_array[OVERLAP_COUNT]; \
1539
overlap < overlap_limit; \
1545
#define T_RAND_COUNT 2
1549
t_random (mp_ptr ptr, mp_size_t n)
1554
switch (option_data) {
1557
case 0: refmpn_random (ptr, n); break;
1558
case 1: refmpn_random2 (ptr, n); break;
1564
static mp_limb_t counter = 0;
1566
for (i = 0; i < n; i++)
1571
refmpn_zero (ptr, n);
1574
refmpn_fill (ptr, n, GMP_NUMB_MAX);
1577
/* Special value 0x2FFF...FFFD, which divided by 3 gives 0xFFF...FFF,
1578
inducing the q1_ff special case in the mul-by-inverse part of some
1579
versions of divrem_1 and mod_1. */
1580
refmpn_fill (ptr, n, (mp_limb_t) -1);
1589
#define T_RAND_ITERATION \
1590
for (t_rand = 0; t_rand < T_RAND_COUNT; t_rand++)
1594
print_each (const struct each_t *e)
1598
printf ("%s %s\n", e->name, e == &ref ? tr->reference_name : choice->name);
1600
mpn_trace (" retval", &e->retval, 1);
1602
for (i = 0; i < NUM_DESTS; i++)
1606
if (tr->dst_bytes[i])
1607
byte_tracen (" d[%d]", i, e->d[i].p, d[i].size);
1609
mpn_tracen (" d[%d]", i, e->d[i].p, d[i].size);
1610
printf (" located %p\n", e->d[i].p);
1614
for (i = 0; i < NUM_SOURCES; i++)
1616
printf (" s[%d] located %p\n", i, e->s[i].p);
1626
printf ("size %ld\n", size);
1628
printf ("size2 %ld\n", size2);
1630
for (i = 0; i < NUM_DESTS; i++)
1631
if (d[i].size != size)
1632
printf ("d[%d].size %ld\n", i, d[i].size);
1635
mpn_trace (" multiplier", &multiplier, 1);
1637
mpn_trace (" divisor", &divisor, 1);
1639
printf (" shift %lu\n", shift);
1641
mpn_trace (" carry", &carry, 1);
1643
for (i = 0; i < NUM_DESTS; i++)
1645
printf (" d[%d] %s, align %ld, size %ld\n",
1646
i, d[i].high ? "high" : "low", d[i].align, d[i].size);
1648
for (i = 0; i < NUM_SOURCES; i++)
1652
printf (" s[%d] %s, align %ld, ",
1653
i, s[i].high ? "high" : "low", s[i].align);
1654
switch (overlap->s[i]) {
1656
printf ("no overlap\n");
1659
printf ("==d[%d]%s\n",
1661
tr->overlap == OVERLAP_LOW_TO_HIGH ? "+a"
1662
: tr->overlap == OVERLAP_HIGH_TO_LOW ? "-a"
1666
printf (" s[%d]=", i);
1667
if (tr->carry_sign && (carry & (1 << i)))
1669
mpn_trace (NULL, s[i].p, SRC_SIZE(i));
1673
if (tr->dst0_from_src1)
1674
mpn_trace (" d[0]", s[1].region.ptr, size);
1687
if (tr->retval && ref.retval != fun.retval)
1689
printf ("Different return values (%lu, %lu)\n",
1690
ref.retval, fun.retval);
1694
for (i = 0; i < NUM_DESTS; i++)
1696
/* Index by i, not 0: the enclosing loop walks every destination and
   assigns d[i].size, so each one must be judged by its own dst_size
   entry (as pointer_setup does). */
switch (tr->dst_size[i]) {
1698
d[i].size = ref.retval;
1703
for (i = 0; i < NUM_DESTS; i++)
1708
if (tr->dst_bytes[i])
1710
if (memcmp (ref.d[i].p, fun.d[i].p, d[i].size) != 0)
1712
printf ("Different d[%d] data results, low diff at %ld, high diff at %ld\n",
1714
byte_diff_lowest (ref.d[i].p, fun.d[i].p, d[i].size),
1715
byte_diff_highest (ref.d[i].p, fun.d[i].p, d[i].size));
1722
&& ! refmpn_equal_anynail (ref.d[i].p, fun.d[i].p, d[i].size))
1724
printf ("Different d[%d] data results, low diff at %ld, high diff at %ld\n",
1726
mpn_diff_lowest (ref.d[i].p, fun.d[i].p, d[i].size),
1727
mpn_diff_highest (ref.d[i].p, fun.d[i].p, d[i].size));
1741
/* The functions are cast if the return value should be a long rather than
1742
the default mp_limb_t. This is necessary under _LONG_LONG_LIMB. This
1743
might not be enough if some actual calling conventions checking is
1744
implemented on a long long limb system. */
1747
call (struct each_t *e, tryfun_t function)
1749
switch (choice->type) {
1752
e->retval = CALLING_CONVENTIONS (function)
1753
(e->d[0].p, e->s[0].p, size, e->s[1].p, size2);
1758
e->retval = CALLING_CONVENTIONS (function)
1759
(e->d[0].p, e->s[0].p, e->s[1].p, size);
1763
e->retval = CALLING_CONVENTIONS (function)
1764
(e->d[0].p, e->s[0].p, e->s[1].p, size, carry);
1770
e->retval = CALLING_CONVENTIONS (function)
1771
(e->d[0].p, e->s[0].p, size, multiplier);
1774
case TYPE_ADDMUL_1C:
1775
case TYPE_SUBMUL_1C:
1776
e->retval = CALLING_CONVENTIONS (function)
1777
(e->d[0].p, e->s[0].p, size, multiplier, carry);
1781
e->retval = CALLING_CONVENTIONS (function)
1782
(e->d[0].p, e->s[0].p, size, e->s[1].p);
1793
CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size);
1797
e->retval = CALLING_CONVENTIONS (function)
1798
(e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size);
1800
case TYPE_ADDSUB_NC:
1801
e->retval = CALLING_CONVENTIONS (function)
1802
(e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size, carry);
1809
CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
1813
case TYPE_DIVEXACT_BY3:
1814
e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
1816
case TYPE_DIVEXACT_BY3C:
1817
e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size,
1823
case TYPE_DIVEXACT_1:
1824
e->retval = CALLING_CONVENTIONS (function)
1825
(e->d[0].p, e->s[0].p, size, divisor);
1827
case TYPE_DIVMOD_1C:
1828
e->retval = CALLING_CONVENTIONS (function)
1829
(e->d[0].p, e->s[0].p, size, divisor, carry);
1832
e->retval = CALLING_CONVENTIONS (function)
1833
(e->d[0].p, size2, e->s[0].p, size, divisor);
1835
case TYPE_DIVREM_1C:
1836
e->retval = CALLING_CONVENTIONS (function)
1837
(e->d[0].p, size2, e->s[0].p, size, divisor, carry);
1839
case TYPE_PREINV_DIVREM_1:
1843
shift = refmpn_count_leading_zeros (divisor);
1844
dinv = refmpn_invert_limb (divisor << shift);
1845
e->retval = CALLING_CONVENTIONS (function)
1846
(e->d[0].p, size2, e->s[0].p, size, divisor, dinv, shift);
1850
case TYPE_MODEXACT_1_ODD:
1851
e->retval = CALLING_CONVENTIONS (function)
1852
(e->s[0].p, size, divisor);
1855
case TYPE_MODEXACT_1C_ODD:
1856
e->retval = CALLING_CONVENTIONS (function)
1857
(e->s[0].p, size, divisor, carry);
1859
case TYPE_PREINV_MOD_1:
1860
e->retval = CALLING_CONVENTIONS (function)
1861
(e->s[0].p, size, divisor, refmpn_invert_limb (divisor));
1863
case TYPE_MOD_34LSUB1:
1864
e->retval = CALLING_CONVENTIONS (function) (e->s[0].p, size);
1866
case TYPE_UDIV_QRNND:
1867
e->retval = CALLING_CONVENTIONS (function)
1868
(e->d[0].p, e->s[0].p[1], e->s[0].p[0], divisor);
1871
case TYPE_SB_DIVREM_MN:
1872
refmpn_copyi (e->d[1].p, e->s[0].p, size); /* dividend */
1873
refmpn_fill (e->d[0].p, size-size2, 0x98765432); /* quotient */
1874
e->retval = CALLING_CONVENTIONS (function)
1875
(e->d[0].p, e->d[1].p, size, e->s[1].p, size2);
1876
refmpn_zero (e->d[1].p+size2, size-size2); /* excess over remainder */
1879
CALLING_CONVENTIONS (function) (e->d[0].p, e->d[1].p, 0,
1880
e->s[0].p, size, e->s[1].p, size2);
1884
/* Must have a non-zero src, but this probably isn't the best way to do
1886
if (refmpn_zero_p (e->s[0].p, size))
1889
e->retval = CALLING_CONVENTIONS (function) (e->s[0].p, size, divisor);
1893
/* Sources are destroyed, so they're saved and replaced, but a general
1894
approach to this might be better. Note that it's still e->s[0].p and
1895
e->s[1].p that are passed, to get the desired alignments. */
1897
mp_ptr s0 = refmpn_malloc_limbs (size);
1898
mp_ptr s1 = refmpn_malloc_limbs (size2);
1899
refmpn_copyi (s0, e->s[0].p, size);
1900
refmpn_copyi (s1, e->s[1].p, size2);
1902
mprotect_region (&s[0].region, PROT_READ|PROT_WRITE);
1903
mprotect_region (&s[1].region, PROT_READ|PROT_WRITE);
1904
e->retval = CALLING_CONVENTIONS (function) (e->d[0].p,
1907
refmpn_copyi (e->s[0].p, s0, size);
1908
refmpn_copyi (e->s[1].p, s1, size2);
1914
case TYPE_GCD_FINDA:
1916
/* FIXME: do this with a flag */
1918
c[0] = e->s[0].p[0];
1919
c[0] += (c[0] == 0);
1920
c[1] = e->s[0].p[0];
1921
c[1] += (c[1] == 0);
1922
e->retval = CALLING_CONVENTIONS (function) (c);
1926
case TYPE_MPZ_JACOBI:
1927
case TYPE_MPZ_KRONECKER:
1930
PTR(a) = e->s[0].p; SIZ(a) = ((carry&1)==0 ? size : -size);
1931
PTR(b) = e->s[1].p; SIZ(b) = ((carry&2)==0 ? size2 : -size2);
1932
e->retval = CALLING_CONVENTIONS (function) (a, b);
1935
case TYPE_MPZ_KRONECKER_UI:
1938
PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size);
1939
e->retval = CALLING_CONVENTIONS(function) (a, (unsigned long)multiplier);
1942
case TYPE_MPZ_KRONECKER_SI:
1945
PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size);
1946
e->retval = CALLING_CONVENTIONS (function) (a, (long) multiplier);
1949
case TYPE_MPZ_UI_KRONECKER:
1952
PTR(b) = e->s[0].p; SIZ(b) = (carry==0 ? size : -size);
1953
e->retval = CALLING_CONVENTIONS(function) ((unsigned long)multiplier, b);
1956
case TYPE_MPZ_SI_KRONECKER:
1959
PTR(b) = e->s[0].p; SIZ(b) = (carry==0 ? size : -size);
1960
e->retval = CALLING_CONVENTIONS (function) ((long) multiplier, b);
1964
case TYPE_MUL_BASECASE:
1965
CALLING_CONVENTIONS (function)
1966
(e->d[0].p, e->s[0].p, size, e->s[1].p, size2);
1969
CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size);
1972
CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
1975
case TYPE_UMUL_PPMM:
1976
e->retval = CALLING_CONVENTIONS (function)
1977
(e->d[0].p, e->s[0].p[0], e->s[0].p[1]);
1979
case TYPE_UMUL_PPMM_R:
1980
e->retval = CALLING_CONVENTIONS (function)
1981
(e->s[0].p[0], e->s[0].p[1], e->d[0].p);
1986
e->retval = CALLING_CONVENTIONS (function)
1987
(e->d[0].p, e->s[0].p, size, shift);
1991
e->retval = (* (unsigned long (*)(ANYARGS))
1992
CALLING_CONVENTIONS (function)) (e->s[0].p, size);
1995
e->retval = (* (unsigned long (*)(ANYARGS))
1996
CALLING_CONVENTIONS (function)) (e->s[0].p, e->s[1].p, size);
2000
e->retval = (* (long (*)(ANYARGS)) CALLING_CONVENTIONS (function))
2001
(e->d[0].p, e->d[1].p, e->s[0].p, size);
2005
CALLING_CONVENTIONS (function) (e->d[0].p, size);
2010
size_t sizeinbase, fill;
2012
MPN_SIZEINBASE (sizeinbase, e->s[0].p, size, base);
2013
ASSERT_ALWAYS (sizeinbase <= d[0].size);
2014
fill = d[0].size - sizeinbase;
2017
memset (e->d[0].p, 0xBA, fill);
2018
dst = (char *) e->d[0].p + fill;
2022
dst = (char *) e->d[0].p;
2023
memset (dst + sizeinbase, 0xBA, fill);
2027
e->retval = CALLING_CONVENTIONS (function) (dst, base,
2032
refmpn_copy (e->d[1].p, e->s[0].p, size);
2033
e->retval = CALLING_CONVENTIONS (function) (dst, base,
2036
refmpn_zero (e->d[1].p, size); /* clobbered or unused */
2045
printf ("Unknown routine type %d\n", choice->type);
2053
pointer_setup (struct each_t *e)
2057
for (i = 0; i < NUM_DESTS; i++)
2059
switch (tr->dst_size[i]) {
2061
case SIZE_RETVAL: /* will be adjusted later */
2081
d[i].size = size + size2;
2091
d[i].size = size - size2;
2094
case SIZE_DIFF_PLUS_1:
2095
d[i].size = size - size2 + 1;
2098
case SIZE_CEIL_HALF:
2099
d[i].size = (size+1)/2;
2104
mp_limb_t ff = GMP_NUMB_MAX;
2105
MPN_SIZEINBASE (d[i].size, &ff - (size-1), size, base);
2110
printf ("Unrecognised dst_size type %d\n", tr->dst_size[i]);
2115
/* establish e->d[].p destinations */
2116
for (i = 0; i < NUM_DESTS; i++)
2118
mp_size_t offset = 0;
2120
/* possible room for overlapping sources */
2121
for (j = 0; j < numberof (overlap->s); j++)
2122
if (overlap->s[j] == i)
2123
offset = MAX (offset, s[j].align);
2127
if (tr->dst_bytes[i])
2129
e->d[i].p = (mp_ptr)
2130
((char *) (e->d[i].region.ptr + e->d[i].region.size)
2131
- d[i].size - d[i].align);
2135
e->d[i].p = e->d[i].region.ptr + e->d[i].region.size
2136
- d[i].size - d[i].align;
2137
if (tr->overlap == OVERLAP_LOW_TO_HIGH)
2138
e->d[i].p -= offset;
2143
if (tr->dst_bytes[i])
2145
e->d[i].p = (mp_ptr) ((char *) e->d[i].region.ptr + d[i].align);
2149
e->d[i].p = e->d[i].region.ptr + d[i].align;
2150
if (tr->overlap == OVERLAP_HIGH_TO_LOW)
2151
e->d[i].p += offset;
2156
/* establish e->s[].p sources */
2157
for (i = 0; i < NUM_SOURCES; i++)
2159
int o = overlap->s[i];
2167
/* overlap with d[o] */
2168
if (tr->overlap == OVERLAP_HIGH_TO_LOW)
2169
e->s[i].p = e->d[o].p - s[i].align;
2170
else if (tr->overlap == OVERLAP_LOW_TO_HIGH)
2171
e->s[i].p = e->d[o].p + s[i].align;
2172
else if (tr->size2 == SIZE_FRACTION)
2173
e->s[i].p = e->d[o].p + size2;
2175
e->s[i].p = e->d[o].p;
2186
validate_fail (void)
2190
trap_location = TRAP_REF;
2191
call (&ref, tr->reference);
2192
trap_location = TRAP_NOWHERE;
2209
trap_location = TRAP_SETUPS;
2211
if (tr->divisor == DIVISOR_NORM)
2212
divisor |= GMP_NUMB_HIGHBIT;
2213
if (tr->divisor == DIVISOR_ODD)
2216
for (i = 0; i < NUM_SOURCES; i++)
2219
s[i].p = s[i].region.ptr + s[i].region.size - SRC_SIZE(i) - s[i].align;
2221
s[i].p = s[i].region.ptr + s[i].align;
2224
pointer_setup (&ref);
2225
pointer_setup (&fun);
2227
for (i = 0; i < NUM_DESTS; i++)
2232
if (tr->dst0_from_src1 && i==0)
2234
t_random (s[1].region.ptr, d[0].size);
2235
MPN_COPY (fun.d[0].p, s[1].region.ptr, d[0].size);
2236
MPN_COPY (ref.d[0].p, s[1].region.ptr, d[0].size);
2238
else if (tr->dst_bytes[i])
2240
memset (ref.d[i].p, 0xBA, d[i].size);
2241
memset (fun.d[i].p, 0xBA, d[i].size);
2245
refmpn_fill (ref.d[i].p, d[i].size, DEADVAL);
2246
refmpn_fill (fun.d[i].p, d[i].size, DEADVAL);
2250
ref.retval = 0x04152637;
2251
fun.retval = 0x8C9DAEBF;
2253
for (i = 0; i < NUM_SOURCES; i++)
2258
mprotect_region (&s[i].region, PROT_READ|PROT_WRITE);
2259
t_random (s[i].p, SRC_SIZE(i));
2263
if (refmpn_zero_p (s[i].p, SRC_SIZE(i)))
2267
case DATA_MULTIPLE_DIVISOR:
2268
/* same number of low zero bits as divisor */
2269
s[i].p[0] &= ~ LOW_ZEROS_MASK (divisor);
2270
refmpn_sub_1 (s[i].p, s[i].p, size,
2271
refmpn_mod_1 (s[i].p, size, divisor));
2275
/* s[1] no more bits than s[0] */
2276
if (i == 1 && size2 == size)
2277
s[1].p[size-1] &= refmpn_msbone_mask (s[0].p[size-1]);
2279
/* high limb non-zero */
2280
s[i].p[SRC_SIZE(i)-1] += (s[i].p[SRC_SIZE(i)-1] == 0);
2291
case DATA_SRC1_HIGHBIT:
2295
s[i].p[size2-1] |= GMP_NUMB_HIGHBIT;
2297
s[i].p[size-1] |= GMP_NUMB_HIGHBIT;
2301
case DATA_UDIV_QRNND:
2302
s[i].p[1] %= divisor;
2306
mprotect_region (&s[i].region, PROT_READ);
2308
if (ref.s[i].p != s[i].p)
2310
refmpn_copyi (ref.s[i].p, s[i].p, SRC_SIZE(i));
2311
refmpn_copyi (fun.s[i].p, s[i].p, SRC_SIZE(i));
2318
if (tr->validate != NULL)
2320
trap_location = TRAP_FUN;
2321
call (&fun, choice->function);
2322
trap_location = TRAP_NOWHERE;
2324
if (! CALLING_CONVENTIONS_CHECK ())
2334
trap_location = TRAP_REF;
2335
call (&ref, tr->reference);
2336
trap_location = TRAP_FUN;
2337
call (&fun, choice->function);
2338
trap_location = TRAP_NOWHERE;
2340
if (! CALLING_CONVENTIONS_CHECK ())
2351
#define SIZE_ITERATION \
2352
for (size = MAX3 (option_firstsize, \
2354
(tr->size == SIZE_ALLOW_ZERO) ? 0 : 1); \
2355
size <= option_lastsize; \
2358
#define SIZE2_FIRST \
2359
(tr->size2 == SIZE_2 ? 2 \
2360
: tr->size2 == SIZE_FRACTION ? 0 \
2362
MAX (choice->minsize, (option_firstsize2 != 0 \
2363
? option_firstsize2 : 1)) \
2366
#define SIZE2_LAST \
2367
(tr->size2 == SIZE_2 ? 2 \
2368
: tr->size2 == SIZE_FRACTION ? FRACTION_COUNT-1 \
2369
: tr->size2 ? size \
2372
#define SIZE2_ITERATION \
2373
for (size2 = SIZE2_FIRST; size2 <= SIZE2_LAST; size2++)
2375
#define ALIGN_COUNT(cond) ((cond) ? ALIGNMENTS : 1)
2376
#define ALIGN_ITERATION(w,n,cond) \
2377
for (w[n].align = 0; w[n].align < ALIGN_COUNT(cond); w[n].align++)
2379
#define HIGH_LIMIT(cond) ((cond) != 0)
2380
#define HIGH_COUNT(cond) (HIGH_LIMIT (cond) + 1)
2381
#define HIGH_ITERATION(w,n,cond) \
2382
for (w[n].high = 0; w[n].high <= HIGH_LIMIT(cond); w[n].high++)
2384
#define SHIFT_LIMIT \
2385
((unsigned long) (tr->shift ? GMP_NUMB_BITS -1 : 1))
2387
#define SHIFT_ITERATION \
2388
for (shift = 1; shift <= SHIFT_LIMIT; shift++)
2397
unsigned long total = 1;
2399
total *= option_repetitions;
2400
total *= option_lastsize;
2401
if (tr->size2 == SIZE_FRACTION) total *= FRACTION_COUNT;
2402
else if (tr->size2) total *= (option_lastsize+1)/2;
2404
total *= SHIFT_LIMIT;
2405
total *= MULTIPLIER_COUNT;
2406
total *= DIVISOR_COUNT;
2407
total *= CARRY_COUNT;
2408
total *= T_RAND_COUNT;
2410
total *= HIGH_COUNT (tr->dst[0]);
2411
total *= HIGH_COUNT (tr->dst[1]);
2412
total *= HIGH_COUNT (tr->src[0]);
2413
total *= HIGH_COUNT (tr->src[1]);
2415
total *= ALIGN_COUNT (tr->dst[0]);
2416
total *= ALIGN_COUNT (tr->dst[1]);
2417
total *= ALIGN_COUNT (tr->src[0]);
2418
total *= ALIGN_COUNT (tr->src[1]);
2420
total *= OVERLAP_COUNT;
2422
printf ("%s %lu\n", choice->name, total);
2427
for (i = 0; i < option_repetitions; i++)
2432
MULTIPLIER_ITERATION
2434
CARRY_ITERATION /* must be after divisor */
2437
HIGH_ITERATION(d,0, tr->dst[0])
2438
HIGH_ITERATION(d,1, tr->dst[1])
2439
HIGH_ITERATION(s,0, tr->src[0])
2440
HIGH_ITERATION(s,1, tr->src[1])
2442
ALIGN_ITERATION(d,0, tr->dst[0])
2443
ALIGN_ITERATION(d,1, tr->dst[1])
2444
ALIGN_ITERATION(s,0, tr->src[0])
2445
ALIGN_ITERATION(s,1, tr->src[1])
2454
/* Usually print_all() doesn't show much, but it might give a hint as to
2455
where the function was up to when it died. */
2459
const char *name = "noname";
2462
case SIGILL: name = "SIGILL"; break;
2464
case SIGBUS: name = "SIGBUS"; break;
2466
case SIGSEGV: name = "SIGSEGV"; break;
2467
case SIGFPE: name = "SIGFPE"; break;
2470
printf ("\n\nSIGNAL TRAP: %s\n", name);
2472
switch (trap_location) {
2474
printf (" in reference function: %s\n", tr->reference_name);
2477
printf (" in test function: %s\n", choice->name);
2481
printf (" in parameter setups\n");
2485
printf (" somewhere unknown\n");
2495
#if HAVE_GETPAGESIZE
2496
/* Prefer getpagesize() over sysconf(), since on SunOS 4 sysconf() doesn't
2497
know _SC_PAGESIZE. */
2498
pagesize = getpagesize ();
2501
if ((pagesize = sysconf (_SC_PAGESIZE)) == -1)
2503
/* According to the linux man page, sysconf doesn't set errno */
2504
fprintf (stderr, "Cannot get sysconf _SC_PAGESIZE\n");
2508
Error, error, cannot get page size
2512
printf ("pagesize is 0x%lX bytes\n", pagesize);
2514
signal (SIGILL, trap);
2516
signal (SIGBUS, trap);
2518
signal (SIGSEGV, trap);
2519
signal (SIGFPE, trap);
2524
for (i = 0; i < NUM_SOURCES; i++)
2526
malloc_region (&s[i].region, 2*option_lastsize+ALIGNMENTS-1);
2527
printf ("s[%d] %p to %p (0x%lX bytes)\n",
2529
s[i].region.ptr + s[i].region.size,
2530
s[i].region.size * BYTES_PER_MP_LIMB);
2533
#define INIT_EACH(e,es) \
2534
for (i = 0; i < NUM_DESTS; i++) \
2536
malloc_region (&e.d[i].region, 2*option_lastsize+ALIGNMENTS-1); \
2537
printf ("%s d[%d] %p to %p (0x%lX bytes)\n", \
2538
es, i, e.d[i].region.ptr, \
2539
e.d[i].region.ptr + e.d[i].region.size, \
2540
e.d[i].region.size * BYTES_PER_MP_LIMB); \
2543
INIT_EACH(ref, "ref");
2544
INIT_EACH(fun, "fun");
2549
strmatch_wild (const char *pattern, const char *str)
2553
/* wildcard at start */
2554
if (pattern[0] == '*')
2557
plen = strlen (pattern);
2558
slen = strlen (str);
2560
|| (slen >= plen && memcmp (pattern, str+slen-plen, plen) == 0));
2563
/* wildcard at end */
2564
plen = strlen (pattern);
2565
if (plen >= 1 && pattern[plen-1] == '*')
2566
return (memcmp (pattern, str, plen-1) == 0);
2569
return (strcmp (pattern, str) == 0);
2573
try_name (const char *name)
2578
for (i = 0; i < numberof (choice_array); i++)
2580
if (strmatch_wild (name, choice_array[i].name))
2582
choice = &choice_array[i];
2583
tr = &param[choice->type];
2591
printf ("%s unknown\n", name);
2598
usage (const char *prog)
2603
printf ("Usage: %s [options] function...\n\
2604
-1 use limb data 1,2,3,etc\n\
2605
-9 use limb data all 0xFF..FFs\n\
2606
-a zeros use limb data all zeros\n\
2607
-a ffs use limb data all 0xFF..FFs (same as -9)\n\
2608
-a 2fd use data 0x2FFF...FFFD\n\
2609
-p print each case tried (try this if seg faulting)\n\
2610
-R seed random numbers from time()\n\
2611
-r reps set repetitions (default %d)\n\
2612
-s size starting size to test\n\
2613
-S size2 starting size2 to test\n\
2614
-s s1-s2 range of sizes to test\n\
2615
-W don't show the spinner (use this in gdb)\n\
2616
-z disable mprotect() redzones\n\
2617
Default data is refmpn_random() and refmpn_random2().\n\
2619
Functions that can be tested:\n\
2620
", prog, DEFAULT_REPETITIONS);
2622
for (i = 0; i < numberof (choice_array); i++)
2624
if (col + 1 + strlen (choice_array[i].name) > 79)
2629
printf (" %s", choice_array[i].name);
2630
col += 1 + strlen (choice_array[i].name);
2639
main (int argc, char *argv[])
2643
/* unbuffered output */
2644
setbuf (stdout, NULL);
2645
setbuf (stderr, NULL);
2647
/* default trace in hex, and in upper-case so can paste into bc */
2648
mp_trace_base = -16;
2653
unsigned seed = 123;
2656
while ((opt = getopt(argc, argv, "19a:b:pRr:S:s:Wz")) != EOF)
2660
/* use limb data values 1, 2, 3, ... etc */
2661
option_data = DATA_SEQ;
2664
/* use limb data values 0xFFF...FFF always */
2665
option_data = DATA_FFS;
2668
if (strcmp (optarg, "zeros") == 0) option_data = DATA_ZEROS;
2669
else if (strcmp (optarg, "seq") == 0) option_data = DATA_SEQ;
2670
else if (strcmp (optarg, "ffs") == 0) option_data = DATA_FFS;
2671
else if (strcmp (optarg, "2fd") == 0) option_data = DATA_2FD;
2674
fprintf (stderr, "unrecognised data option: %s\n", optarg);
2679
mp_trace_base = atoi (optarg);
2689
option_repetitions = atoi (optarg);
2694
option_firstsize = atoi (optarg);
2695
if ((p = strchr (optarg, '-')) != NULL)
2696
option_lastsize = atoi (p+1);
2700
/* -S <size> sets the starting size for the second of a two size
2701
routine (like mpn_mul_basecase) */
2702
option_firstsize2 = atoi (optarg);
2705
/* use this when running in the debugger */
2709
/* disable redzones */
2710
option_redzones = 0;
2718
gmp_randseed_ui (RANDS, seed);
2726
for (i = optind; i < argc; i++)