2
array-profile.c: routines for profiling simple array operations
4
Copyright (C) 2007, 2008, David Harvey
6
This file is part of the zn_poly library (version 0.8).
8
This program is free software: you can redistribute it and/or modify
9
it under the terms of the GNU General Public License as published by
10
the Free Software Foundation, either version 2 of the License, or
11
(at your option) version 3 of the License.
13
This program is distributed in the hope that it will be useful,
14
but WITHOUT ANY WARRANTY; without even the implied warranty of
15
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
GNU General Public License for more details.
18
You should have received a copy of the GNU General Public License
19
along with this program. If not, see <http://www.gnu.org/licenses/>.
27
#include "zn_poly_internal.h"
31
typedef void (*bfly_func)(ulong*, ulong*, ulong, const zn_mod_t);
35
Profiles one of the butterfly routines.
37
arg points to an array of ulongs:
38
* First is 0 for add, 1 for subtract, 2 for inplace butterfly.
39
* Second is 0 for safe version, 1 for slim version.
41
Returns total cycle count for _count_ calls to butterfly of length 1000.
43
// Times `count` calls of one routine (held in `target`) on two 1000-element
// ulong buffers and returns cycle_diff() of the two cycle-counter readings
// taken around the timed loop.
// NOTE(review): in this excerpt the braces, the declarations of i, j, n, mod
// and target, and any free() of buf1/buf2 are not visible -- presumably they
// live on lines missing from this view; confirm against the full file.
double profile_bfly(void* arg, unsigned long count)
45
// arg is read as an array of ulong; element 0 is the routine selector.
ulong type = ((ulong*) arg)[0];
46
// Element 1 of arg: nonzero picks the smaller shift in the expression below.
ulong speed = ((ulong*) arg)[1];
48
// Tail of a statement whose beginning is not visible here. It evaluates to
// 1UL << (ULONG_BITS - 2) when speed is nonzero, else 1UL << (ULONG_BITS - 1).
// NOTE(review): presumably this feeds the modulus n used below -- confirm.
(speed ? (1UL << (ULONG_BITS - 2)) : (1UL << (ULONG_BITS - 1)));
53
// Every call to target operates on arrays of this many elements.
const ulong len = 1000;
55
// Two heap buffers of len ulongs each.
// NOTE(review): no NULL check on the malloc results is visible here.
ulong* buf1 = (ulong*) malloc(sizeof(ulong) * len);
56
ulong* buf2 = (ulong*) malloc(sizeof(ulong) * len);
58
// generate random inputs: each element of both buffers is random_ulong(n)
60
for (i = 0; i < len; i++)
61
buf1[i] = random_ulong(n);
62
for (i = 0; i < len; i++)
63
buf2[i] = random_ulong(n);
68
// Three candidate routines for target; the first two are cast to bfly_func.
// NOTE(review): the conditions choosing between them are not visible here;
// presumably they test `type` (0 = add, 1 = subtract, otherwise butterfly).
target = (bfly_func) zn_array_add_inplace;
70
target = (bfly_func) zn_array_sub_inplace;
72
target = zn_array_bfly_inplace;
76
// Untimed pass: count calls made before the first counter reading.
// NOTE(review): presumably a warm-up -- confirm.
for (j = 0; j < count; j++)
77
target(buf1, buf2, len, mod);
79
// do the actual profile: read the counter, make count calls, read it again
80
cycle_count_t t0 = get_cycle_counter();
82
for (j = 0; j < count; j++)
83
target(buf1, buf2, len, mod);
85
cycle_count_t t1 = get_cycle_counter();
92
// The result covers only the timed loop between t0 and t1.
return cycle_diff(t0, t1);
98
Profiles mpn_add_n or mpn_sub_n.
100
arg points to a single ulong: 0 for mpn_add_n, 1 for mpn_sub_n.
102
Returns total cycle count for _count_ calls, each on arrays of length 1000.
104
// Times `count` calls of mpn_add_n or mpn_sub_n on two 1000-limb buffers and
// returns cycle_diff() of the two cycle-counter readings taken around the
// timed loop.
// NOTE(review): in this excerpt the braces, the declarations of i and j, the
// end of the target declaration, and any free() of buf1/buf2 are not visible
// -- presumably on lines missing from this view; confirm against the full
// file.
double profile_mpn_aors(void* arg, unsigned long count)
106
// arg is read as an array of ulong; only element 0 is used.
ulong type = ((ulong*) arg)[0];
108
// Every call to target operates on arrays of this many limbs.
const ulong len = 1000;
110
// Two heap buffers of len limbs each.
// NOTE(review): no NULL check on the malloc results is visible here.
mp_limb_t* buf1 = (mp_limb_t*) malloc(sizeof(mp_limb_t) * len);
111
mp_limb_t* buf2 = (mp_limb_t*) malloc(sizeof(mp_limb_t) * len);
113
// Function pointer for the routine being timed. The parameter list continues
// on a line not visible here (the calls below pass a fourth argument, len).
mp_limb_t (*target)(mp_limb_t*, const mp_limb_t*, const mp_limb_t*,
116
// Nonzero type selects mpn_sub_n; zero selects mpn_add_n.
target = type ? mpn_sub_n : mpn_add_n;
118
// generate random inputs: each limb is random_ulong(1UL << (ULONG_BITS - 1))
120
for (i = 0; i < len; i++)
121
buf1[i] = random_ulong(1UL << (ULONG_BITS - 1));
122
for (i = 0; i < len; i++)
123
buf2[i] = random_ulong(1UL << (ULONG_BITS - 1));
127
// Untimed pass: count calls made before the first counter reading.
// buf1 is passed as both the first (non-const) and second argument.
// NOTE(review): presumably a warm-up -- confirm.
for (j = 0; j < count; j++)
128
target(buf1, buf1, buf2, len);
130
// do the actual profile: read the counter, make count calls, read it again
131
cycle_count_t t0 = get_cycle_counter();
133
for (j = 0; j < count; j++)
134
target(buf1, buf1, buf2, len);
136
cycle_count_t t1 = get_cycle_counter();
141
// The result covers only the timed loop between t0 and t1.
return cycle_diff(t0, t1);
146
Profiles scalar multiplication.
148
arg points to an array of ulongs:
149
* First is modulus size in bits.
150
* Second is 0 for regular multiply, 1 for REDC multiply
152
Returns total cycle count for _count_ calls to zn_array_scalar_mul
155
// Times `count` calls of _zn_array_scalar_mul on a 1000-element ulong buffer
// (used as both first and second argument) and returns cycle_diff() of the
// two cycle-counter readings taken around the timed loop.
// NOTE(review): in this excerpt the braces, the declarations of i, j and mod,
// and any free() of buf are not visible -- presumably on lines missing from
// this view; confirm against the full file.
double profile_scalar_mul(void* arg, unsigned long count)
157
// arg is read as an array of ulong; elements 0 and 1 are narrowed to int.
int bits = ((ulong*) arg)[0];
158
int algo = ((ulong*) arg)[1];
161
// n comes from random_modulus(bits, 1) and bounds the random values below.
// NOTE(review): presumably mod is initialised from n on a line not visible
// here -- confirm.
ulong n = random_modulus(bits, 1);
164
// The multiplier passed to every _zn_array_scalar_mul call.
ulong scalar = random_ulong(n);
165
// Every call operates on an array of this many elements.
const ulong len = 1000;
167
// generate random input: each element of buf is random_ulong(n)
// NOTE(review): no NULL check on the malloc result is visible here.
168
ulong* buf = (ulong*) malloc(sizeof(ulong) * len);
170
for (i = 0; i < len; i++)
171
buf[i] = random_ulong(n);
173
cycle_count_t t0, t1;
177
// Untimed pass: count calls made before the first counter reading.
// NOTE(review): presumably a warm-up -- confirm.
for (j = 0; j < count; j++)
178
_zn_array_scalar_mul(buf, buf, len, scalar, algo, mod);
180
// do the actual profile: read the counter, make count calls, read it again
181
t0 = get_cycle_counter();
183
for (j = 0; j < count; j++)
184
_zn_array_scalar_mul(buf, buf, len, scalar, algo, mod);
186
t1 = get_cycle_counter();
191
// The result covers only the timed loop between t0 and t1.
return cycle_diff(t0, t1);
196
// end of file ****************************************************************