/*
mul_ks-tune.c: tuning program for mul_ks.c module
Copyright (C) 2007, 2008, David Harvey
This file is part of the zn_poly library (version 0.8).
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) version 3 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see .
*/
#include
#include
#include "support.h"
#include "zn_poly_internal.h"
#include "profiler.h"
/*
For each modulus bitsize, finds approximate crossover points between KS1,
KS2, and KS4 multiplication (or squaring, if that flag is set) algorithms.
Store these in the global tuning table, and writes some logging information
to _flog_.
*/
void tune_mul_KS(FILE* flog, int squaring, int verbose)
{
unsigned bits;
fprintf(flog, "KS %s: ", squaring ? "sqr" : "mul");
fflush(flog);
// how long we are willing to wait for each profile run
const double speed = 0.0001;
// run tuning process for each modulus size
for (bits = 2; bits <= ULONG_BITS; bits++)
{
// crossovers for KS1->KS2 and KS2->KS4
unsigned long crossover[2];
// which = 0 means comparing KS1 vs KS2
// which = 1 means comparing KS2 vs KS4
unsigned which;
for (which = 0; which < 2; which++)
{
double result[2];
profile_mul_info_t info[2];
info[0]->squaring = info[1]->squaring = squaring;
info[0]->n = info[1]->n = (1UL << (bits - 1)) + 1;
info[0]->algo = which ? ALGO_MUL_KS2_REDC : ALGO_MUL_KS1_REDC;
info[1]->algo = which ? ALGO_MUL_KS4_REDC : ALGO_MUL_KS2_REDC;
// find an upper bound, where 2nd algorithm appears to be safely
// ahead of 1st algorithm
size_t upper;
int found = 0;
for (upper = 45; upper <= 16384 && !found; upper = 2 * upper)
{
info[0]->len = info[1]->len = upper;
result[0] = profile(NULL, NULL, profile_mul, info[0], speed);
result[1] = profile(NULL, NULL, profile_mul, info[1], speed);
if (result[1] < 0.95 * result[0])
found = 1;
}
if (!found)
{
// couldn't find a reasonable upper bound
crossover[which] = SIZE_MAX;
continue;
}
// find a lower bound, where 1st algorithm appears to be safely
// ahead of 2nd algorithm
size_t lower;
found = 0;
for (lower = upper/2; lower >= 2 && !found; lower = lower / 2)
{
info[0]->len = info[1]->len = lower;
result[0] = profile(NULL, NULL, profile_mul, info[0], speed);
result[1] = profile(NULL, NULL, profile_mul, info[1], speed);
if (result[1] > 1.05 * result[0])
found = 1;
}
if (!found)
{
// couldn't find a reasonable lower bound
crossover[which] = 0;
continue;
}
// subdivide [lower, upper] into intervals and sample at each endpoint
double ratio = (double) upper / (double) lower;
const int max_intervals = 30;
size_t points[max_intervals + 1];
double score[max_intervals + 1];
unsigned i;
for (i = 0; i <= max_intervals; i++)
{
points[i] = ceil(lower * pow(ratio, (double) i / max_intervals));
info[0]->len = info[1]->len = points[i];
result[0] = profile(NULL, NULL, profile_mul, info[0], speed);
result[1] = profile(NULL, NULL, profile_mul, info[1], speed);
score[i] = result[1] / result[0];
}
// estimate crossover point
unsigned count = 0;
for (i = 0; i <= max_intervals; i++)
if (score[i] > 1.0)
count++;
crossover[which] = (size_t)
ceil(lower * pow(ratio, (double) count / (max_intervals + 1)));
}
if (verbose)
{
fprintf(flog, "\nbits = %u, cross to KS2 at ", bits);
if (crossover[0] == SIZE_MAX)
fprintf(flog, "infinity");
else
fprintf(flog, "%lu", crossover[0]);
fprintf(flog, ", cross to KS4 at ");
if (crossover[1] == SIZE_MAX)
fprintf(flog, "infinity");
else
fprintf(flog, "%lu", crossover[1]);
}
else
fprintf(flog, ".");
fflush(flog);
if (squaring)
{
tuning_info[bits].sqr_KS2_crossover = crossover[0];
tuning_info[bits].sqr_KS4_crossover = crossover[1];
}
else
{
tuning_info[bits].mul_KS2_crossover = crossover[0];
tuning_info[bits].mul_KS4_crossover = crossover[1];
}
}
fprintf(flog, "\n");
}
// end of file ****************************************************************