/*
mul-tune.c: tuning program for multiplication algorithms
Copyright (C) 2007, 2008, David Harvey
This file is part of the zn_poly library (version 0.8).
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) version 3 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see .
*/
#include
#include "support.h"
#include "zn_poly_internal.h"
#include "profiler.h"
/*
For each modulus size, finds approximate crossover points between KS4
and fft multiplication algorithms.
(Note this needs to be done *after* the KS1/KS2/KS4 and nussbaumer
crossovers have been determined, since they are used as subroutines.)
Store these in the global crossover table, and writes some logging
information to flog.
*/
void tune_mul(FILE* flog, int squaring, int verbose)
{
unsigned bits;
fprintf(flog, "KS/FFT %s: ", squaring ? "sqr" : "mul");
fflush(flog);
// how long we are willing to wait for each profile run
const double speed = 0.0001;
// run tuning process for each modulus size
for (bits = 2; bits <= ULONG_BITS; bits++)
{
// crossover for KS4->FFT
size_t crossover;
double result[2];
profile_mul_info_t info[2];
info[0]->squaring = info[1]->squaring = squaring;
info[0]->n = info[1]->n = (1UL << (bits - 1)) + 1;
info[0]->algo = ALGO_MUL_KS4;
info[1]->algo = ALGO_MUL_FFT;
// find an upper bound, where FFT algorithm appears to be safely
// ahead of KS4 algorithm
size_t upper;
int found = 0;
for (upper = 45; upper <= 65536 && !found; upper = 2 * upper)
{
info[0]->len = info[1]->len = upper;
result[0] = profile(NULL, NULL, profile_mul, info[0], speed);
result[1] = profile(NULL, NULL, profile_mul, info[1], speed);
if (result[1] < 0.95 * result[0])
found = 1;
}
if (!found)
{
// couldn't find a reasonable upper bound
crossover = SIZE_MAX;
goto done;
}
// find a lower bound, where KS4 algorithm appears to be safely
// ahead of FFT algorithm
size_t lower;
found = 0;
for (lower = upper/2; lower >= 2 && !found; lower = lower / 2)
{
info[0]->len = info[1]->len = lower;
result[0] = profile(NULL, NULL, profile_mul, info[0], speed);
result[1] = profile(NULL, NULL, profile_mul, info[1], speed);
if (result[1] > 1.05 * result[0])
found = 1;
}
if (!found)
{
// couldn't find a reasonable lower bound
crossover = 0;
goto done;
}
// subdivide [lower, upper] into intervals and sample at each endpoint
double ratio = (double) upper / (double) lower;
const int max_intervals = 20;
size_t points[max_intervals + 1];
double score[max_intervals + 1];
unsigned i;
for (i = 0; i <= max_intervals; i++)
{
points[i] = ceil(lower * pow(ratio, (double) i / max_intervals));
info[0]->len = info[1]->len = points[i];
result[0] = profile(NULL, NULL, profile_mul, info[0], speed);
result[1] = profile(NULL, NULL, profile_mul, info[1], speed);
score[i] = result[1] / result[0];
}
// estimate crossover point
unsigned count = 0;
for (i = 0; i <= max_intervals; i++)
if (score[i] > 1.0)
count++;
crossover = (size_t)
ceil(lower * pow(ratio, (double) count / (max_intervals + 1)));
done:
if (verbose)
{
fprintf(flog, "\nbits = %u, cross to FFT at ", bits);
if (crossover == SIZE_MAX)
fprintf(flog, "infinity");
else
fprintf(flog, "%lu", crossover);
}
else
fprintf(flog, ".");
fflush(flog);
if (squaring)
tuning_info[bits].sqr_fft_crossover = crossover;
else
tuning_info[bits].mul_fft_crossover = crossover;
}
fprintf(flog, "\n");
}
// end of file ****************************************************************