2
midmul.c: middle products
4
Copyright (C) 2007, 2008, David Harvey
6
This file is part of the zn_poly library (version 0.8).
8
This program is free software: you can redistribute it and/or modify
9
it under the terms of the GNU General Public License as published by
10
the Free Software Foundation, either version 2 of the License, or
11
(at your option) version 3 of the License.
13
This program is distributed in the hope that it will be useful,
14
but WITHOUT ANY WARRANTY; without even the implied warranty of
15
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
GNU General Public License for more details.
18
You should have received a copy of the GNU General Public License
19
along with this program. If not, see <http://www.gnu.org/licenses/>.
23
#include "zn_poly_internal.h"
26
ulong zn_array_midmul_fallback_get_fudge(size_t len1, size_t len2,
29
return _zn_array_mul_get_fudge(len1, len2, 0, mod);
33
void zn_array_midmul_fallback(ulong* res, const ulong* op1, size_t len1,
34
const ulong* op2, size_t len2, int fastred,
37
ZNP_ASSERT(len2 >= 1);
38
ZNP_ASSERT(len1 >= len2);
40
ZNP_FASTALLOC(temp, ulong, 6624, len1 + len2 - 1);
42
// just do full product and extract relevant segment
43
_zn_array_mul(temp, op1, len1, op2, len2, fastred, mod);
44
zn_array_copy(res, temp + len2 - 1, len1 - len2 + 1);
50
void _zn_array_midmul(ulong* res, const ulong* op1, size_t len1,
51
const ulong* op2, size_t len2, int fastred,
54
ZNP_ASSERT(len2 >= 1);
55
ZNP_ASSERT(len1 >= len2);
57
tuning_info_t* i = &tuning_info[mod->bits];
59
if (len2 < i->midmul_fft_crossover || !(mod->n & 1))
60
// (can't use FFT algorithm if the modulus is even)
61
zn_array_midmul_fallback(res, op1, len1, op2, len2, fastred, mod);
64
ulong scale = zn_array_midmul_fft_get_fudge(len1, len2, mod);
65
zn_array_midmul_fft(res, op1, len1, op2, len2, scale, mod);
70
void zn_array_midmul(ulong* res, const ulong* op1, size_t len1,
71
const ulong* op2, size_t len2, const zn_mod_t mod)
73
_zn_array_midmul(res, op1, len1, op2, len2, 0, mod);
78
void zn_array_midmul_precomp1_init(zn_array_midmul_precomp1_t res,
79
const ulong* op1, size_t len1,
80
size_t len2, const zn_mod_t mod)
86
int odd = (mod->n & 1);
88
// figure out which algorithm to use
91
// can't use FFT algorithm when modulus is even
92
res->algo = ZNP_MIDMUL_ALGO_FALLBACK;
95
tuning_info_t* i = &tuning_info[mod->bits];
97
if (len2 < i->midmul_fft_crossover)
98
res->algo = ZNP_MIDMUL_ALGO_FALLBACK;
100
res->algo = ZNP_MIDMUL_ALGO_FFT;
103
// now perform initialisation for chosen algorithm
107
case ZNP_MIDMUL_ALGO_FALLBACK:
109
// Make a copy of op1[0, len1).
111
// If modulus is odd, multiply it by the appropriate fudge factor
112
// so that we can use faster REDC reduction in the execute() routine.
113
res->op1 = (ulong*) malloc(len1 * sizeof(ulong));
116
ulong scale = zn_array_midmul_fallback_get_fudge(len1, len2, mod);
117
zn_array_scalar_mul(res->op1, op1, len1, scale, mod);
120
zn_array_copy(res->op1, op1, len1);
124
case ZNP_MIDMUL_ALGO_FFT:
126
res->precomp_fft = (struct zn_array_midmul_fft_precomp1_struct*)
127
malloc(sizeof(zn_array_midmul_fft_precomp1_t));
129
// we do scaling in this init() routine, to avoid doing it during
130
// each call to execute()
131
ulong scale = zn_array_midmul_fft_precomp1_get_fudge(len1, len2, mod);
132
zn_array_midmul_fft_precomp1_init(res->precomp_fft,
133
op1, len1, len2, scale, mod);
137
default: ZNP_ASSERT(0);
142
void zn_array_midmul_precomp1_clear(zn_array_midmul_precomp1_t op)
144
// dispatch to appropriate cleanup code
147
case ZNP_MIDMUL_ALGO_FALLBACK:
151
case ZNP_MIDMUL_ALGO_FFT:
152
zn_array_midmul_fft_precomp1_clear(op->precomp_fft);
153
free(op->precomp_fft);
156
default: ZNP_ASSERT(0);
162
void zn_array_midmul_precomp1_execute(
163
ulong* res, const ulong* op2,
164
const zn_array_midmul_precomp1_t precomp)
166
// dispatch to appropriate middle product code
167
switch (precomp->algo)
169
case ZNP_MIDMUL_ALGO_FALLBACK:
170
zn_array_midmul_fallback(res, precomp->op1, precomp->len1,
171
op2, precomp->len2, precomp->mod->n & 1,
175
case ZNP_MIDMUL_ALGO_FFT:
176
zn_array_midmul_fft_precomp1_execute(res, op2, 1,
177
precomp->precomp_fft);
180
default: ZNP_ASSERT(0);
185
// end of file ****************************************************************