/*
   pmf.c: polynomials modulo a fermat polynomial

   Copyright (C) 2007, 2008, David Harvey

   This file is part of the zn_poly library (version 0.8).

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 2 of the License, or
   (at your option) version 3 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "zn_poly_internal.h"

/* ============================================================================

     zn_pmf_t stuff

============================================================================ */

res := op, with bias set to zero.
40
Inplace operation not allowed.
42
void zn_pmf_normalise(zn_pmf_t res, zn_pmf_t op, ulong M, const zn_mod_t mod)
44
ZNP_ASSERT(res != op);
48
ulong i, b = op[0] & (2*M - 1);
55
for (i = 0; i < M-b; i++)
57
for (i = 0; i < b; i++)
58
res[i] = zn_mod_neg(op[i+M-b], mod);
63
for (i = 0; i < M-b; i++)
64
res[b+i] = zn_mod_neg(op[i], mod);
65
for (i = 0; i < b; i++)
71
void zn_pmf_print(const zn_pmf_t op, ulong M, const zn_mod_t mod)
73
ZNP_FASTALLOC(buf, ulong, 6624, M + 1);
75
zn_pmf_normalise(buf, op, M, mod);
77
printf("[%lu", buf[1]);
79
for (i = 1; i < M; i++)
80
printf(" %lu", buf[i+1]);
87
void zn_pmf_vec_print(const zn_pmf_vec_t op)
89
printf("M = %lu, K = %lu\n", op->M, op->K);
91
for (i = 0; i < op->K; i++)
94
zn_pmf_print(op->data + i*op->skip, op->M, op->mod);
100
void zn_pmf_vec_print_trunc(const zn_pmf_vec_t op, ulong length)
102
printf("M = %lu, K = %lu\n", op->M, op->K);
104
for (i = 0; i < length; i++)
107
zn_pmf_print(op->data + i*op->skip, op->M, op->mod);

/* ============================================================================

     inplace array butterflies

============================================================================ */

126
where op1 and op2 are arrays of length _len_.
128
Inputs must be [0, n); outputs will be in [0, n).
130
void zn_array_bfly_inplace(ulong* op1, ulong* op2, ulong len,
135
if (zn_mod_is_slim(mod))
139
for (; len >= 4; len -= 4)
142
*op1++ = zn_mod_add_slim(y, x, mod);
143
*op2++ = zn_mod_sub_slim(y, x, mod);
146
*op1++ = zn_mod_add_slim(y, x, mod);
147
*op2++ = zn_mod_sub_slim(y, x, mod);
150
*op1++ = zn_mod_add_slim(y, x, mod);
151
*op2++ = zn_mod_sub_slim(y, x, mod);
154
*op1++ = zn_mod_add_slim(y, x, mod);
155
*op2++ = zn_mod_sub_slim(y, x, mod);
160
*op1++ = zn_mod_add_slim(y, x, mod);
161
*op2++ = zn_mod_sub_slim(y, x, mod);
168
for (; len >= 4; len -= 4)
171
*op1++ = zn_mod_add(y, x, mod);
172
*op2++ = zn_mod_sub(y, x, mod);
175
*op1++ = zn_mod_add(y, x, mod);
176
*op2++ = zn_mod_sub(y, x, mod);
179
*op1++ = zn_mod_add(y, x, mod);
180
*op2++ = zn_mod_sub(y, x, mod);
183
*op1++ = zn_mod_add(y, x, mod);
184
*op2++ = zn_mod_sub(y, x, mod);
189
*op1++ = zn_mod_add(y, x, mod);
190
*op2++ = zn_mod_sub(y, x, mod);
198
where op1 and op2 are arrays of length _len_.
200
Inputs must be [0, n); outputs will be in [0, n).
202
void zn_array_add_inplace(ulong* op1, const ulong* op2, ulong len,
205
if (zn_mod_is_slim(mod))
209
for (; len >= 4; len -= 4)
211
*op1 = zn_mod_add_slim(*op1, *op2, mod);
213
*op1 = zn_mod_add_slim(*op1, *op2, mod);
215
*op1 = zn_mod_add_slim(*op1, *op2, mod);
217
*op1 = zn_mod_add_slim(*op1, *op2, mod);
222
*op1 = zn_mod_add_slim(*op1, *op2, mod);
230
for (; len >= 4; len -= 4)
232
*op1 = zn_mod_add(*op1, *op2, mod);
234
*op1 = zn_mod_add(*op1, *op2, mod);
236
*op1 = zn_mod_add(*op1, *op2, mod);
238
*op1 = zn_mod_add(*op1, *op2, mod);
243
*op1 = zn_mod_add(*op1, *op2, mod);
253
where op1 and op2 are arrays of length _len_.
255
Inputs must be [0, n); outputs will be in [0, n).
257
void zn_array_sub_inplace(ulong* op1, const ulong* op2, ulong len,
260
if (zn_mod_is_slim(mod))
264
for (; len >= 4; len -= 4)
266
*op1 = zn_mod_sub_slim(*op1, *op2, mod);
268
*op1 = zn_mod_sub_slim(*op1, *op2, mod);
270
*op1 = zn_mod_sub_slim(*op1, *op2, mod);
272
*op1 = zn_mod_sub_slim(*op1, *op2, mod);
277
*op1 = zn_mod_sub_slim(*op1, *op2, mod);
285
for (; len >= 4; len -= 4)
287
*op1 = zn_mod_sub(*op1, *op2, mod);
289
*op1 = zn_mod_sub(*op1, *op2, mod);
291
*op1 = zn_mod_sub(*op1, *op2, mod);
293
*op1 = zn_mod_sub(*op1, *op2, mod);
298
*op1 = zn_mod_sub(*op1, *op2, mod);

/* ============================================================================

     inplace zn_pmf_t butterflies

============================================================================ */


/*
   In the following routines, we work with the "relative bias" between the
   two inputs, i.e. b = difference between bias of op1 and op2. This allows
   us to avoid unnecessary normalisation steps; we just add and subtract
   directly into the correct memory locations.
*/
321
void zn_pmf_bfly(zn_pmf_t op1, zn_pmf_t op2, ulong M, const zn_mod_t mod)
323
ulong b = op2[0] - op1[0];
327
// bias is in [M, 2M) mod 2M
330
// butterfly on op1[0, b) and op2[M-b, M)
331
zn_array_bfly_inplace(op1+1, op2+1+M-b, b, mod);
332
// butterfly on op1[b, M) and op2[0, M-b)
333
zn_array_bfly_inplace(op2+1, op1+1+b, M-b, mod);
337
// bias is in [0, M) mod 2M
340
// butterfly on op1[0, b) and op2[M-b, M)
341
zn_array_bfly_inplace(op2+1+M-b, op1+1, b, mod);
342
// butterfly on op1[b, M) and op2[0, M-b)
343
zn_array_bfly_inplace(op1+1+b, op2+1, M-b, mod);
348
void zn_pmf_add(zn_pmf_t op1, const zn_pmf_t op2, ulong M, const zn_mod_t mod)
350
ulong b = op2[0] - op1[0];
354
// bias is in [M, 2M) mod 2M
357
// add op2[M-b, M) to op1[0, b)
358
zn_array_add_inplace(op1+1, op2+1+M-b, b, mod);
359
// subtract op2[0, M-b) from op1[b, M)
360
zn_array_sub_inplace(op1+1+b, op2+1, M-b, mod);
364
// bias is in [0, M) mod 2M
367
// subtract op2[M-b, M) from op1[0, b)
368
zn_array_sub_inplace(op1+1, op2+1+M-b, b, mod);
369
// add op2[0, M-b) to op1[b, M)
370
zn_array_add_inplace(op1+1+b, op2+1, M-b, mod);
375
void zn_pmf_sub(zn_pmf_t op1, const zn_pmf_t op2, ulong M, const zn_mod_t mod)
377
ulong b = op2[0] - op1[0];
381
// bias is in [M, 2M) mod 2M
384
// subtract op2[M-b, M) from op1[0, b)
385
zn_array_sub_inplace(op1+1, op2+1+M-b, b, mod);
386
// add op2[0, M-b) to op1[b, M)
387
zn_array_add_inplace(op1+1+b, op2+1, M-b, mod);
391
// bias is in [0, M) mod 2M
394
// add op2[M-b, M) to op1[0, b)
395
zn_array_add_inplace(op1+1, op2+1+M-b, b, mod);
396
// subtract op2[0, M-b) from op1[b, M)
397
zn_array_sub_inplace(op1+1+b, op2+1, M-b, mod);

/* ============================================================================

     zn_pmf_vec_t stuff

============================================================================ */

void zn_pmf_vec_init(zn_pmf_vec_t res, unsigned lgK, ptrdiff_t skip,
411
unsigned lgM, const zn_mod_t mod)
413
ZNP_ASSERT(skip >= (1UL << lgM) + 1);
421
res->data = (ulong*) malloc(sizeof(ulong) * skip * res->K);
425
void zn_pmf_vec_init_nussbaumer(zn_pmf_vec_t res, unsigned lgL,
429
nussbaumer_params(&lgK, &lgM, lgL);
430
zn_pmf_vec_init(res, lgK, (1UL << lgM) + 1, lgM, mod);
434
void zn_pmf_vec_clear(zn_pmf_vec_t op)
440
void zn_pmf_vec_set(zn_pmf_vec_t res, const zn_pmf_vec_t op)
442
ZNP_ASSERT(zn_pmf_vec_compatible(res, op));
445
for (i = 0; i < op->K; i++)
446
zn_pmf_set(res->data + i * res->skip, op->data + i * op->skip, op->M);
450
void zn_pmf_vec_scalar_mul(zn_pmf_vec_t op, ulong len, ulong x)
452
ZNP_ASSERT(len <= op->K);
455
zn_pmf_t ptr = op->data;
456
for (i = 0; i < len; i++, ptr += op->skip)
457
zn_pmf_scalar_mul(ptr, op->M, x, op->mod);
461
ulong zn_pmf_vec_mul_get_fudge(unsigned lgM, int squaring, const zn_mod_t mod)
463
int use_nussbaumer = (lgM >= (squaring
464
? tuning_info[mod->bits].nuss_sqr_crossover
465
: tuning_info[mod->bits].nuss_mul_crossover));
468
return nussbaumer_mul_get_fudge(lgM, squaring, mod);
470
return _zn_array_mul_get_fudge(1UL << lgM, 1UL << lgM, squaring, mod);
474
void zn_pmf_vec_mul(zn_pmf_vec_t res, const zn_pmf_vec_t op1,
475
const zn_pmf_vec_t op2, ulong length,
476
int special_first_two)
478
ZNP_ASSERT(res->mod->n & 1);
479
ZNP_ASSERT(zn_pmf_vec_compatible(res, op1));
480
ZNP_ASSERT(zn_pmf_vec_compatible(res, op2));
481
ZNP_ASSERT(res->M >= 2 || !special_first_two);
483
zn_pmf_t res_ptr = res->data;
484
zn_pmf_const_t op1_ptr = op1->data;
485
zn_pmf_const_t op2_ptr = op2->data;
487
const zn_mod_struct* mod = res->mod;
490
unsigned lgM = op1->lgM;
492
int squaring = (op1 == op2);
494
// use nussbaumer algorithm if the pointwise mults are large enough
495
int use_nussbaumer = (lgM >= (squaring
496
? tuning_info[mod->bits].nuss_sqr_crossover
497
: tuning_info[mod->bits].nuss_mul_crossover));
499
// scratch space for nussbaumer multiplications
500
zn_pmf_vec_t temp1, temp2;
505
zn_pmf_vec_init_nussbaumer(temp1, lgM, mod);
506
zn_pmf_vec_init_nussbaumer(temp2, lgM, mod);
507
nuss_lgK = temp1->lgK;
512
if (special_first_two)
514
ZNP_FASTALLOC(temp, ulong, 6624, 2*M);
516
// need to adjust the fudge factors, so that the fudge factor for these
517
// first two special products matches up with the fudge factors for the
518
// remaining products
520
fixup = use_nussbaumer ? nussbaumer_mul_get_fudge(lgM, squaring, mod)
521
: _zn_array_mul_get_fudge(M, M, squaring, mod);
522
fixup = zn_mod_mul(_zn_array_mul_get_fudge(M/2, M/2, squaring, mod),
523
zn_mod_invert(fixup, mod), mod);
525
// length M/2 multiplications
526
for (; i < 2 && i < length; i++, res_ptr += res->skip,
527
op1_ptr += op1->skip, op2_ptr += op2->skip)
530
res_ptr[0] = op1_ptr[0] + op2_ptr[0];
532
// do the actual multiplication
533
_zn_array_mul(temp, op1_ptr + 1, M/2, op2_ptr + 1, M/2, 1, mod);
535
// apply the fudge factor
537
zn_array_copy(res_ptr + 1, temp, M - 1);
539
zn_array_scalar_mul(res_ptr + 1, temp, M - 1, fixup, mod);
548
for (; i < length; i++, res_ptr += res->skip,
549
op1_ptr += op1->skip, op2_ptr += op2->skip)
552
res_ptr[0] = op1_ptr[0] + op2_ptr[0];
554
nussbaumer_mul(res_ptr + 1, op1_ptr + 1, op2_ptr + 1, temp1, temp2);
557
zn_pmf_vec_clear(temp2);
558
zn_pmf_vec_clear(temp1);
562
// scratch space for KS negacyclic multiplication
563
ZNP_FASTALLOC(temp, ulong, 6624, 2*M);
566
for (; i < length; i++, res_ptr += res->skip,
567
op1_ptr += op1->skip, op2_ptr += op2->skip)
570
res_ptr[0] = op1_ptr[0] + op2_ptr[0];
572
// ordinary multiplication...
573
_zn_array_mul(temp, op1_ptr + 1, M, op2_ptr + 1, M, 1, mod);
574
// ... negacyclic reduction
575
zn_array_sub(res_ptr + 1, temp, temp + M, M, mod);
584
void zn_pmf_vec_reverse(zn_pmf_vec_t op, ulong length)
586
op->data += op->skip * (length - 1);
587
op->skip = -op->skip;
592
// end of file ****************************************************************