/*
   pmf.c: polynomials modulo a fermat polynomial

   Copyright (C) 2007, 2008, David Harvey

   This file is part of the zn_poly library (version 0.8).

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 2 of the License, or
   (at your option) version 3 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "zn_poly_internal.h"

/* ============================================================================

     zn_pmf_t stuff

============================================================================ */

res := op, with bias set to zero.
40
Inplace operation not allowed.
42
void zn_pmf_normalise(zn_pmf_t res, zn_pmf_t op, ulong M, const zn_mod_t mod)
44
ZNP_ASSERT(res != op);
48
ulong i, b = op[0] & (2*M - 1);
55
for (i = 0; i < M-b; i++)
57
for (i = 0; i < b; i++)
58
res[i] = zn_mod_neg(op[i+M-b], mod);
63
for (i = 0; i < M-b; i++)
64
res[b+i] = zn_mod_neg(op[i], mod);
65
for (i = 0; i < b; i++)
71
void zn_pmf_print(const zn_pmf_t op, ulong M, const zn_mod_t mod)
73
ZNP_FASTALLOC(buf, ulong, 6624, M + 1);
75
zn_pmf_normalise(buf, op, M, mod);
77
printf("[%lu", buf[1]);
79
for (i = 1; i < M; i++)
80
printf(" %lu", buf[i+1]);
87
void zn_pmf_vec_print(const zn_pmf_vec_t op)
89
printf("M = %lu, K = %lu\n", op->M, op->K);
91
for (i = 0; i < op->K; i++)
94
zn_pmf_print(op->data + i*op->skip, op->M, op->mod);
100
void zn_pmf_vec_print_trunc(const zn_pmf_vec_t op, ulong length)
102
printf("M = %lu, K = %lu\n", op->M, op->K);
104
for (i = 0; i < length; i++)
107
zn_pmf_print(op->data + i*op->skip, op->M, op->mod);

/* ============================================================================

     inplace array butterflies

============================================================================ */

126
where op1 and op2 are arrays of length _len_.
128
Inputs must be [0, n); outputs will be in [0, n).
130
void zn_array_bfly_inplace(ulong* op1, ulong* op2, ulong len,
135
if (zn_mod_is_slim(mod))
139
for (; len >= 4; len -= 4)
142
*op1++ = zn_mod_add_slim(y, x, mod);
143
*op2++ = zn_mod_sub_slim(y, x, mod);
146
*op1++ = zn_mod_add_slim(y, x, mod);
147
*op2++ = zn_mod_sub_slim(y, x, mod);
150
*op1++ = zn_mod_add_slim(y, x, mod);
151
*op2++ = zn_mod_sub_slim(y, x, mod);
154
*op1++ = zn_mod_add_slim(y, x, mod);
155
*op2++ = zn_mod_sub_slim(y, x, mod);
160
*op1++ = zn_mod_add_slim(y, x, mod);
161
*op2++ = zn_mod_sub_slim(y, x, mod);
168
for (; len >= 4; len -= 4)
171
*op1++ = zn_mod_add(y, x, mod);
172
*op2++ = zn_mod_sub(y, x, mod);
175
*op1++ = zn_mod_add(y, x, mod);
176
*op2++ = zn_mod_sub(y, x, mod);
179
*op1++ = zn_mod_add(y, x, mod);
180
*op2++ = zn_mod_sub(y, x, mod);
183
*op1++ = zn_mod_add(y, x, mod);
184
*op2++ = zn_mod_sub(y, x, mod);
189
*op1++ = zn_mod_add(y, x, mod);
190
*op2++ = zn_mod_sub(y, x, mod);
198
where op1 and op2 are arrays of length _len_.
200
Inputs must be [0, n); outputs will be in [0, n).
202
void zn_array_add_inplace(ulong* op1, const ulong* op2, ulong len,
205
if (zn_mod_is_slim(mod))
209
for (; len >= 4; len -= 4)
211
*op1 = zn_mod_add_slim(*op1, *op2, mod);
213
*op1 = zn_mod_add_slim(*op1, *op2, mod);
215
*op1 = zn_mod_add_slim(*op1, *op2, mod);
217
*op1 = zn_mod_add_slim(*op1, *op2, mod);
222
*op1 = zn_mod_add_slim(*op1, *op2, mod);
230
for (; len >= 4; len -= 4)
232
*op1 = zn_mod_add(*op1, *op2, mod);
234
*op1 = zn_mod_add(*op1, *op2, mod);
236
*op1 = zn_mod_add(*op1, *op2, mod);
238
*op1 = zn_mod_add(*op1, *op2, mod);
243
*op1 = zn_mod_add(*op1, *op2, mod);
253
where op1 and op2 are arrays of length _len_.
255
Inputs must be [0, n); outputs will be in [0, n).
257
void zn_array_sub_inplace(ulong* op1, const ulong* op2, ulong len,
260
if (zn_mod_is_slim(mod))
264
for (; len >= 4; len -= 4)
266
*op1 = zn_mod_sub_slim(*op1, *op2, mod);
268
*op1 = zn_mod_sub_slim(*op1, *op2, mod);
270
*op1 = zn_mod_sub_slim(*op1, *op2, mod);
272
*op1 = zn_mod_sub_slim(*op1, *op2, mod);
277
*op1 = zn_mod_sub_slim(*op1, *op2, mod);
285
for (; len >= 4; len -= 4)
287
*op1 = zn_mod_sub(*op1, *op2, mod);
289
*op1 = zn_mod_sub(*op1, *op2, mod);
291
*op1 = zn_mod_sub(*op1, *op2, mod);
293
*op1 = zn_mod_sub(*op1, *op2, mod);
298
*op1 = zn_mod_sub(*op1, *op2, mod);

/* ============================================================================

     inplace zn_pmf_t butterflies

============================================================================ */


/*
   In the following routines, we work with the "relative bias" between the
   two inputs, i.e. b = difference between bias of op1 and op2. This allows
   us to avoid unnecessary normalisation steps; we just add and subtract
   directly into the correct memory locations.
*/
321
void zn_pmf_bfly(zn_pmf_t op1, zn_pmf_t op2, ulong M, const zn_mod_t mod)
323
ulong b = op2[0] - op1[0];
327
// bias is in [M, 2M) mod 2M
330
// butterfly on op1[0, b) and op2[M-b, M)
331
zn_array_bfly_inplace(op1+1, op2+1+M-b, b, mod);
332
// butterfly on op1[b, M) and op2[0, M-b)
333
zn_array_bfly_inplace(op2+1, op1+1+b, M-b, mod);
337
// bias is in [0, M) mod 2M
340
// butterfly on op1[0, b) and op2[M-b, M)
341
zn_array_bfly_inplace(op2+1+M-b, op1+1, b, mod);
342
// butterfly on op1[b, M) and op2[0, M-b)
343
zn_array_bfly_inplace(op1+1+b, op2+1, M-b, mod);
348
void zn_pmf_add(zn_pmf_t op1, const zn_pmf_t op2, ulong M, const zn_mod_t mod)
350
ulong b = op2[0] - op1[0];
354
// bias is in [M, 2M) mod 2M
357
// add op2[M-b, M) to op1[0, b)
358
zn_array_add_inplace(op1+1, op2+1+M-b, b, mod);
359
// subtract op2[0, M-b) from op1[b, M)
360
zn_array_sub_inplace(op1+1+b, op2+1, M-b, mod);
364
// bias is in [0, M) mod 2M
367
// subtract op2[M-b, M) from op1[0, b)
368
zn_array_sub_inplace(op1+1, op2+1+M-b, b, mod);
369
// add op2[0, M-b) to op1[b, M)
370
zn_array_add_inplace(op1+1+b, op2+1, M-b, mod);
375
void zn_pmf_sub(zn_pmf_t op1, const zn_pmf_t op2, ulong M, const zn_mod_t mod)
377
ulong b = op2[0] - op1[0];
381
// bias is in [M, 2M) mod 2M
384
// subtract op2[M-b, M) from op1[0, b)
385
zn_array_sub_inplace(op1+1, op2+1+M-b, b, mod);
386
// add op2[0, M-b) to op1[b, M)
387
zn_array_add_inplace(op1+1+b, op2+1, M-b, mod);
391
// bias is in [0, M) mod 2M
394
// add op2[M-b, M) to op1[0, b)
395
zn_array_add_inplace(op1+1, op2+1+M-b, b, mod);
396
// subtract op2[0, M-b) from op1[b, M)
397
zn_array_sub_inplace(op1+1+b, op2+1, M-b, mod);

/* ============================================================================

     zn_pmf_vec_t stuff

============================================================================ */

void zn_pmf_vec_init(zn_pmf_vec_t res, unsigned lgK, ptrdiff_t skip,
411
unsigned lgM, const zn_mod_t mod)
413
ZNP_ASSERT(skip >= (1UL << lgM) + 1);
421
res->data = (ulong*) malloc(sizeof(ulong) * skip * res->K);
425
void zn_pmf_vec_init_nussbaumer(zn_pmf_vec_t res, unsigned lgL,
429
nussbaumer_params(&lgK, &lgM, lgL);
430
zn_pmf_vec_init(res, lgK, (1UL << lgM) + 1, lgM, mod);
434
void zn_pmf_vec_clear(zn_pmf_vec_t op)
440
void zn_pmf_vec_set(zn_pmf_vec_t res, const zn_pmf_vec_t op)
442
ZNP_ASSERT(zn_pmf_vec_compatible(res, op));
445
for (i = 0; i < op->K; i++)
446
zn_pmf_set(res->data + i * res->skip, op->data + i * op->skip, op->M);
450
void zn_pmf_vec_scalar_mul(zn_pmf_vec_t op, ulong len, ulong x)
452
ZNP_ASSERT(len <= op->K);
455
zn_pmf_t ptr = op->data;
456
for (i = 0; i < len; i++, ptr += op->skip)
457
zn_pmf_scalar_mul(ptr, op->M, x, op->mod);
461
ulong zn_pmf_vec_mul_get_fudge(unsigned lgM, int squaring, const zn_mod_t mod)
463
int use_nussbaumer = (lgM >= (squaring
464
? tuning_info[mod->bits].nuss_sqr_crossover
465
: tuning_info[mod->bits].nuss_mul_crossover));
468
return nussbaumer_mul_get_fudge(lgM, squaring, mod);
470
return _zn_array_mul_get_fudge(1UL << lgM, 1UL << lgM, squaring, mod);
474
void zn_pmf_vec_mul(zn_pmf_vec_t res, const zn_pmf_vec_t op1,
475
const zn_pmf_vec_t op2, ulong length,
476
int special_first_two)
478
ZNP_ASSERT(res->mod->n & 1);
479
ZNP_ASSERT(zn_pmf_vec_compatible(res, op1));
480
ZNP_ASSERT(zn_pmf_vec_compatible(res, op2));
481
ZNP_ASSERT(res->M >= 2 || !special_first_two);
483
zn_pmf_t res_ptr = res->data;
484
zn_pmf_const_t op1_ptr = op1->data;
485
zn_pmf_const_t op2_ptr = op2->data;
487
const zn_mod_struct* mod = res->mod;
490
unsigned lgM = op1->lgM;
492
int squaring = (op1 == op2);
494
// use nussbaumer algorithm if the pointwise mults are large enough
495
int use_nussbaumer = (lgM >= (squaring
496
? tuning_info[mod->bits].nuss_sqr_crossover
497
: tuning_info[mod->bits].nuss_mul_crossover));
499
// scratch space for nussbaumer multiplications
500
zn_pmf_vec_t temp1, temp2;
505
zn_pmf_vec_init_nussbaumer(temp1, lgM, mod);
506
zn_pmf_vec_init_nussbaumer(temp2, lgM, mod);
507
nuss_lgK = temp1->lgK;
512
if (special_first_two)
514
ZNP_FASTALLOC(temp, ulong, 6624, 2*M);
516
// need to adjust the fudge factors, so that the fudge factor for these
517
// first two special products matches up with the fudge factors for the
518
// remaining products
520
fixup = use_nussbaumer ? nussbaumer_mul_get_fudge(lgM, squaring, mod)
521
: _zn_array_mul_get_fudge(M, M, squaring, mod);
522
fixup = zn_mod_mul(_zn_array_mul_get_fudge(M/2, M/2, squaring, mod),
523
zn_mod_invert(fixup, mod), mod);
525
// length M/2 multiplications
526
for (; i < 2 && i < length; i++, res_ptr += res->skip,
527
op1_ptr += op1->skip, op2_ptr += op2->skip)
530
res_ptr[0] = op1_ptr[0] + op2_ptr[0];
532
// do the actual multiplication
533
_zn_array_mul(temp, op1_ptr + 1, M/2, op2_ptr + 1, M/2, 1, mod);
535
// apply the fudge factor
537
zn_array_copy(res_ptr + 1, temp, M - 1);
539
zn_array_scalar_mul(res_ptr + 1, temp, M - 1, fixup, mod);
548
for (; i < length; i++, res_ptr += res->skip,
549
op1_ptr += op1->skip, op2_ptr += op2->skip)
552
res_ptr[0] = op1_ptr[0] + op2_ptr[0];
554
nussbaumer_mul(res_ptr + 1, op1_ptr + 1, op2_ptr + 1, temp1, temp2);
557
zn_pmf_vec_clear(temp2);
558
zn_pmf_vec_clear(temp1);
562
// scratch space for KS negacyclic multiplication
563
ZNP_FASTALLOC(temp, ulong, 6624, 2*M);
566
for (; i < length; i++, res_ptr += res->skip,
567
op1_ptr += op1->skip, op2_ptr += op2->skip)
570
res_ptr[0] = op1_ptr[0] + op2_ptr[0];
572
// ordinary multiplication...
573
_zn_array_mul(temp, op1_ptr + 1, M, op2_ptr + 1, M, 1, mod);
574
// ... negacyclic reduction
575
zn_array_sub(res_ptr + 1, temp, temp + M, M, mod);
584
void zn_pmf_vec_reverse(zn_pmf_vec_t op, ulong length)
586
op->data += op->skip * (length - 1);
587
op->skip = -op->skip;
592
// end of file ****************************************************************