2
* Ed25519 field element
5
* Based on the public domain code from SUPERCOP ref10 by
6
* Peter Schwabe, Daniel J. Bernstein, Niels Duif, Tanja Lange, Bo-Yin Yang
8
* Botan is released under the Simplified BSD License (see license.txt)
11
#include <botan/internal/ed25519_fe.h>
12
#include <botan/internal/ed25519_internal.h>
17
FE_25519 FE_25519::invert(const FE_25519& z)
25
fe_sq_iter(t1, t0, 2);
30
fe_sq_iter(t2, t1, 5);
32
fe_sq_iter(t2, t1, 10);
34
fe_sq_iter(t3, t2, 20);
36
fe_sq_iter(t2, t2, 10);
38
fe_sq_iter(t2, t1, 50);
40
fe_sq_iter(t3, t2, 100);
42
fe_sq_iter(t2, t2, 50);
44
fe_sq_iter(t1, t1, 5);
50
FE_25519 FE_25519::pow_22523(const fe& z)
57
fe_sq_iter(t1, t0, 2);
62
fe_sq_iter(t1, t0, 5);
64
fe_sq_iter(t1, t0, 10);
66
fe_sq_iter(t2, t1, 20);
68
fe_sq_iter(t1, t1, 10);
70
fe_sq_iter(t1, t0, 50);
72
fe_sq_iter(t2, t1, 100);
74
fe_sq_iter(t1, t1, 50);
76
fe_sq_iter(t0, t0, 2);
84
Can overlap h with f or g.
87
|f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
88
|g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
91
|h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
95
Notes on implementation strategy:
97
Using schoolbook multiplication.
98
Karatsuba would save a little in some cost models.
100
Most multiplications by 2 and 19 are 32-bit precomputations;
101
cheaper than 64-bit postcomputations.
103
There is one remaining multiplication by 19 in the carry chain;
104
one *19 precomputation can be merged into this,
105
but the resulting data flow is considerably less clean.
107
There are 12 carries below.
108
10 of them are 2-way parallelizable and vectorizable.
109
Can get away with 11 carries, but then data flow is much deeper.
111
With tighter constraints on inputs can squeeze carries into int32.
115
FE_25519 FE_25519::mul(const FE_25519& f, const FE_25519& g)
139
int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */
140
int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */
141
int32_t g3_19 = 19 * g3;
142
int32_t g4_19 = 19 * g4;
143
int32_t g5_19 = 19 * g5;
144
int32_t g6_19 = 19 * g6;
145
int32_t g7_19 = 19 * g7;
146
int32_t g8_19 = 19 * g8;
147
int32_t g9_19 = 19 * g9;
148
int32_t f1_2 = 2 * f1;
149
int32_t f3_2 = 2 * f3;
150
int32_t f5_2 = 2 * f5;
151
int32_t f7_2 = 2 * f7;
152
int32_t f9_2 = 2 * f9;
153
int64_t f0g0 = f0 * static_cast<int64_t>(g0);
154
int64_t f0g1 = f0 * static_cast<int64_t>(g1);
155
int64_t f0g2 = f0 * static_cast<int64_t>(g2);
156
int64_t f0g3 = f0 * static_cast<int64_t>(g3);
157
int64_t f0g4 = f0 * static_cast<int64_t>(g4);
158
int64_t f0g5 = f0 * static_cast<int64_t>(g5);
159
int64_t f0g6 = f0 * static_cast<int64_t>(g6);
160
int64_t f0g7 = f0 * static_cast<int64_t>(g7);
161
int64_t f0g8 = f0 * static_cast<int64_t>(g8);
162
int64_t f0g9 = f0 * static_cast<int64_t>(g9);
163
int64_t f1g0 = f1 * static_cast<int64_t>(g0);
164
int64_t f1g1_2 = f1_2 * static_cast<int64_t>(g1);
165
int64_t f1g2 = f1 * static_cast<int64_t>(g2);
166
int64_t f1g3_2 = f1_2 * static_cast<int64_t>(g3);
167
int64_t f1g4 = f1 * static_cast<int64_t>(g4);
168
int64_t f1g5_2 = f1_2 * static_cast<int64_t>(g5);
169
int64_t f1g6 = f1 * static_cast<int64_t>(g6);
170
int64_t f1g7_2 = f1_2 * static_cast<int64_t>(g7);
171
int64_t f1g8 = f1 * static_cast<int64_t>(g8);
172
int64_t f1g9_38 = f1_2 * static_cast<int64_t>(g9_19);
173
int64_t f2g0 = f2 * static_cast<int64_t>(g0);
174
int64_t f2g1 = f2 * static_cast<int64_t>(g1);
175
int64_t f2g2 = f2 * static_cast<int64_t>(g2);
176
int64_t f2g3 = f2 * static_cast<int64_t>(g3);
177
int64_t f2g4 = f2 * static_cast<int64_t>(g4);
178
int64_t f2g5 = f2 * static_cast<int64_t>(g5);
179
int64_t f2g6 = f2 * static_cast<int64_t>(g6);
180
int64_t f2g7 = f2 * static_cast<int64_t>(g7);
181
int64_t f2g8_19 = f2 * static_cast<int64_t>(g8_19);
182
int64_t f2g9_19 = f2 * static_cast<int64_t>(g9_19);
183
int64_t f3g0 = f3 * static_cast<int64_t>(g0);
184
int64_t f3g1_2 = f3_2 * static_cast<int64_t>(g1);
185
int64_t f3g2 = f3 * static_cast<int64_t>(g2);
186
int64_t f3g3_2 = f3_2 * static_cast<int64_t>(g3);
187
int64_t f3g4 = f3 * static_cast<int64_t>(g4);
188
int64_t f3g5_2 = f3_2 * static_cast<int64_t>(g5);
189
int64_t f3g6 = f3 * static_cast<int64_t>(g6);
190
int64_t f3g7_38 = f3_2 * static_cast<int64_t>(g7_19);
191
int64_t f3g8_19 = f3 * static_cast<int64_t>(g8_19);
192
int64_t f3g9_38 = f3_2 * static_cast<int64_t>(g9_19);
193
int64_t f4g0 = f4 * static_cast<int64_t>(g0);
194
int64_t f4g1 = f4 * static_cast<int64_t>(g1);
195
int64_t f4g2 = f4 * static_cast<int64_t>(g2);
196
int64_t f4g3 = f4 * static_cast<int64_t>(g3);
197
int64_t f4g4 = f4 * static_cast<int64_t>(g4);
198
int64_t f4g5 = f4 * static_cast<int64_t>(g5);
199
int64_t f4g6_19 = f4 * static_cast<int64_t>(g6_19);
200
int64_t f4g7_19 = f4 * static_cast<int64_t>(g7_19);
201
int64_t f4g8_19 = f4 * static_cast<int64_t>(g8_19);
202
int64_t f4g9_19 = f4 * static_cast<int64_t>(g9_19);
203
int64_t f5g0 = f5 * static_cast<int64_t>(g0);
204
int64_t f5g1_2 = f5_2 * static_cast<int64_t>(g1);
205
int64_t f5g2 = f5 * static_cast<int64_t>(g2);
206
int64_t f5g3_2 = f5_2 * static_cast<int64_t>(g3);
207
int64_t f5g4 = f5 * static_cast<int64_t>(g4);
208
int64_t f5g5_38 = f5_2 * static_cast<int64_t>(g5_19);
209
int64_t f5g6_19 = f5 * static_cast<int64_t>(g6_19);
210
int64_t f5g7_38 = f5_2 * static_cast<int64_t>(g7_19);
211
int64_t f5g8_19 = f5 * static_cast<int64_t>(g8_19);
212
int64_t f5g9_38 = f5_2 * static_cast<int64_t>(g9_19);
213
int64_t f6g0 = f6 * static_cast<int64_t>(g0);
214
int64_t f6g1 = f6 * static_cast<int64_t>(g1);
215
int64_t f6g2 = f6 * static_cast<int64_t>(g2);
216
int64_t f6g3 = f6 * static_cast<int64_t>(g3);
217
int64_t f6g4_19 = f6 * static_cast<int64_t>(g4_19);
218
int64_t f6g5_19 = f6 * static_cast<int64_t>(g5_19);
219
int64_t f6g6_19 = f6 * static_cast<int64_t>(g6_19);
220
int64_t f6g7_19 = f6 * static_cast<int64_t>(g7_19);
221
int64_t f6g8_19 = f6 * static_cast<int64_t>(g8_19);
222
int64_t f6g9_19 = f6 * static_cast<int64_t>(g9_19);
223
int64_t f7g0 = f7 * static_cast<int64_t>(g0);
224
int64_t f7g1_2 = f7_2 * static_cast<int64_t>(g1);
225
int64_t f7g2 = f7 * static_cast<int64_t>(g2);
226
int64_t f7g3_38 = f7_2 * static_cast<int64_t>(g3_19);
227
int64_t f7g4_19 = f7 * static_cast<int64_t>(g4_19);
228
int64_t f7g5_38 = f7_2 * static_cast<int64_t>(g5_19);
229
int64_t f7g6_19 = f7 * static_cast<int64_t>(g6_19);
230
int64_t f7g7_38 = f7_2 * static_cast<int64_t>(g7_19);
231
int64_t f7g8_19 = f7 * static_cast<int64_t>(g8_19);
232
int64_t f7g9_38 = f7_2 * static_cast<int64_t>(g9_19);
233
int64_t f8g0 = f8 * static_cast<int64_t>(g0);
234
int64_t f8g1 = f8 * static_cast<int64_t>(g1);
235
int64_t f8g2_19 = f8 * static_cast<int64_t>(g2_19);
236
int64_t f8g3_19 = f8 * static_cast<int64_t>(g3_19);
237
int64_t f8g4_19 = f8 * static_cast<int64_t>(g4_19);
238
int64_t f8g5_19 = f8 * static_cast<int64_t>(g5_19);
239
int64_t f8g6_19 = f8 * static_cast<int64_t>(g6_19);
240
int64_t f8g7_19 = f8 * static_cast<int64_t>(g7_19);
241
int64_t f8g8_19 = f8 * static_cast<int64_t>(g8_19);
242
int64_t f8g9_19 = f8 * static_cast<int64_t>(g9_19);
243
int64_t f9g0 = f9 * static_cast<int64_t>(g0);
244
int64_t f9g1_38 = f9_2 * static_cast<int64_t>(g1_19);
245
int64_t f9g2_19 = f9 * static_cast<int64_t>(g2_19);
246
int64_t f9g3_38 = f9_2 * static_cast<int64_t>(g3_19);
247
int64_t f9g4_19 = f9 * static_cast<int64_t>(g4_19);
248
int64_t f9g5_38 = f9_2 * static_cast<int64_t>(g5_19);
249
int64_t f9g6_19 = f9 * static_cast<int64_t>(g6_19);
250
int64_t f9g7_38 = f9_2 * static_cast<int64_t>(g7_19);
251
int64_t f9g8_19 = f9 * static_cast<int64_t>(g8_19);
252
int64_t f9g9_38 = f9_2 * static_cast<int64_t>(g9_19);
253
int64_t h0 = f0g0+f1g9_38+f2g8_19+f3g7_38+f4g6_19+f5g5_38+f6g4_19+f7g3_38+f8g2_19+f9g1_38;
254
int64_t h1 = f0g1+f1g0 +f2g9_19+f3g8_19+f4g7_19+f5g6_19+f6g5_19+f7g4_19+f8g3_19+f9g2_19;
255
int64_t h2 = f0g2+f1g1_2 +f2g0 +f3g9_38+f4g8_19+f5g7_38+f6g6_19+f7g5_38+f8g4_19+f9g3_38;
256
int64_t h3 = f0g3+f1g2 +f2g1 +f3g0 +f4g9_19+f5g8_19+f6g7_19+f7g6_19+f8g5_19+f9g4_19;
257
int64_t h4 = f0g4+f1g3_2 +f2g2 +f3g1_2 +f4g0 +f5g9_38+f6g8_19+f7g7_38+f8g6_19+f9g5_38;
258
int64_t h5 = f0g5+f1g4 +f2g3 +f3g2 +f4g1 +f5g0 +f6g9_19+f7g8_19+f8g7_19+f9g6_19;
259
int64_t h6 = f0g6+f1g5_2 +f2g4 +f3g3_2 +f4g2 +f5g1_2 +f6g0 +f7g9_38+f8g8_19+f9g7_38;
260
int64_t h7 = f0g7+f1g6 +f2g5 +f3g4 +f4g3 +f5g2 +f6g1 +f7g0 +f8g9_19+f9g8_19;
261
int64_t h8 = f0g8+f1g7_2 +f2g6 +f3g5_2 +f4g4 +f5g3_2 +f6g2 +f7g1_2 +f8g0 +f9g9_38;
262
int64_t h9 = f0g9+f1g8 +f2g7 +f3g6 +f4g5 +f5g4 +f6g3 +f7g2 +f8g1 +f9g0 ;
275
|h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38))
276
i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8
277
|h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19))
278
i.e. |h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9
280
const int64_t X24 = (1 << 24);
281
const int64_t X25 = (1 << 25);
282
const int64_t X26 = (1 << 26);
284
carry0 = (h0 + X25) >> 26;
287
carry4 = (h4 + X25) >> 26;
292
/* |h1| <= 1.71*2^59 */
293
/* |h5| <= 1.71*2^59 */
295
carry1 = (h1 + X24) >> 25;
298
carry5 = (h5 + X24) >> 25;
301
/* |h1| <= 2^24; from now on fits into int32 */
302
/* |h5| <= 2^24; from now on fits into int32 */
303
/* |h2| <= 1.41*2^60 */
304
/* |h6| <= 1.41*2^60 */
306
carry2 = (h2 + X25) >> 26;
309
carry6 = (h6 + X25) >> 26;
312
/* |h2| <= 2^25; from now on fits into int32 unchanged */
313
/* |h6| <= 2^25; from now on fits into int32 unchanged */
314
/* |h3| <= 1.71*2^59 */
315
/* |h7| <= 1.71*2^59 */
317
carry3 = (h3 + X24) >> 25;
320
carry7 = (h7 + X24) >> 25;
323
/* |h3| <= 2^24; from now on fits into int32 unchanged */
324
/* |h7| <= 2^24; from now on fits into int32 unchanged */
325
/* |h4| <= 1.72*2^34 */
326
/* |h8| <= 1.41*2^60 */
328
carry4 = (h4 + X25) >> 26;
331
carry8 = (h8 + X25) >> 26;
334
/* |h4| <= 2^25; from now on fits into int32 unchanged */
335
/* |h8| <= 2^25; from now on fits into int32 unchanged */
336
/* |h5| <= 1.01*2^24 */
337
/* |h9| <= 1.71*2^59 */
339
carry9 = (h9 + X24) >> 25;
342
/* |h9| <= 2^24; from now on fits into int32 unchanged */
343
/* |h0| <= 1.1*2^39 */
345
carry0 = (h0 + X25) >> 26;
348
/* |h0| <= 2^25; from now on fits into int32 unchanged */
349
/* |h1| <= 1.01*2^24 */
351
return FE_25519(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9);
356
Can overlap h with f.
359
|f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
362
|h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
366
See fe_mul.c for discussion of implementation strategy.
370
FE_25519 FE_25519::sqr_iter(const FE_25519& f, size_t iter)
372
const int64_t X24 = (1 << 24);
373
const int64_t X25 = (1 << 25);
374
const int64_t X26 = (1 << 26);
387
for(size_t i = 0; i != iter; ++i)
389
const int32_t f0_2 = 2 * f0;
390
const int32_t f1_2 = 2 * f1;
391
const int32_t f2_2 = 2 * f2;
392
const int32_t f3_2 = 2 * f3;
393
const int32_t f4_2 = 2 * f4;
394
const int32_t f5_2 = 2 * f5;
395
const int32_t f6_2 = 2 * f6;
396
const int32_t f7_2 = 2 * f7;
397
const int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
398
const int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
399
const int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
400
const int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
401
const int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
403
const int64_t f0f0 = f0 * static_cast<int64_t>(f0);
404
const int64_t f0f1_2 = f0_2 * static_cast<int64_t>(f1);
405
const int64_t f0f2_2 = f0_2 * static_cast<int64_t>(f2);
406
const int64_t f0f3_2 = f0_2 * static_cast<int64_t>(f3);
407
const int64_t f0f4_2 = f0_2 * static_cast<int64_t>(f4);
408
const int64_t f0f5_2 = f0_2 * static_cast<int64_t>(f5);
409
const int64_t f0f6_2 = f0_2 * static_cast<int64_t>(f6);
410
const int64_t f0f7_2 = f0_2 * static_cast<int64_t>(f7);
411
const int64_t f0f8_2 = f0_2 * static_cast<int64_t>(f8);
412
const int64_t f0f9_2 = f0_2 * static_cast<int64_t>(f9);
413
const int64_t f1f1_2 = f1_2 * static_cast<int64_t>(f1);
414
const int64_t f1f2_2 = f1_2 * static_cast<int64_t>(f2);
415
const int64_t f1f3_4 = f1_2 * static_cast<int64_t>(f3_2);
416
const int64_t f1f4_2 = f1_2 * static_cast<int64_t>(f4);
417
const int64_t f1f5_4 = f1_2 * static_cast<int64_t>(f5_2);
418
const int64_t f1f6_2 = f1_2 * static_cast<int64_t>(f6);
419
const int64_t f1f7_4 = f1_2 * static_cast<int64_t>(f7_2);
420
const int64_t f1f8_2 = f1_2 * static_cast<int64_t>(f8);
421
const int64_t f1f9_76 = f1_2 * static_cast<int64_t>(f9_38);
422
const int64_t f2f2 = f2 * static_cast<int64_t>(f2);
423
const int64_t f2f3_2 = f2_2 * static_cast<int64_t>(f3);
424
const int64_t f2f4_2 = f2_2 * static_cast<int64_t>(f4);
425
const int64_t f2f5_2 = f2_2 * static_cast<int64_t>(f5);
426
const int64_t f2f6_2 = f2_2 * static_cast<int64_t>(f6);
427
const int64_t f2f7_2 = f2_2 * static_cast<int64_t>(f7);
428
const int64_t f2f8_38 = f2_2 * static_cast<int64_t>(f8_19);
429
const int64_t f2f9_38 = f2 * static_cast<int64_t>(f9_38);
430
const int64_t f3f3_2 = f3_2 * static_cast<int64_t>(f3);
431
const int64_t f3f4_2 = f3_2 * static_cast<int64_t>(f4);
432
const int64_t f3f5_4 = f3_2 * static_cast<int64_t>(f5_2);
433
const int64_t f3f6_2 = f3_2 * static_cast<int64_t>(f6);
434
const int64_t f3f7_76 = f3_2 * static_cast<int64_t>(f7_38);
435
const int64_t f3f8_38 = f3_2 * static_cast<int64_t>(f8_19);
436
const int64_t f3f9_76 = f3_2 * static_cast<int64_t>(f9_38);
437
const int64_t f4f4 = f4 * static_cast<int64_t>(f4);
438
const int64_t f4f5_2 = f4_2 * static_cast<int64_t>(f5);
439
const int64_t f4f6_38 = f4_2 * static_cast<int64_t>(f6_19);
440
const int64_t f4f7_38 = f4 * static_cast<int64_t>(f7_38);
441
const int64_t f4f8_38 = f4_2 * static_cast<int64_t>(f8_19);
442
const int64_t f4f9_38 = f4 * static_cast<int64_t>(f9_38);
443
const int64_t f5f5_38 = f5 * static_cast<int64_t>(f5_38);
444
const int64_t f5f6_38 = f5_2 * static_cast<int64_t>(f6_19);
445
const int64_t f5f7_76 = f5_2 * static_cast<int64_t>(f7_38);
446
const int64_t f5f8_38 = f5_2 * static_cast<int64_t>(f8_19);
447
const int64_t f5f9_76 = f5_2 * static_cast<int64_t>(f9_38);
448
const int64_t f6f6_19 = f6 * static_cast<int64_t>(f6_19);
449
const int64_t f6f7_38 = f6 * static_cast<int64_t>(f7_38);
450
const int64_t f6f8_38 = f6_2 * static_cast<int64_t>(f8_19);
451
const int64_t f6f9_38 = f6 * static_cast<int64_t>(f9_38);
452
const int64_t f7f7_38 = f7 * static_cast<int64_t>(f7_38);
453
const int64_t f7f8_38 = f7_2 * static_cast<int64_t>(f8_19);
454
const int64_t f7f9_76 = f7_2 * static_cast<int64_t>(f9_38);
455
const int64_t f8f8_19 = f8 * static_cast<int64_t>(f8_19);
456
const int64_t f8f9_38 = f8 * static_cast<int64_t>(f9_38);
457
const int64_t f9f9_38 = f9 * static_cast<int64_t>(f9_38);
459
int64_t h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
460
int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
461
int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
462
int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
463
int64_t h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38;
464
int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
465
int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
466
int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
467
int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38;
468
int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
481
carry0 = (h0 + X25) >> 26;
484
carry4 = (h4 + X25) >> 26;
488
carry1 = (h1 + X24) >> 25;
491
carry5 = (h5 + X24) >> 25;
495
carry2 = (h2 + X25) >> 26;
498
carry6 = (h6 + X25) >> 26;
502
carry3 = (h3 + X24) >> 25;
505
carry7 = (h7 + X24) >> 25;
509
carry4 = (h4 + X25) >> 26;
512
carry8 = (h8 + X25) >> 26;
516
carry9 = (h9 + X24) >> 25;
519
carry0 = (h0 + X25) >> 26;
535
return FE_25519(f0, f1, f2, f3, f4, f5, f6, f7, f8, f9);
540
Can overlap h with f.
543
|f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
546
|h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
550
See fe_mul.c for discussion of implementation strategy.
554
FE_25519 FE_25519::sqr2(const FE_25519& f)
556
const int64_t X24 = (1 << 24);
557
const int64_t X25 = (1 << 25);
558
const int64_t X26 = (1 << 26);
570
int32_t f0_2 = 2 * f0;
571
int32_t f1_2 = 2 * f1;
572
int32_t f2_2 = 2 * f2;
573
int32_t f3_2 = 2 * f3;
574
int32_t f4_2 = 2 * f4;
575
int32_t f5_2 = 2 * f5;
576
int32_t f6_2 = 2 * f6;
577
int32_t f7_2 = 2 * f7;
578
int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
579
int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
580
int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
581
int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
582
int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
583
int64_t f0f0 = f0 * static_cast<int64_t>(f0);
584
int64_t f0f1_2 = f0_2 * static_cast<int64_t>(f1);
585
int64_t f0f2_2 = f0_2 * static_cast<int64_t>(f2);
586
int64_t f0f3_2 = f0_2 * static_cast<int64_t>(f3);
587
int64_t f0f4_2 = f0_2 * static_cast<int64_t>(f4);
588
int64_t f0f5_2 = f0_2 * static_cast<int64_t>(f5);
589
int64_t f0f6_2 = f0_2 * static_cast<int64_t>(f6);
590
int64_t f0f7_2 = f0_2 * static_cast<int64_t>(f7);
591
int64_t f0f8_2 = f0_2 * static_cast<int64_t>(f8);
592
int64_t f0f9_2 = f0_2 * static_cast<int64_t>(f9);
593
int64_t f1f1_2 = f1_2 * static_cast<int64_t>(f1);
594
int64_t f1f2_2 = f1_2 * static_cast<int64_t>(f2);
595
int64_t f1f3_4 = f1_2 * static_cast<int64_t>(f3_2);
596
int64_t f1f4_2 = f1_2 * static_cast<int64_t>(f4);
597
int64_t f1f5_4 = f1_2 * static_cast<int64_t>(f5_2);
598
int64_t f1f6_2 = f1_2 * static_cast<int64_t>(f6);
599
int64_t f1f7_4 = f1_2 * static_cast<int64_t>(f7_2);
600
int64_t f1f8_2 = f1_2 * static_cast<int64_t>(f8);
601
int64_t f1f9_76 = f1_2 * static_cast<int64_t>(f9_38);
602
int64_t f2f2 = f2 * static_cast<int64_t>(f2);
603
int64_t f2f3_2 = f2_2 * static_cast<int64_t>(f3);
604
int64_t f2f4_2 = f2_2 * static_cast<int64_t>(f4);
605
int64_t f2f5_2 = f2_2 * static_cast<int64_t>(f5);
606
int64_t f2f6_2 = f2_2 * static_cast<int64_t>(f6);
607
int64_t f2f7_2 = f2_2 * static_cast<int64_t>(f7);
608
int64_t f2f8_38 = f2_2 * static_cast<int64_t>(f8_19);
609
int64_t f2f9_38 = f2 * static_cast<int64_t>(f9_38);
610
int64_t f3f3_2 = f3_2 * static_cast<int64_t>(f3);
611
int64_t f3f4_2 = f3_2 * static_cast<int64_t>(f4);
612
int64_t f3f5_4 = f3_2 * static_cast<int64_t>(f5_2);
613
int64_t f3f6_2 = f3_2 * static_cast<int64_t>(f6);
614
int64_t f3f7_76 = f3_2 * static_cast<int64_t>(f7_38);
615
int64_t f3f8_38 = f3_2 * static_cast<int64_t>(f8_19);
616
int64_t f3f9_76 = f3_2 * static_cast<int64_t>(f9_38);
617
int64_t f4f4 = f4 * static_cast<int64_t>(f4);
618
int64_t f4f5_2 = f4_2 * static_cast<int64_t>(f5);
619
int64_t f4f6_38 = f4_2 * static_cast<int64_t>(f6_19);
620
int64_t f4f7_38 = f4 * static_cast<int64_t>(f7_38);
621
int64_t f4f8_38 = f4_2 * static_cast<int64_t>(f8_19);
622
int64_t f4f9_38 = f4 * static_cast<int64_t>(f9_38);
623
int64_t f5f5_38 = f5 * static_cast<int64_t>(f5_38);
624
int64_t f5f6_38 = f5_2 * static_cast<int64_t>(f6_19);
625
int64_t f5f7_76 = f5_2 * static_cast<int64_t>(f7_38);
626
int64_t f5f8_38 = f5_2 * static_cast<int64_t>(f8_19);
627
int64_t f5f9_76 = f5_2 * static_cast<int64_t>(f9_38);
628
int64_t f6f6_19 = f6 * static_cast<int64_t>(f6_19);
629
int64_t f6f7_38 = f6 * static_cast<int64_t>(f7_38);
630
int64_t f6f8_38 = f6_2 * static_cast<int64_t>(f8_19);
631
int64_t f6f9_38 = f6 * static_cast<int64_t>(f9_38);
632
int64_t f7f7_38 = f7 * static_cast<int64_t>(f7_38);
633
int64_t f7f8_38 = f7_2 * static_cast<int64_t>(f8_19);
634
int64_t f7f9_76 = f7_2 * static_cast<int64_t>(f9_38);
635
int64_t f8f8_19 = f8 * static_cast<int64_t>(f8_19);
636
int64_t f8f9_38 = f8 * static_cast<int64_t>(f9_38);
637
int64_t f9f9_38 = f9 * static_cast<int64_t>(f9_38);
638
int64_t h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
639
int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
640
int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
641
int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
642
int64_t h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38;
643
int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
644
int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
645
int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
646
int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38;
647
int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
670
carry0 = (h0 + X25) >> 26;
673
carry4 = (h4 + X25) >> 26;
677
carry1 = (h1 + X24) >> 25;
680
carry5 = (h5 + X24) >> 25;
684
carry2 = (h2 + X25) >> 26;
687
carry6 = (h6 + X25) >> 26;
691
carry3 = (h3 + X24) >> 25;
694
carry7 = (h7 + X24) >> 25;
698
carry4 = (h4 + X25) >> 26;
701
carry8 = (h8 + X25) >> 26;
705
carry9 = (h9 + X24) >> 25;
709
carry0 = (h0 + X25) >> 26;
713
return FE_25519(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9);
717
Ignores top bit of h.
720
void FE_25519::from_bytes(const uint8_t s[32])
722
const int64_t X24 = (1 << 24);
723
const int64_t X25 = (1 << 25);
724
const int64_t X26 = (1 << 26);
726
int64_t h0 = load_4(s);
727
int64_t h1 = load_3(s + 4) << 6;
728
int64_t h2 = load_3(s + 7) << 5;
729
int64_t h3 = load_3(s + 10) << 3;
730
int64_t h4 = load_3(s + 13) << 2;
731
int64_t h5 = load_4(s + 16);
732
int64_t h6 = load_3(s + 20) << 7;
733
int64_t h7 = load_3(s + 23) << 5;
734
int64_t h8 = load_3(s + 26) << 4;
735
int64_t h9 = (load_3(s + 29) & 0x7fffff) << 2;
737
const int64_t carry9 = (h9 + X24) >> 25;
740
const int64_t carry1 = (h1 + X24) >> 25;
743
const int64_t carry3 = (h3 + X24) >> 25;
746
const int64_t carry5 = (h5 + X24) >> 25;
749
const int64_t carry7 = (h7 + X24) >> 25;
753
const int64_t carry0 = (h0 + X25) >> 26;
756
const int64_t carry2 = (h2 + X25) >> 26;
759
const int64_t carry4 = (h4 + X25) >> 26;
762
const int64_t carry6 = (h6 + X25) >> 26;
765
const int64_t carry8 = (h8 + X25) >> 26;
783
|h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
785
Write p=2^255-19; q=floor(h/p).
786
Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
789
Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
790
Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4.
792
Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
796
Have 0<=r<=p-1=2^255-20.
797
Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
799
Write x=r+19(2^-255)r+y.
800
Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
802
Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
803
so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
806
void FE_25519::to_bytes(uint8_t s[32]) const
808
const int64_t X25 = (1 << 25);
809
const int64_t X26 = (1 << 26);
811
int32_t h0 = m_fe[0];
812
int32_t h1 = m_fe[1];
813
int32_t h2 = m_fe[2];
814
int32_t h3 = m_fe[3];
815
int32_t h4 = m_fe[4];
816
int32_t h5 = m_fe[5];
817
int32_t h6 = m_fe[6];
818
int32_t h7 = m_fe[7];
819
int32_t h8 = m_fe[8];
820
int32_t h9 = m_fe[9];
833
q = (19 * h9 + ((static_cast<int32_t>(1) << 24))) >> 25;
845
/* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
847
/* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */
881
Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
882
Have h0+...+2^230 h9 between 0 and 2^255-1;
883
evidently 2^255 h10-2^255 q = 0.
884
Goal: Output h0+...+2^230 h9.
890
s[3] = (h0 >> 24) | (h1 << 2);
893
s[6] = (h1 >> 22) | (h2 << 3);
896
s[9] = (h2 >> 21) | (h3 << 5);
899
s[12] = (h3 >> 19) | (h4 << 6);
906
s[19] = (h5 >> 24) | (h6 << 1);
909
s[22] = (h6 >> 23) | (h7 << 3);
912
s[25] = (h7 >> 21) | (h8 << 4);
915
s[28] = (h8 >> 20) | (h9 << 6);