1
/* crypto/ec/ecp_nistp256.c */
/*
 * Written by Adam Langley (Google) for the OpenSSL project
 */
/* Copyright 2011 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * A 64-bit implementation of the NIST P-256 elliptic curve point multiplication
 *
 * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.
 * Otherwise based on Emilia's P224 work, which was inspired by my curve25519
 * work which got its smarts from Daniel J. Bernstein's work on the same.
 */
29
#include <openssl/opensslconf.h>
#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128

#ifndef OPENSSL_SYS_VMS
# include <stdint.h>
#else
# include <inttypes.h>
#endif

#include <string.h>
#include <openssl/err.h>
#include "ec_lcl.h"

#if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
/* even with gcc, the typedef won't work for 32-bit platforms */
typedef __uint128_t uint128_t;  /* nonstandard; implemented by gcc on 64-bit platforms */
typedef __int128_t int128_t;
#else
# error "Need GCC 3.1 or later to define type uint128_t"
#endif

/* Short names for the fixed-width types used throughout this file. */
typedef uint8_t u8;
typedef uint64_t u64;
typedef int64_t s64;
55
/* The underlying field.
57
* P256 operates over GF(2^256-2^224+2^192+2^96-1). We can serialise an element
58
* of this field into 32 bytes. We call this an felem_bytearray. */
60
typedef u8 felem_bytearray[32];
62
/* These are the parameters of P256, taken from FIPS 186-3, page 86. These
63
* values are big-endian. */
64
static const felem_bytearray nistp256_curve_params[5] = {
65
{0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* p */
66
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
67
0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
68
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
69
{0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* a = -3 */
70
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
71
0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
72
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc}, /* b */
73
{0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7,
74
0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc,
75
0x65, 0x1d, 0x06, 0xb0, 0xcc, 0x53, 0xb0, 0xf6,
76
0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b},
77
{0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, /* x */
78
0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2,
79
0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0,
80
0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96},
81
{0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, /* y */
82
0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, 0x16,
83
0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce,
84
0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5}
87
/*-
 * The representation of field elements.
 * ------------------------------------
 *
 * We represent field elements with either four 128-bit values, eight 128-bit
 * values, or four 64-bit values. The field element represented is:
 *   v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + v[3]*2^192  (mod p)
 * or:
 *   v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + ... + v[8]*2^512  (mod p)
 *
 * 128-bit values are called 'limbs'. Since the limbs are spaced only 64 bits
 * apart, but are 128-bits wide, the most significant bits of each limb overlap
 * with the least significant bits of the next.
 *
 * A field element with four limbs is an 'felem'. One with eight limbs is a
 * 'longfelem'
 *
 * A field element with four, 64-bit values is called a 'smallfelem'. Small
 * values are used as intermediate values before multiplication.
 */
109
typedef uint128_t limb;
110
typedef limb felem[NLIMBS];
111
typedef limb longfelem[NLIMBS * 2];
112
typedef u64 smallfelem[NLIMBS];
114
/* This is the value of the prime as four 64-bit words, little-endian. */
115
static const u64 kPrime[4] = { 0xfffffffffffffffful, 0xffffffff, 0, 0xffffffff00000001ul };
116
static const limb bottom32bits = 0xffffffff;
117
static const u64 bottom63bits = 0x7ffffffffffffffful;
119
/* bin32_to_felem takes a little-endian byte array and converts it into felem
120
* form. This assumes that the CPU is little-endian. */
121
static void bin32_to_felem(felem out, const u8 in[32])
123
out[0] = *((u64*) &in[0]);
124
out[1] = *((u64*) &in[8]);
125
out[2] = *((u64*) &in[16]);
126
out[3] = *((u64*) &in[24]);
129
/* smallfelem_to_bin32 takes a smallfelem and serialises into a little endian,
130
* 32 byte array. This assumes that the CPU is little-endian. */
131
static void smallfelem_to_bin32(u8 out[32], const smallfelem in)
133
*((u64*) &out[0]) = in[0];
134
*((u64*) &out[8]) = in[1];
135
*((u64*) &out[16]) = in[2];
136
*((u64*) &out[24]) = in[3];
139
/* To preserve endianness when using BN_bn2bin and BN_bin2bn */
140
static void flip_endian(u8 *out, const u8 *in, unsigned len)
143
for (i = 0; i < len; ++i)
144
out[i] = in[len-1-i];
147
/* BN_to_felem converts an OpenSSL BIGNUM into an felem */
148
static int BN_to_felem(felem out, const BIGNUM *bn)
150
felem_bytearray b_in;
151
felem_bytearray b_out;
154
/* BN_bn2bin eats leading zeroes */
155
memset(b_out, 0, sizeof b_out);
156
num_bytes = BN_num_bytes(bn);
157
if (num_bytes > sizeof b_out)
159
ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
162
if (BN_is_negative(bn))
164
ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
167
num_bytes = BN_bn2bin(bn, b_in);
168
flip_endian(b_out, b_in, num_bytes);
169
bin32_to_felem(out, b_out);
173
/* felem_to_BN converts an felem into an OpenSSL BIGNUM */
174
static BIGNUM *smallfelem_to_BN(BIGNUM *out, const smallfelem in)
176
felem_bytearray b_in, b_out;
177
smallfelem_to_bin32(b_in, in);
178
flip_endian(b_out, b_in, sizeof b_out);
179
return BN_bin2bn(b_out, sizeof b_out, out);
184
* ---------------- */
186
static void smallfelem_one(smallfelem out)
194
/* smallfelem_assign copies |in| to |out| */
static void smallfelem_assign(smallfelem out, const smallfelem in)
{
    out[0] = in[0];
    out[1] = in[1];
    out[2] = in[2];
    out[3] = in[3];
}
202
/* felem_assign copies |in| to |out| */
static void felem_assign(felem out, const felem in)
{
    out[0] = in[0];
    out[1] = in[1];
    out[2] = in[2];
    out[3] = in[3];
}
210
/* felem_sum sets out = out + in. */
211
static void felem_sum(felem out, const felem in)
219
/* felem_small_sum sets out = out + in. */
220
static void felem_small_sum(felem out, const smallfelem in)
228
/* felem_scalar sets out = out * scalar */
229
static void felem_scalar(felem out, const u64 scalar)
237
/* longfelem_scalar sets out = out * scalar */
238
static void longfelem_scalar(longfelem out, const u64 scalar)
250
#define two105m41m9 (((limb)1) << 105) - (((limb)1) << 41) - (((limb)1) << 9)
251
#define two105 (((limb)1) << 105)
252
#define two105m41p9 (((limb)1) << 105) - (((limb)1) << 41) + (((limb)1) << 9)
254
/* zero105 is 0 mod p */
255
static const felem zero105 = { two105m41m9, two105, two105m41p9, two105m41p9 };
257
/* smallfelem_neg sets |out| to |-small|
259
* out[i] < out[i] + 2^105
261
static void smallfelem_neg(felem out, const smallfelem small)
263
/* In order to prevent underflow, we subtract from 0 mod p. */
264
out[0] = zero105[0] - small[0];
265
out[1] = zero105[1] - small[1];
266
out[2] = zero105[2] - small[2];
267
out[3] = zero105[3] - small[3];
270
/* felem_diff subtracts |in| from |out|
274
* out[i] < out[i] + 2^105
276
static void felem_diff(felem out, const felem in)
278
/* In order to prevent underflow, we add 0 mod p before subtracting. */
279
out[0] += zero105[0];
280
out[1] += zero105[1];
281
out[2] += zero105[2];
282
out[3] += zero105[3];
290
#define two107m43m11 (((limb)1) << 107) - (((limb)1) << 43) - (((limb)1) << 11)
291
#define two107 (((limb)1) << 107)
292
#define two107m43p11 (((limb)1) << 107) - (((limb)1) << 43) + (((limb)1) << 11)
294
/* zero107 is 0 mod p */
295
static const felem zero107 = { two107m43m11, two107, two107m43p11, two107m43p11 };
297
/* An alternative felem_diff for larger inputs |in|
298
* felem_diff_zero107 subtracts |in| from |out|
302
* out[i] < out[i] + 2^107
304
static void felem_diff_zero107(felem out, const felem in)
306
/* In order to prevent underflow, we add 0 mod p before subtracting. */
307
out[0] += zero107[0];
308
out[1] += zero107[1];
309
out[2] += zero107[2];
310
out[3] += zero107[3];
318
/* longfelem_diff subtracts |in| from |out|
322
* out[i] < out[i] + 2^70 + 2^40
324
static void longfelem_diff(longfelem out, const longfelem in)
326
static const limb two70m8p6 = (((limb)1) << 70) - (((limb)1) << 8) + (((limb)1) << 6);
327
static const limb two70p40 = (((limb)1) << 70) + (((limb)1) << 40);
328
static const limb two70 = (((limb)1) << 70);
329
static const limb two70m40m38p6 = (((limb)1) << 70) - (((limb)1) << 40) - (((limb)1) << 38) + (((limb)1) << 6);
330
static const limb two70m6 = (((limb)1) << 70) - (((limb)1) << 6);
332
/* add 0 mod p to avoid underflow */
336
out[3] += two70m40m38p6;
342
/* in[i] < 7*2^67 < 2^70 - 2^40 - 2^38 + 2^6 */
353
#define two64m0 (((limb)1) << 64) - 1
354
#define two110p32m0 (((limb)1) << 110) + (((limb)1) << 32) - 1
355
#define two64m46 (((limb)1) << 64) - (((limb)1) << 46)
356
#define two64m32 (((limb)1) << 64) - (((limb)1) << 32)
358
/* zero110 is 0 mod p */
359
static const felem zero110 = { two64m0, two110p32m0, two64m46, two64m32 };
361
/* felem_shrink converts an felem into a smallfelem. The result isn't quite
362
* minimal as the value may be greater than p.
369
static void felem_shrink(smallfelem out, const felem in)
374
static const u64 kPrime3Test = 0x7fffffff00000001ul; /* 2^63 - 2^32 + 1 */
377
tmp[3] = zero110[3] + in[3] + ((u64) (in[2] >> 64));
380
tmp[2] = zero110[2] + (u64) in[2];
381
tmp[0] = zero110[0] + in[0];
382
tmp[1] = zero110[1] + in[1];
383
/* tmp[0] < 2**110, tmp[1] < 2^111, tmp[2] < 2**65 */
385
/* We perform two partial reductions where we eliminate the
386
* high-word of tmp[3]. We don't update the other words till the end.
388
a = tmp[3] >> 64; /* a < 2^46 */
389
tmp[3] = (u64) tmp[3];
391
tmp[3] += ((limb)a) << 32;
395
a = tmp[3] >> 64; /* a < 2^15 */
396
b += a; /* b < 2^46 + 2^15 < 2^47 */
397
tmp[3] = (u64) tmp[3];
399
tmp[3] += ((limb)a) << 32;
400
/* tmp[3] < 2^64 + 2^47 */
402
/* This adjusts the other two words to complete the two partial
405
tmp[1] -= (((limb)b) << 32);
407
/* In order to make space in tmp[3] for the carry from 2 -> 3, we
408
* conditionally subtract kPrime if tmp[3] is large enough. */
410
/* As tmp[3] < 2^65, high is either 1 or 0 */
414
* all ones if the high word of tmp[3] is 1
415
* all zeros if the high word of tmp[3] if 0 */
419
* all ones if the MSB of low is 1
420
* all zeros if the MSB of low if 0 */
423
/* if low was greater than kPrime3Test then the MSB is zero */
427
* all ones if low was > kPrime3Test
428
* all zeros if low was <= kPrime3Test */
429
mask = (mask & low) | high;
430
tmp[0] -= mask & kPrime[0];
431
tmp[1] -= mask & kPrime[1];
432
/* kPrime[2] is zero, so omitted */
433
tmp[3] -= mask & kPrime[3];
434
/* tmp[3] < 2**64 - 2**32 + 1 */
436
tmp[1] += ((u64) (tmp[0] >> 64)); tmp[0] = (u64) tmp[0];
437
tmp[2] += ((u64) (tmp[1] >> 64)); tmp[1] = (u64) tmp[1];
438
tmp[3] += ((u64) (tmp[2] >> 64)); tmp[2] = (u64) tmp[2];
447
/* smallfelem_expand converts a smallfelem to an felem */
448
static void smallfelem_expand(felem out, const smallfelem in)
456
/* smallfelem_square sets |out| = |small|^2
460
* out[i] < 7 * 2^64 < 2^67
462
static void smallfelem_square(longfelem out, const smallfelem small)
467
a = ((uint128_t) small[0]) * small[0];
473
a = ((uint128_t) small[0]) * small[1];
480
a = ((uint128_t) small[0]) * small[2];
487
a = ((uint128_t) small[0]) * small[3];
493
a = ((uint128_t) small[1]) * small[2];
500
a = ((uint128_t) small[1]) * small[1];
506
a = ((uint128_t) small[1]) * small[3];
513
a = ((uint128_t) small[2]) * small[3];
521
a = ((uint128_t) small[2]) * small[2];
527
a = ((uint128_t) small[3]) * small[3];
534
/* felem_square sets |out| = |in|^2
538
* out[i] < 7 * 2^64 < 2^67
540
static void felem_square(longfelem out, const felem in)
543
felem_shrink(small, in);
544
smallfelem_square(out, small);
547
/* smallfelem_mul sets |out| = |small1| * |small2|
552
* out[i] < 7 * 2^64 < 2^67
554
static void smallfelem_mul(longfelem out, const smallfelem small1, const smallfelem small2)
559
a = ((uint128_t) small1[0]) * small2[0];
566
a = ((uint128_t) small1[0]) * small2[1];
572
a = ((uint128_t) small1[1]) * small2[0];
579
a = ((uint128_t) small1[0]) * small2[2];
585
a = ((uint128_t) small1[1]) * small2[1];
591
a = ((uint128_t) small1[2]) * small2[0];
598
a = ((uint128_t) small1[0]) * small2[3];
604
a = ((uint128_t) small1[1]) * small2[2];
610
a = ((uint128_t) small1[2]) * small2[1];
616
a = ((uint128_t) small1[3]) * small2[0];
623
a = ((uint128_t) small1[1]) * small2[3];
629
a = ((uint128_t) small1[2]) * small2[2];
635
a = ((uint128_t) small1[3]) * small2[1];
642
a = ((uint128_t) small1[2]) * small2[3];
648
a = ((uint128_t) small1[3]) * small2[2];
655
a = ((uint128_t) small1[3]) * small2[3];
662
/* felem_mul sets |out| = |in1| * |in2|
667
* out[i] < 7 * 2^64 < 2^67
669
static void felem_mul(longfelem out, const felem in1, const felem in2)
671
smallfelem small1, small2;
672
felem_shrink(small1, in1);
673
felem_shrink(small2, in2);
674
smallfelem_mul(out, small1, small2);
677
/* felem_small_mul sets |out| = |small1| * |in2|
682
* out[i] < 7 * 2^64 < 2^67
684
static void felem_small_mul(longfelem out, const smallfelem small1, const felem in2)
687
felem_shrink(small2, in2);
688
smallfelem_mul(out, small1, small2);
691
#define two100m36m4 (((limb)1) << 100) - (((limb)1) << 36) - (((limb)1) << 4)
692
#define two100 (((limb)1) << 100)
693
#define two100m36p4 (((limb)1) << 100) - (((limb)1) << 36) + (((limb)1) << 4)
694
/* zero100 is 0 mod p */
695
static const felem zero100 = { two100m36m4, two100, two100m36p4, two100m36p4 };
697
/* Internal function for the different flavours of felem_reduce.
698
* felem_reduce_ reduces the higher coefficients in[4]-in[7].
700
* out[0] >= in[6] + 2^32*in[6] + in[7] + 2^32*in[7]
701
* out[1] >= in[7] + 2^32*in[4]
702
* out[2] >= in[5] + 2^32*in[5]
703
* out[3] >= in[4] + 2^32*in[5] + 2^32*in[6]
705
* out[0] <= out[0] + in[4] + 2^32*in[5]
706
* out[1] <= out[1] + in[5] + 2^33*in[6]
707
* out[2] <= out[2] + in[7] + 2*in[6] + 2^33*in[7]
708
* out[3] <= out[3] + 2^32*in[4] + 3*in[7]
710
static void felem_reduce_(felem out, const longfelem in)
713
/* combine common terms from below */
714
c = in[4] + (in[5] << 32);
722
/* the remaining terms */
723
/* 256: [(0,1),(96,-1),(192,-1),(224,1)] */
724
out[1] -= (in[4] << 32);
725
out[3] += (in[4] << 32);
727
/* 320: [(32,1),(64,1),(128,-1),(160,-1),(224,-1)] */
728
out[2] -= (in[5] << 32);
730
/* 384: [(0,-1),(32,-1),(96,2),(128,2),(224,-1)] */
732
out[0] -= (in[6] << 32);
733
out[1] += (in[6] << 33);
734
out[2] += (in[6] * 2);
735
out[3] -= (in[6] << 32);
737
/* 448: [(0,-1),(32,-1),(64,-1),(128,1),(160,2),(192,3)] */
739
out[0] -= (in[7] << 32);
740
out[2] += (in[7] << 33);
741
out[3] += (in[7] * 3);
744
/* felem_reduce converts a longfelem into an felem.
745
* To be called directly after felem_square or felem_mul.
747
* in[0] < 2^64, in[1] < 3*2^64, in[2] < 5*2^64, in[3] < 7*2^64
748
* in[4] < 7*2^64, in[5] < 5*2^64, in[6] < 3*2^64, in[7] < 2*64
752
static void felem_reduce(felem out, const longfelem in)
754
out[0] = zero100[0] + in[0];
755
out[1] = zero100[1] + in[1];
756
out[2] = zero100[2] + in[2];
757
out[3] = zero100[3] + in[3];
759
felem_reduce_(out, in);
761
/* out[0] > 2^100 - 2^36 - 2^4 - 3*2^64 - 3*2^96 - 2^64 - 2^96 > 0
762
* out[1] > 2^100 - 2^64 - 7*2^96 > 0
763
* out[2] > 2^100 - 2^36 + 2^4 - 5*2^64 - 5*2^96 > 0
764
* out[3] > 2^100 - 2^36 + 2^4 - 7*2^64 - 5*2^96 - 3*2^96 > 0
766
* out[0] < 2^100 + 2^64 + 7*2^64 + 5*2^96 < 2^101
767
* out[1] < 2^100 + 3*2^64 + 5*2^64 + 3*2^97 < 2^101
768
* out[2] < 2^100 + 5*2^64 + 2^64 + 3*2^65 + 2^97 < 2^101
769
* out[3] < 2^100 + 7*2^64 + 7*2^96 + 3*2^64 < 2^101
773
/* felem_reduce_zero105 converts a larger longfelem into an felem.
779
static void felem_reduce_zero105(felem out, const longfelem in)
781
out[0] = zero105[0] + in[0];
782
out[1] = zero105[1] + in[1];
783
out[2] = zero105[2] + in[2];
784
out[3] = zero105[3] + in[3];
786
felem_reduce_(out, in);
788
/* out[0] > 2^105 - 2^41 - 2^9 - 2^71 - 2^103 - 2^71 - 2^103 > 0
789
* out[1] > 2^105 - 2^71 - 2^103 > 0
790
* out[2] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 > 0
791
* out[3] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 - 2^103 > 0
793
* out[0] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
794
* out[1] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
795
* out[2] < 2^105 + 2^71 + 2^71 + 2^71 + 2^103 < 2^106
796
* out[3] < 2^105 + 2^71 + 2^103 + 2^71 < 2^106
800
/* subtract_u64 sets *result = *result - v and *carry to one if the subtraction
802
static void subtract_u64(u64* result, u64* carry, u64 v)
804
uint128_t r = *result;
806
*carry = (r >> 64) & 1;
810
/* felem_contract converts |in| to its unique, minimal representation.
814
static void felem_contract(smallfelem out, const felem in)
817
u64 all_equal_so_far = 0, result = 0, carry;
819
felem_shrink(out, in);
820
/* small is minimal except that the value might be > p */
823
/* We are doing a constant time test if out >= kPrime. We need to
824
* compare each u64, from most-significant to least significant. For
825
* each one, if all words so far have been equal (m is all ones) then a
826
* non-equal result is the answer. Otherwise we continue. */
827
for (i = 3; i < 4; i--)
830
uint128_t a = ((uint128_t) kPrime[i]) - out[i];
831
/* if out[i] > kPrime[i] then a will underflow and the high
832
* 64-bits will all be set. */
833
result |= all_equal_so_far & ((u64) (a >> 64));
835
/* if kPrime[i] == out[i] then |equal| will be all zeros and
836
* the decrement will make it all ones. */
837
equal = kPrime[i] ^ out[i];
839
equal &= equal << 32;
840
equal &= equal << 16;
845
equal = ((s64) equal) >> 63;
847
all_equal_so_far &= equal;
850
/* if all_equal_so_far is still all ones then the two values are equal
851
* and so out >= kPrime is true. */
852
result |= all_equal_so_far;
854
/* if out >= kPrime then we subtract kPrime. */
855
subtract_u64(&out[0], &carry, result & kPrime[0]);
856
subtract_u64(&out[1], &carry, carry);
857
subtract_u64(&out[2], &carry, carry);
858
subtract_u64(&out[3], &carry, carry);
860
subtract_u64(&out[1], &carry, result & kPrime[1]);
861
subtract_u64(&out[2], &carry, carry);
862
subtract_u64(&out[3], &carry, carry);
864
subtract_u64(&out[2], &carry, result & kPrime[2]);
865
subtract_u64(&out[3], &carry, carry);
867
subtract_u64(&out[3], &carry, result & kPrime[3]);
870
/* smallfelem_square_contract sets |out| = |in|^2, fully reduced */
static void smallfelem_square_contract(smallfelem out, const smallfelem in)
{
    longfelem longtmp;
    felem tmp;

    smallfelem_square(longtmp, in);
    felem_reduce(tmp, longtmp);
    felem_contract(out, tmp);
}
880
/* smallfelem_mul_contract sets |out| = |in1| * |in2|, fully reduced */
static void smallfelem_mul_contract(smallfelem out, const smallfelem in1,
                                    const smallfelem in2)
{
    longfelem longtmp;
    felem tmp;

    smallfelem_mul(longtmp, in1, in2);
    felem_reduce(tmp, longtmp);
    felem_contract(out, tmp);
}
890
/* felem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0
895
static limb smallfelem_is_zero(const smallfelem small)
900
u64 is_zero = small[0] | small[1] | small[2] | small[3];
902
is_zero &= is_zero << 32;
903
is_zero &= is_zero << 16;
904
is_zero &= is_zero << 8;
905
is_zero &= is_zero << 4;
906
is_zero &= is_zero << 2;
907
is_zero &= is_zero << 1;
908
is_zero = ((s64) is_zero) >> 63;
910
is_p = (small[0] ^ kPrime[0]) |
911
(small[1] ^ kPrime[1]) |
912
(small[2] ^ kPrime[2]) |
913
(small[3] ^ kPrime[3]);
921
is_p = ((s64) is_p) >> 63;
926
result |= ((limb) is_zero) << 64;
930
static int smallfelem_is_zero_int(const smallfelem small)
932
return (int) (smallfelem_is_zero(small) & ((limb)1));
935
/* felem_inv calculates |out| = |in|^{-1}
937
* Based on Fermat's Little Theorem:
939
* a^{p-1} = 1 (mod p)
940
* a^{p-2} = a^{-1} (mod p)
942
static void felem_inv(felem out, const felem in)
945
/* each e_I will hold |in|^{2^I - 1} */
946
felem e2, e4, e8, e16, e32, e64;
950
felem_square(tmp, in); felem_reduce(ftmp, tmp); /* 2^1 */
951
felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^2 - 2^0 */
952
felem_assign(e2, ftmp);
953
felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 2^1 */
954
felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^4 - 2^2 */
955
felem_mul(tmp, ftmp, e2); felem_reduce(ftmp, tmp); /* 2^4 - 2^0 */
956
felem_assign(e4, ftmp);
957
felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^5 - 2^1 */
958
felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^6 - 2^2 */
959
felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^7 - 2^3 */
960
felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^8 - 2^4 */
961
felem_mul(tmp, ftmp, e4); felem_reduce(ftmp, tmp); /* 2^8 - 2^0 */
962
felem_assign(e8, ftmp);
963
for (i = 0; i < 8; i++) {
964
felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
966
felem_mul(tmp, ftmp, e8); felem_reduce(ftmp, tmp); /* 2^16 - 2^0 */
967
felem_assign(e16, ftmp);
968
for (i = 0; i < 16; i++) {
969
felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
971
felem_mul(tmp, ftmp, e16); felem_reduce(ftmp, tmp); /* 2^32 - 2^0 */
972
felem_assign(e32, ftmp);
973
for (i = 0; i < 32; i++) {
974
felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
976
felem_assign(e64, ftmp);
977
felem_mul(tmp, ftmp, in); felem_reduce(ftmp, tmp); /* 2^64 - 2^32 + 2^0 */
978
for (i = 0; i < 192; i++) {
979
felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
980
} /* 2^256 - 2^224 + 2^192 */
982
felem_mul(tmp, e64, e32); felem_reduce(ftmp2, tmp); /* 2^64 - 2^0 */
983
for (i = 0; i < 16; i++) {
984
felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
986
felem_mul(tmp, ftmp2, e16); felem_reduce(ftmp2, tmp); /* 2^80 - 2^0 */
987
for (i = 0; i < 8; i++) {
988
felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
990
felem_mul(tmp, ftmp2, e8); felem_reduce(ftmp2, tmp); /* 2^88 - 2^0 */
991
for (i = 0; i < 4; i++) {
992
felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
994
felem_mul(tmp, ftmp2, e4); felem_reduce(ftmp2, tmp); /* 2^92 - 2^0 */
995
felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^93 - 2^1 */
996
felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^94 - 2^2 */
997
felem_mul(tmp, ftmp2, e2); felem_reduce(ftmp2, tmp); /* 2^94 - 2^0 */
998
felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^95 - 2^1 */
999
felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^96 - 2^2 */
1000
felem_mul(tmp, ftmp2, in); felem_reduce(ftmp2, tmp); /* 2^96 - 3 */
1002
felem_mul(tmp, ftmp2, ftmp); felem_reduce(out, tmp); /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */
1005
/* smallfelem_inv_contract sets |out| = |in|^{-1}, fully reduced */
static void smallfelem_inv_contract(smallfelem out, const smallfelem in)
{
    felem tmp;

    smallfelem_expand(tmp, in);
    felem_inv(tmp, tmp);
    felem_contract(out, tmp);
}
1017
/*-
 * Group operations
 * ----------------
 *
 * Building on top of the field operations we have the operations on the
 * elliptic curve group itself. Points on the curve are represented in Jacobian
 * coordinates.
 */
1021
/* point_double calculates 2*(x_in, y_in, z_in)
1023
* The method is taken from:
1024
* http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
1026
* Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.
1027
* while x_out == y_in is not (maybe this works, but it's not tested). */
1029
point_double(felem x_out, felem y_out, felem z_out,
1030
const felem x_in, const felem y_in, const felem z_in)
1032
longfelem tmp, tmp2;
1033
felem delta, gamma, beta, alpha, ftmp, ftmp2;
1034
smallfelem small1, small2;
1036
felem_assign(ftmp, x_in);
1037
/* ftmp[i] < 2^106 */
1038
felem_assign(ftmp2, x_in);
1039
/* ftmp2[i] < 2^106 */
1042
felem_square(tmp, z_in);
1043
felem_reduce(delta, tmp);
1044
/* delta[i] < 2^101 */
1047
felem_square(tmp, y_in);
1048
felem_reduce(gamma, tmp);
1049
/* gamma[i] < 2^101 */
1050
felem_shrink(small1, gamma);
1052
/* beta = x*gamma */
1053
felem_small_mul(tmp, small1, x_in);
1054
felem_reduce(beta, tmp);
1055
/* beta[i] < 2^101 */
1057
/* alpha = 3*(x-delta)*(x+delta) */
1058
felem_diff(ftmp, delta);
1059
/* ftmp[i] < 2^105 + 2^106 < 2^107 */
1060
felem_sum(ftmp2, delta);
1061
/* ftmp2[i] < 2^105 + 2^106 < 2^107 */
1062
felem_scalar(ftmp2, 3);
1063
/* ftmp2[i] < 3 * 2^107 < 2^109 */
1064
felem_mul(tmp, ftmp, ftmp2);
1065
felem_reduce(alpha, tmp);
1066
/* alpha[i] < 2^101 */
1067
felem_shrink(small2, alpha);
1069
/* x' = alpha^2 - 8*beta */
1070
smallfelem_square(tmp, small2);
1071
felem_reduce(x_out, tmp);
1072
felem_assign(ftmp, beta);
1073
felem_scalar(ftmp, 8);
1074
/* ftmp[i] < 8 * 2^101 = 2^104 */
1075
felem_diff(x_out, ftmp);
1076
/* x_out[i] < 2^105 + 2^101 < 2^106 */
1078
/* z' = (y + z)^2 - gamma - delta */
1079
felem_sum(delta, gamma);
1080
/* delta[i] < 2^101 + 2^101 = 2^102 */
1081
felem_assign(ftmp, y_in);
1082
felem_sum(ftmp, z_in);
1083
/* ftmp[i] < 2^106 + 2^106 = 2^107 */
1084
felem_square(tmp, ftmp);
1085
felem_reduce(z_out, tmp);
1086
felem_diff(z_out, delta);
1087
/* z_out[i] < 2^105 + 2^101 < 2^106 */
1089
/* y' = alpha*(4*beta - x') - 8*gamma^2 */
1090
felem_scalar(beta, 4);
1091
/* beta[i] < 4 * 2^101 = 2^103 */
1092
felem_diff_zero107(beta, x_out);
1093
/* beta[i] < 2^107 + 2^103 < 2^108 */
1094
felem_small_mul(tmp, small2, beta);
1095
/* tmp[i] < 7 * 2^64 < 2^67 */
1096
smallfelem_square(tmp2, small1);
1097
/* tmp2[i] < 7 * 2^64 */
1098
longfelem_scalar(tmp2, 8);
1099
/* tmp2[i] < 8 * 7 * 2^64 = 7 * 2^67 */
1100
longfelem_diff(tmp, tmp2);
1101
/* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
1102
felem_reduce_zero105(y_out, tmp);
1103
/* y_out[i] < 2^106 */
1106
/* point_double_small is the same as point_double, except that it operates on
1109
point_double_small(smallfelem x_out, smallfelem y_out, smallfelem z_out,
1110
const smallfelem x_in, const smallfelem y_in, const smallfelem z_in)
1112
felem felem_x_out, felem_y_out, felem_z_out;
1113
felem felem_x_in, felem_y_in, felem_z_in;
1115
smallfelem_expand(felem_x_in, x_in);
1116
smallfelem_expand(felem_y_in, y_in);
1117
smallfelem_expand(felem_z_in, z_in);
1118
point_double(felem_x_out, felem_y_out, felem_z_out,
1119
felem_x_in, felem_y_in, felem_z_in);
1120
felem_shrink(x_out, felem_x_out);
1121
felem_shrink(y_out, felem_y_out);
1122
felem_shrink(z_out, felem_z_out);
1125
/* copy_conditional copies in to out iff mask is all ones. */
1127
copy_conditional(felem out, const felem in, limb mask)
1130
for (i = 0; i < NLIMBS; ++i)
1132
const limb tmp = mask & (in[i] ^ out[i]);
1137
/* copy_small_conditional copies in to out iff mask is all ones. */
1139
copy_small_conditional(felem out, const smallfelem in, limb mask)
1142
const u64 mask64 = mask;
1143
for (i = 0; i < NLIMBS; ++i)
1145
out[i] = ((limb) (in[i] & mask64)) | (out[i] & ~mask);
1149
/* point_add calcuates (x1, y1, z1) + (x2, y2, z2)
1151
* The method is taken from:
1152
* http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
1153
* adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
1155
* This function includes a branch for checking whether the two input points
1156
* are equal, (while not equal to the point at infinity). This case never
1157
* happens during single point multiplication, so there is no timing leak for
1158
* ECDH or ECDSA signing. */
1159
static void point_add(felem x3, felem y3, felem z3,
1160
const felem x1, const felem y1, const felem z1,
1161
const int mixed, const smallfelem x2, const smallfelem y2, const smallfelem z2)
1163
felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
1164
longfelem tmp, tmp2;
1165
smallfelem small1, small2, small3, small4, small5;
1166
limb x_equal, y_equal, z1_is_zero, z2_is_zero;
1168
felem_shrink(small3, z1);
1170
z1_is_zero = smallfelem_is_zero(small3);
1171
z2_is_zero = smallfelem_is_zero(z2);
1173
/* ftmp = z1z1 = z1**2 */
1174
smallfelem_square(tmp, small3);
1175
felem_reduce(ftmp, tmp);
1176
/* ftmp[i] < 2^101 */
1177
felem_shrink(small1, ftmp);
1181
/* ftmp2 = z2z2 = z2**2 */
1182
smallfelem_square(tmp, z2);
1183
felem_reduce(ftmp2, tmp);
1184
/* ftmp2[i] < 2^101 */
1185
felem_shrink(small2, ftmp2);
1187
felem_shrink(small5, x1);
1189
/* u1 = ftmp3 = x1*z2z2 */
1190
smallfelem_mul(tmp, small5, small2);
1191
felem_reduce(ftmp3, tmp);
1192
/* ftmp3[i] < 2^101 */
1194
/* ftmp5 = z1 + z2 */
1195
felem_assign(ftmp5, z1);
1196
felem_small_sum(ftmp5, z2);
1197
/* ftmp5[i] < 2^107 */
1199
/* ftmp5 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2 */
1200
felem_square(tmp, ftmp5);
1201
felem_reduce(ftmp5, tmp);
1202
/* ftmp2 = z2z2 + z1z1 */
1203
felem_sum(ftmp2, ftmp);
1204
/* ftmp2[i] < 2^101 + 2^101 = 2^102 */
1205
felem_diff(ftmp5, ftmp2);
1206
/* ftmp5[i] < 2^105 + 2^101 < 2^106 */
1208
/* ftmp2 = z2 * z2z2 */
1209
smallfelem_mul(tmp, small2, z2);
1210
felem_reduce(ftmp2, tmp);
1212
/* s1 = ftmp2 = y1 * z2**3 */
1213
felem_mul(tmp, y1, ftmp2);
1214
felem_reduce(ftmp6, tmp);
1215
/* ftmp6[i] < 2^101 */
1219
/* We'll assume z2 = 1 (special case z2 = 0 is handled later) */
1221
/* u1 = ftmp3 = x1*z2z2 */
1222
felem_assign(ftmp3, x1);
1223
/* ftmp3[i] < 2^106 */
1226
felem_assign(ftmp5, z1);
1227
felem_scalar(ftmp5, 2);
1228
/* ftmp5[i] < 2*2^106 = 2^107 */
1230
/* s1 = ftmp2 = y1 * z2**3 */
1231
felem_assign(ftmp6, y1);
1232
/* ftmp6[i] < 2^106 */
1236
smallfelem_mul(tmp, x2, small1);
1237
felem_reduce(ftmp4, tmp);
1239
/* h = ftmp4 = u2 - u1 */
1240
felem_diff_zero107(ftmp4, ftmp3);
1241
/* ftmp4[i] < 2^107 + 2^101 < 2^108 */
1242
felem_shrink(small4, ftmp4);
1244
x_equal = smallfelem_is_zero(small4);
1246
/* z_out = ftmp5 * h */
1247
felem_small_mul(tmp, small4, ftmp5);
1248
felem_reduce(z_out, tmp);
1249
/* z_out[i] < 2^101 */
1251
/* ftmp = z1 * z1z1 */
1252
smallfelem_mul(tmp, small1, small3);
1253
felem_reduce(ftmp, tmp);
1255
/* s2 = tmp = y2 * z1**3 */
1256
felem_small_mul(tmp, y2, ftmp);
1257
felem_reduce(ftmp5, tmp);
1259
/* r = ftmp5 = (s2 - s1)*2 */
1260
felem_diff_zero107(ftmp5, ftmp6);
1261
/* ftmp5[i] < 2^107 + 2^107 = 2^108*/
1262
felem_scalar(ftmp5, 2);
1263
/* ftmp5[i] < 2^109 */
1264
felem_shrink(small1, ftmp5);
1265
y_equal = smallfelem_is_zero(small1);
1267
if (x_equal && y_equal && !z1_is_zero && !z2_is_zero)
1269
point_double(x3, y3, z3, x1, y1, z1);
1273
/* I = ftmp = (2h)**2 */
1274
felem_assign(ftmp, ftmp4);
1275
felem_scalar(ftmp, 2);
1276
/* ftmp[i] < 2*2^108 = 2^109 */
1277
felem_square(tmp, ftmp);
1278
felem_reduce(ftmp, tmp);
1280
/* J = ftmp2 = h * I */
1281
felem_mul(tmp, ftmp4, ftmp);
1282
felem_reduce(ftmp2, tmp);
1284
/* V = ftmp4 = U1 * I */
1285
felem_mul(tmp, ftmp3, ftmp);
1286
felem_reduce(ftmp4, tmp);
1288
/* x_out = r**2 - J - 2V */
1289
smallfelem_square(tmp, small1);
1290
felem_reduce(x_out, tmp);
1291
felem_assign(ftmp3, ftmp4);
1292
felem_scalar(ftmp4, 2);
1293
felem_sum(ftmp4, ftmp2);
1294
/* ftmp4[i] < 2*2^101 + 2^101 < 2^103 */
1295
felem_diff(x_out, ftmp4);
1296
/* x_out[i] < 2^105 + 2^101 */
1298
/* y_out = r(V-x_out) - 2 * s1 * J */
1299
felem_diff_zero107(ftmp3, x_out);
1300
/* ftmp3[i] < 2^107 + 2^101 < 2^108 */
1301
felem_small_mul(tmp, small1, ftmp3);
1302
felem_mul(tmp2, ftmp6, ftmp2);
1303
longfelem_scalar(tmp2, 2);
1304
/* tmp2[i] < 2*2^67 = 2^68 */
1305
longfelem_diff(tmp, tmp2);
1306
/* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
1307
felem_reduce_zero105(y_out, tmp);
1308
/* y_out[i] < 2^106 */
1310
copy_small_conditional(x_out, x2, z1_is_zero);
1311
copy_conditional(x_out, x1, z2_is_zero);
1312
copy_small_conditional(y_out, y2, z1_is_zero);
1313
copy_conditional(y_out, y1, z2_is_zero);
1314
copy_small_conditional(z_out, z2, z1_is_zero);
1315
copy_conditional(z_out, z1, z2_is_zero);
1316
felem_assign(x3, x_out);
1317
felem_assign(y3, y_out);
1318
felem_assign(z3, z_out);
1321
/* point_add_small is the same as point_add, except that it operates on
1323
static void point_add_small(smallfelem x3, smallfelem y3, smallfelem z3,
1324
smallfelem x1, smallfelem y1, smallfelem z1,
1325
smallfelem x2, smallfelem y2, smallfelem z2)
1327
felem felem_x3, felem_y3, felem_z3;
1328
felem felem_x1, felem_y1, felem_z1;
1329
smallfelem_expand(felem_x1, x1);
1330
smallfelem_expand(felem_y1, y1);
1331
smallfelem_expand(felem_z1, z1);
1332
point_add(felem_x3, felem_y3, felem_z3, felem_x1, felem_y1, felem_z1, 0, x2, y2, z2);
1333
felem_shrink(x3, felem_x3);
1334
felem_shrink(y3, felem_y3);
1335
felem_shrink(z3, felem_z3);
1338
/* Base point pre computation
1339
* --------------------------
1341
* Two different sorts of precomputed tables are used in the following code.
1342
* Each contain various points on the curve, where each point is three field
1343
* elements (x, y, z).
1345
* For the base point table, z is usually 1 (0 for the point at infinity).
1346
* This table has 2 * 16 elements, starting with the following:
1347
* index | bits | point
1348
* ------+---------+------------------------------
1351
* 2 | 0 0 1 0 | 2^64G
1352
* 3 | 0 0 1 1 | (2^64 + 1)G
1353
* 4 | 0 1 0 0 | 2^128G
1354
* 5 | 0 1 0 1 | (2^128 + 1)G
1355
* 6 | 0 1 1 0 | (2^128 + 2^64)G
1356
* 7 | 0 1 1 1 | (2^128 + 2^64 + 1)G
1357
* 8 | 1 0 0 0 | 2^192G
1358
* 9 | 1 0 0 1 | (2^192 + 1)G
1359
* 10 | 1 0 1 0 | (2^192 + 2^64)G
1360
* 11 | 1 0 1 1 | (2^192 + 2^64 + 1)G
1361
* 12 | 1 1 0 0 | (2^192 + 2^128)G
1362
* 13 | 1 1 0 1 | (2^192 + 2^128 + 1)G
1363
* 14 | 1 1 1 0 | (2^192 + 2^128 + 2^64)G
1364
* 15 | 1 1 1 1 | (2^192 + 2^128 + 2^64 + 1)G
1365
* followed by a copy of this with each element multiplied by 2^32.
1367
* The reason for this is so that we can clock bits into four different
1368
* locations when doing simple scalar multiplies against the base point,
1369
* and then another four locations using the second 16 elements.
1371
* Tables for other points have table[i] = iG for i in 0 .. 16. */
1373
/* gmul is the table of precomputed base points */
1374
static const smallfelem gmul[2][16][3] =
1378
{{0xf4a13945d898c296, 0x77037d812deb33a0, 0xf8bce6e563a440f2, 0x6b17d1f2e12c4247},
1379
{0xcbb6406837bf51f5, 0x2bce33576b315ece, 0x8ee7eb4a7c0f9e16, 0x4fe342e2fe1a7f9b},
1381
{{0x90e75cb48e14db63, 0x29493baaad651f7e, 0x8492592e326e25de, 0x0fa822bc2811aaa5},
1382
{0xe41124545f462ee7, 0x34b1a65050fe82f5, 0x6f4ad4bcb3df188b, 0xbff44ae8f5dba80d},
1384
{{0x93391ce2097992af, 0xe96c98fd0d35f1fa, 0xb257c0de95e02789, 0x300a4bbc89d6726f},
1385
{0xaa54a291c08127a0, 0x5bb1eeada9d806a5, 0x7f1ddb25ff1e3c6f, 0x72aac7e0d09b4644},
1387
{{0x57c84fc9d789bd85, 0xfc35ff7dc297eac3, 0xfb982fd588c6766e, 0x447d739beedb5e67},
1388
{0x0c7e33c972e25b32, 0x3d349b95a7fae500, 0xe12e9d953a4aaff7, 0x2d4825ab834131ee},
1390
{{0x13949c932a1d367f, 0xef7fbd2b1a0a11b7, 0xddc6068bb91dfc60, 0xef9519328a9c72ff},
1391
{0x196035a77376d8a8, 0x23183b0895ca1740, 0xc1ee9807022c219c, 0x611e9fc37dbb2c9b},
1393
{{0xcae2b1920b57f4bc, 0x2936df5ec6c9bc36, 0x7dea6482e11238bf, 0x550663797b51f5d8},
1394
{0x44ffe216348a964c, 0x9fb3d576dbdefbe1, 0x0afa40018d9d50e5, 0x157164848aecb851},
1396
{{0xe48ecafffc5cde01, 0x7ccd84e70d715f26, 0xa2e8f483f43e4391, 0xeb5d7745b21141ea},
1397
{0xcac917e2731a3479, 0x85f22cfe2844b645, 0x0990e6a158006cee, 0xeafd72ebdbecc17b},
1399
{{0x6cf20ffb313728be, 0x96439591a3c6b94a, 0x2736ff8344315fc5, 0xa6d39677a7849276},
1400
{0xf2bab833c357f5f4, 0x824a920c2284059b, 0x66b8babd2d27ecdf, 0x674f84749b0b8816},
1402
{{0x2df48c04677c8a3e, 0x74e02f080203a56b, 0x31855f7db8c7fedb, 0x4e769e7672c9ddad},
1403
{0xa4c36165b824bbb0, 0xfb9ae16f3b9122a5, 0x1ec0057206947281, 0x42b99082de830663},
1405
{{0x6ef95150dda868b9, 0xd1f89e799c0ce131, 0x7fdc1ca008a1c478, 0x78878ef61c6ce04d},
1406
{0x9c62b9121fe0d976, 0x6ace570ebde08d4f, 0xde53142c12309def, 0xb6cb3f5d7b72c321},
1408
{{0x7f991ed2c31a3573, 0x5b82dd5bd54fb496, 0x595c5220812ffcae, 0x0c88bc4d716b1287},
1409
{0x3a57bf635f48aca8, 0x7c8181f4df2564f3, 0x18d1b5b39c04e6aa, 0xdd5ddea3f3901dc6},
1411
{{0xe96a79fb3e72ad0c, 0x43a0a28c42ba792f, 0xefe0a423083e49f3, 0x68f344af6b317466},
1412
{0xcdfe17db3fb24d4a, 0x668bfc2271f5c626, 0x604ed93c24d67ff3, 0x31b9c405f8540a20},
1414
{{0xd36b4789a2582e7f, 0x0d1a10144ec39c28, 0x663c62c3edbad7a0, 0x4052bf4b6f461db9},
1415
{0x235a27c3188d25eb, 0xe724f33999bfcc5b, 0x862be6bd71d70cc8, 0xfecf4d5190b0fc61},
1417
{{0x74346c10a1d4cfac, 0xafdf5cc08526a7a4, 0x123202a8f62bff7a, 0x1eddbae2c802e41a},
1418
{0x8fa0af2dd603f844, 0x36e06b7e4c701917, 0x0c45f45273db33a0, 0x43104d86560ebcfc},
1420
{{0x9615b5110d1d78e5, 0x66b0de3225c4744b, 0x0a4a46fb6aaf363a, 0xb48e26b484f7a21c},
1421
{0x06ebb0f621a01b2d, 0xc004e4048b7b0f98, 0x64131bcdfed6f668, 0xfac015404d4d3dab},
1426
{{0x3a5a9e22185a5943, 0x1ab919365c65dfb6, 0x21656b32262c71da, 0x7fe36b40af22af89},
1427
{0xd50d152c699ca101, 0x74b3d5867b8af212, 0x9f09f40407dca6f1, 0xe697d45825b63624},
1429
{{0xa84aa9397512218e, 0xe9a521b074ca0141, 0x57880b3a18a2e902, 0x4a5b506612a677a6},
1430
{0x0beada7a4c4f3840, 0x626db15419e26d9d, 0xc42604fbe1627d40, 0xeb13461ceac089f1},
1432
{{0xf9faed0927a43281, 0x5e52c4144103ecbc, 0xc342967aa815c857, 0x0781b8291c6a220a},
1433
{0x5a8343ceeac55f80, 0x88f80eeee54a05e3, 0x97b2a14f12916434, 0x690cde8df0151593},
1435
{{0xaee9c75df7f82f2a, 0x9e4c35874afdf43a, 0xf5622df437371326, 0x8a535f566ec73617},
1436
{0xc5f9a0ac223094b7, 0xcde533864c8c7669, 0x37e02819085a92bf, 0x0455c08468b08bd7},
1438
{{0x0c0a6e2c9477b5d9, 0xf9a4bf62876dc444, 0x5050a949b6cdc279, 0x06bada7ab77f8276},
1439
{0xc8b4aed1ea48dac9, 0xdebd8a4b7ea1070f, 0x427d49101366eb70, 0x5b476dfd0e6cb18a},
1441
{{0x7c5c3e44278c340a, 0x4d54606812d66f3b, 0x29a751b1ae23c5d8, 0x3e29864e8a2ec908},
1442
{0x142d2a6626dbb850, 0xad1744c4765bd780, 0x1f150e68e322d1ed, 0x239b90ea3dc31e7e},
1444
{{0x78c416527a53322a, 0x305dde6709776f8e, 0xdbcab759f8862ed4, 0x820f4dd949f72ff7},
1445
{0x6cc544a62b5debd4, 0x75be5d937b4e8cc4, 0x1b481b1b215c14d3, 0x140406ec783a05ec},
1447
{{0x6a703f10e895df07, 0xfd75f3fa01876bd8, 0xeb5b06e70ce08ffe, 0x68f6b8542783dfee},
1448
{0x90c76f8a78712655, 0xcf5293d2f310bf7f, 0xfbc8044dfda45028, 0xcbe1feba92e40ce6},
1450
{{0xe998ceea4396e4c1, 0xfc82ef0b6acea274, 0x230f729f2250e927, 0xd0b2f94d2f420109},
1451
{0x4305adddb38d4966, 0x10b838f8624c3b45, 0x7db2636658954e7a, 0x971459828b0719e5},
1453
{{0x4bd6b72623369fc9, 0x57f2929e53d0b876, 0xc2d5cba4f2340687, 0x961610004a866aba},
1454
{0x49997bcd2e407a5e, 0x69ab197d92ddcb24, 0x2cf1f2438fe5131c, 0x7acb9fadcee75e44},
1456
{{0x254e839423d2d4c0, 0xf57f0c917aea685b, 0xa60d880f6f75aaea, 0x24eb9acca333bf5b},
1457
{0xe3de4ccb1cda5dea, 0xfeef9341c51a6b4f, 0x743125f88bac4c4d, 0x69f891c5acd079cc},
1459
{{0xeee44b35702476b5, 0x7ed031a0e45c2258, 0xb422d1e7bd6f8514, 0xe51f547c5972a107},
1460
{0xa25bcd6fc9cf343d, 0x8ca922ee097c184e, 0xa62f98b3a9fe9a06, 0x1c309a2b25bb1387},
1462
{{0x9295dbeb1967c459, 0xb00148833472c98e, 0xc504977708011828, 0x20b87b8aa2c4e503},
1463
{0x3063175de057c277, 0x1bd539338fe582dd, 0x0d11adef5f69a044, 0xf5c6fa49919776be},
1465
{{0x8c944e760fd59e11, 0x3876cba1102fad5f, 0xa454c3fad83faa56, 0x1ed7d1b9332010b9},
1466
{0xa1011a270024b889, 0x05e4d0dcac0cd344, 0x52b520f0eb6a2a24, 0x3a2b03f03217257a},
1468
{{0xf20fc2afdf1d043d, 0xf330240db58d5a62, 0xfc7d229ca0058c3b, 0x15fee545c78dd9f6},
1469
{0x501e82885bc98cda, 0x41ef80e5d046ac04, 0x557d9f49461210fb, 0x4ab5b6b2b8753f81},
1472
/* select_point selects the |idx|th point from a precomputation table and
1473
* copies it to out. */
1474
static void select_point(const u64 idx, unsigned int size, const smallfelem pre_comp[16][3], smallfelem out[3])
1477
u64 *outlimbs = &out[0][0];
1478
memset(outlimbs, 0, 3 * sizeof(smallfelem));
1480
for (i = 0; i < size; i++)
1482
const u64 *inlimbs = (u64*) &pre_comp[i][0][0];
1489
for (j = 0; j < NLIMBS * 3; j++)
1490
outlimbs[j] |= inlimbs[j] & mask;
1494
/* get_bit returns the |i|th bit in |in| */
1495
static char get_bit(const felem_bytearray in, int i)
1497
if ((i < 0) || (i >= 256))
1499
return (in[i >> 3] >> (i & 7)) & 1;
1502
/* Interleaved point multiplication using precomputed point multiples:
1503
* The small point multiples 0*P, 1*P, ..., 17*P are in pre_comp[],
1504
* the scalars in scalars[]. If g_scalar is non-NULL, we also add this multiple
1505
* of the generator, using certain (large) precomputed multiples in g_pre_comp.
1506
* Output point (X, Y, Z) is stored in x_out, y_out, z_out */
1507
static void batch_mul(felem x_out, felem y_out, felem z_out,
1508
const felem_bytearray scalars[], const unsigned num_points, const u8 *g_scalar,
1509
const int mixed, const smallfelem pre_comp[][17][3], const smallfelem g_pre_comp[2][16][3])
1512
unsigned num, gen_mul = (g_scalar != NULL);
1518
/* set nq to the point at infinity */
1519
memset(nq, 0, 3 * sizeof(felem));
1521
/* Loop over all scalars msb-to-lsb, interleaving additions
1522
* of multiples of the generator (two in each of the last 32 rounds)
1523
* and additions of other points multiples (every 5th round).
1525
skip = 1; /* save two point operations in the first round */
1526
for (i = (num_points ? 255 : 31); i >= 0; --i)
1530
point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1532
/* add multiples of the generator */
1533
if (gen_mul && (i <= 31))
1535
/* first, look 32 bits upwards */
1536
bits = get_bit(g_scalar, i + 224) << 3;
1537
bits |= get_bit(g_scalar, i + 160) << 2;
1538
bits |= get_bit(g_scalar, i + 96) << 1;
1539
bits |= get_bit(g_scalar, i + 32);
1540
/* select the point to add, in constant time */
1541
select_point(bits, 16, g_pre_comp[1], tmp);
1545
point_add(nq[0], nq[1], nq[2],
1546
nq[0], nq[1], nq[2],
1547
1 /* mixed */, tmp[0], tmp[1], tmp[2]);
1551
smallfelem_expand(nq[0], tmp[0]);
1552
smallfelem_expand(nq[1], tmp[1]);
1553
smallfelem_expand(nq[2], tmp[2]);
1557
/* second, look at the current position */
1558
bits = get_bit(g_scalar, i + 192) << 3;
1559
bits |= get_bit(g_scalar, i + 128) << 2;
1560
bits |= get_bit(g_scalar, i + 64) << 1;
1561
bits |= get_bit(g_scalar, i);
1562
/* select the point to add, in constant time */
1563
select_point(bits, 16, g_pre_comp[0], tmp);
1564
point_add(nq[0], nq[1], nq[2],
1565
nq[0], nq[1], nq[2],
1566
1 /* mixed */, tmp[0], tmp[1], tmp[2]);
1569
/* do other additions every 5 doublings */
1570
if (num_points && (i % 5 == 0))
1572
/* loop over all scalars */
1573
for (num = 0; num < num_points; ++num)
1575
bits = get_bit(scalars[num], i + 4) << 5;
1576
bits |= get_bit(scalars[num], i + 3) << 4;
1577
bits |= get_bit(scalars[num], i + 2) << 3;
1578
bits |= get_bit(scalars[num], i + 1) << 2;
1579
bits |= get_bit(scalars[num], i) << 1;
1580
bits |= get_bit(scalars[num], i - 1);
1581
ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1583
/* select the point to add or subtract, in constant time */
1584
select_point(digit, 17, pre_comp[num], tmp);
1585
smallfelem_neg(ftmp, tmp[1]); /* (X, -Y, Z) is the negative point */
1586
copy_small_conditional(ftmp, tmp[1], (((limb) sign) - 1));
1587
felem_contract(tmp[1], ftmp);
1591
point_add(nq[0], nq[1], nq[2],
1592
nq[0], nq[1], nq[2],
1593
mixed, tmp[0], tmp[1], tmp[2]);
1597
smallfelem_expand(nq[0], tmp[0]);
1598
smallfelem_expand(nq[1], tmp[1]);
1599
smallfelem_expand(nq[2], tmp[2]);
1605
felem_assign(x_out, nq[0]);
1606
felem_assign(y_out, nq[1]);
1607
felem_assign(z_out, nq[2]);
1610
/* Precomputation for the group generator. */
1612
smallfelem g_pre_comp[2][16][3];
1614
} NISTP256_PRE_COMP;
1616
const EC_METHOD *EC_GFp_nistp256_method(void)
1618
static const EC_METHOD ret = {
1619
EC_FLAGS_DEFAULT_OCT,
1620
NID_X9_62_prime_field,
1621
ec_GFp_nistp256_group_init,
1622
ec_GFp_simple_group_finish,
1623
ec_GFp_simple_group_clear_finish,
1624
ec_GFp_nist_group_copy,
1625
ec_GFp_nistp256_group_set_curve,
1626
ec_GFp_simple_group_get_curve,
1627
ec_GFp_simple_group_get_degree,
1628
ec_GFp_simple_group_check_discriminant,
1629
ec_GFp_simple_point_init,
1630
ec_GFp_simple_point_finish,
1631
ec_GFp_simple_point_clear_finish,
1632
ec_GFp_simple_point_copy,
1633
ec_GFp_simple_point_set_to_infinity,
1634
ec_GFp_simple_set_Jprojective_coordinates_GFp,
1635
ec_GFp_simple_get_Jprojective_coordinates_GFp,
1636
ec_GFp_simple_point_set_affine_coordinates,
1637
ec_GFp_nistp256_point_get_affine_coordinates,
1638
0 /* point_set_compressed_coordinates */,
1643
ec_GFp_simple_invert,
1644
ec_GFp_simple_is_at_infinity,
1645
ec_GFp_simple_is_on_curve,
1647
ec_GFp_simple_make_affine,
1648
ec_GFp_simple_points_make_affine,
1649
ec_GFp_nistp256_points_mul,
1650
ec_GFp_nistp256_precompute_mult,
1651
ec_GFp_nistp256_have_precompute_mult,
1652
ec_GFp_nist_field_mul,
1653
ec_GFp_nist_field_sqr,
1655
0 /* field_encode */,
1656
0 /* field_decode */,
1657
0 /* field_set_to_one */ };
1662
/******************************************************************************/
1663
/* FUNCTIONS TO MANAGE PRECOMPUTATION
1666
static NISTP256_PRE_COMP *nistp256_pre_comp_new()
1668
NISTP256_PRE_COMP *ret = NULL;
1669
ret = (NISTP256_PRE_COMP *) OPENSSL_malloc(sizeof *ret);
1672
ECerr(EC_F_NISTP256_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1675
memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1676
ret->references = 1;
1680
static void *nistp256_pre_comp_dup(void *src_)
1682
NISTP256_PRE_COMP *src = src_;
1684
/* no need to actually copy, these objects never change! */
1685
CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1690
static void nistp256_pre_comp_free(void *pre_)
1693
NISTP256_PRE_COMP *pre = pre_;
1698
i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1705
static void nistp256_pre_comp_clear_free(void *pre_)
1708
NISTP256_PRE_COMP *pre = pre_;
1713
i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1717
OPENSSL_cleanse(pre, sizeof *pre);
1721
/******************************************************************************/
1722
/* OPENSSL EC_METHOD FUNCTIONS
1725
int ec_GFp_nistp256_group_init(EC_GROUP *group)
1728
ret = ec_GFp_simple_group_init(group);
1729
group->a_is_minus3 = 1;
1733
int ec_GFp_nistp256_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1734
const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
1737
BN_CTX *new_ctx = NULL;
1738
BIGNUM *curve_p, *curve_a, *curve_b;
1741
if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1743
if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1744
((curve_a = BN_CTX_get(ctx)) == NULL) ||
1745
((curve_b = BN_CTX_get(ctx)) == NULL)) goto err;
1746
BN_bin2bn(nistp256_curve_params[0], sizeof(felem_bytearray), curve_p);
1747
BN_bin2bn(nistp256_curve_params[1], sizeof(felem_bytearray), curve_a);
1748
BN_bin2bn(nistp256_curve_params[2], sizeof(felem_bytearray), curve_b);
1749
if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) ||
1750
(BN_cmp(curve_b, b)))
1752
ECerr(EC_F_EC_GFP_NISTP256_GROUP_SET_CURVE,
1753
EC_R_WRONG_CURVE_PARAMETERS);
1756
group->field_mod_func = BN_nist_mod_256;
1757
ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1760
if (new_ctx != NULL)
1761
BN_CTX_free(new_ctx);
1765
/* Takes the Jacobian coordinates (X, Y, Z) of a point and returns
1766
* (X', Y') = (X/Z^2, Y/Z^3) */
1767
int ec_GFp_nistp256_point_get_affine_coordinates(const EC_GROUP *group,
1768
const EC_POINT *point, BIGNUM *x, BIGNUM *y, BN_CTX *ctx)
1770
felem z1, z2, x_in, y_in;
1771
smallfelem x_out, y_out;
1774
if (EC_POINT_is_at_infinity(group, point))
1776
ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1777
EC_R_POINT_AT_INFINITY);
1780
if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1781
(!BN_to_felem(z1, &point->Z))) return 0;
1783
felem_square(tmp, z2); felem_reduce(z1, tmp);
1784
felem_mul(tmp, x_in, z1); felem_reduce(x_in, tmp);
1785
felem_contract(x_out, x_in);
1788
if (!smallfelem_to_BN(x, x_out)) {
1789
ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1794
felem_mul(tmp, z1, z2); felem_reduce(z1, tmp);
1795
felem_mul(tmp, y_in, z1); felem_reduce(y_in, tmp);
1796
felem_contract(y_out, y_in);
1799
if (!smallfelem_to_BN(y, y_out))
1801
ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1809
static void make_points_affine(size_t num, smallfelem points[/* num */][3], smallfelem tmp_smallfelems[/* num+1 */])
1811
/* Runs in constant time, unless an input is the point at infinity
1812
* (which normally shouldn't happen). */
1813
ec_GFp_nistp_points_make_affine_internal(
1818
(void (*)(void *)) smallfelem_one,
1819
(int (*)(const void *)) smallfelem_is_zero_int,
1820
(void (*)(void *, const void *)) smallfelem_assign,
1821
(void (*)(void *, const void *)) smallfelem_square_contract,
1822
(void (*)(void *, const void *, const void *)) smallfelem_mul_contract,
1823
(void (*)(void *, const void *)) smallfelem_inv_contract,
1824
(void (*)(void *, const void *)) smallfelem_assign /* nothing to contract */);
1827
/* Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL values
1828
* Result is stored in r (r can equal one of the inputs). */
1829
int ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_POINT *r,
1830
const BIGNUM *scalar, size_t num, const EC_POINT *points[],
1831
const BIGNUM *scalars[], BN_CTX *ctx)
1836
BN_CTX *new_ctx = NULL;
1837
BIGNUM *x, *y, *z, *tmp_scalar;
1838
felem_bytearray g_secret;
1839
felem_bytearray *secrets = NULL;
1840
smallfelem (*pre_comp)[17][3] = NULL;
1841
smallfelem *tmp_smallfelems = NULL;
1842
felem_bytearray tmp;
1843
unsigned i, num_bytes;
1844
int have_pre_comp = 0;
1845
size_t num_points = num;
1846
smallfelem x_in, y_in, z_in;
1847
felem x_out, y_out, z_out;
1848
NISTP256_PRE_COMP *pre = NULL;
1849
const smallfelem (*g_pre_comp)[16][3] = NULL;
1850
EC_POINT *generator = NULL;
1851
const EC_POINT *p = NULL;
1852
const BIGNUM *p_scalar = NULL;
1855
if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1857
if (((x = BN_CTX_get(ctx)) == NULL) ||
1858
((y = BN_CTX_get(ctx)) == NULL) ||
1859
((z = BN_CTX_get(ctx)) == NULL) ||
1860
((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1865
pre = EC_EX_DATA_get_data(group->extra_data,
1866
nistp256_pre_comp_dup, nistp256_pre_comp_free,
1867
nistp256_pre_comp_clear_free);
1869
/* we have precomputation, try to use it */
1870
g_pre_comp = (const smallfelem (*)[16][3]) pre->g_pre_comp;
1872
/* try to use the standard precomputation */
1873
g_pre_comp = &gmul[0];
1874
generator = EC_POINT_new(group);
1875
if (generator == NULL)
1877
/* get the generator from precomputation */
1878
if (!smallfelem_to_BN(x, g_pre_comp[0][1][0]) ||
1879
!smallfelem_to_BN(y, g_pre_comp[0][1][1]) ||
1880
!smallfelem_to_BN(z, g_pre_comp[0][1][2]))
1882
ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
1885
if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1886
generator, x, y, z, ctx))
1888
if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1889
/* precomputation matches generator */
1892
/* we don't have valid precomputation:
1893
* treat the generator as a random point */
1898
if (num_points >= 3)
1900
/* unless we precompute multiples for just one or two points,
1901
* converting those into affine form is time well spent */
1904
secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
1905
pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(smallfelem));
1907
tmp_smallfelems = OPENSSL_malloc((num_points * 17 + 1) * sizeof(smallfelem));
1908
if ((secrets == NULL) || (pre_comp == NULL) || (mixed && (tmp_smallfelems == NULL)))
1910
ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1914
/* we treat NULL scalars as 0, and NULL points as points at infinity,
1915
* i.e., they contribute nothing to the linear combination */
1916
memset(secrets, 0, num_points * sizeof(felem_bytearray));
1917
memset(pre_comp, 0, num_points * 17 * 3 * sizeof(smallfelem));
1918
for (i = 0; i < num_points; ++i)
1921
/* we didn't have a valid precomputation, so we pick
1924
p = EC_GROUP_get0_generator(group);
1928
/* the i^th point */
1931
p_scalar = scalars[i];
1933
if ((p_scalar != NULL) && (p != NULL))
1935
/* reduce scalar to 0 <= scalar < 2^256 */
1936
if ((BN_num_bits(p_scalar) > 256) || (BN_is_negative(p_scalar)))
1938
/* this is an unusual input, and we don't guarantee
1939
* constant-timeness */
1940
if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx))
1942
ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
1945
num_bytes = BN_bn2bin(tmp_scalar, tmp);
1948
num_bytes = BN_bn2bin(p_scalar, tmp);
1949
flip_endian(secrets[i], tmp, num_bytes);
1950
/* precompute multiples */
1951
if ((!BN_to_felem(x_out, &p->X)) ||
1952
(!BN_to_felem(y_out, &p->Y)) ||
1953
(!BN_to_felem(z_out, &p->Z))) goto err;
1954
felem_shrink(pre_comp[i][1][0], x_out);
1955
felem_shrink(pre_comp[i][1][1], y_out);
1956
felem_shrink(pre_comp[i][1][2], z_out);
1957
for (j = 2; j <= 16; ++j)
1962
pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1963
pre_comp[i][1][0], pre_comp[i][1][1], pre_comp[i][1][2],
1964
pre_comp[i][j-1][0], pre_comp[i][j-1][1], pre_comp[i][j-1][2]);
1969
pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1970
pre_comp[i][j/2][0], pre_comp[i][j/2][1], pre_comp[i][j/2][2]);
1976
make_points_affine(num_points * 17, pre_comp[0], tmp_smallfelems);
1979
/* the scalar for the generator */
1980
if ((scalar != NULL) && (have_pre_comp))
1982
memset(g_secret, 0, sizeof(g_secret));
1983
/* reduce scalar to 0 <= scalar < 2^256 */
1984
if ((BN_num_bits(scalar) > 256) || (BN_is_negative(scalar)))
1986
/* this is an unusual input, and we don't guarantee
1987
* constant-timeness */
1988
if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx))
1990
ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
1993
num_bytes = BN_bn2bin(tmp_scalar, tmp);
1996
num_bytes = BN_bn2bin(scalar, tmp);
1997
flip_endian(g_secret, tmp, num_bytes);
1998
/* do the multiplication with generator precomputation*/
1999
batch_mul(x_out, y_out, z_out,
2000
(const felem_bytearray (*)) secrets, num_points,
2002
mixed, (const smallfelem (*)[17][3]) pre_comp,
2006
/* do the multiplication without generator precomputation */
2007
batch_mul(x_out, y_out, z_out,
2008
(const felem_bytearray (*)) secrets, num_points,
2009
NULL, mixed, (const smallfelem (*)[17][3]) pre_comp, NULL);
2010
/* reduce the output to its unique minimal representation */
2011
felem_contract(x_in, x_out);
2012
felem_contract(y_in, y_out);
2013
felem_contract(z_in, z_out);
2014
if ((!smallfelem_to_BN(x, x_in)) || (!smallfelem_to_BN(y, y_in)) ||
2015
(!smallfelem_to_BN(z, z_in)))
2017
ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
2020
ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
2024
if (generator != NULL)
2025
EC_POINT_free(generator);
2026
if (new_ctx != NULL)
2027
BN_CTX_free(new_ctx);
2028
if (secrets != NULL)
2029
OPENSSL_free(secrets);
2030
if (pre_comp != NULL)
2031
OPENSSL_free(pre_comp);
2032
if (tmp_smallfelems != NULL)
2033
OPENSSL_free(tmp_smallfelems);
2037
int ec_GFp_nistp256_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
2040
NISTP256_PRE_COMP *pre = NULL;
2042
BN_CTX *new_ctx = NULL;
2044
EC_POINT *generator = NULL;
2045
smallfelem tmp_smallfelems[32];
2046
felem x_tmp, y_tmp, z_tmp;
2048
/* throw away old precomputation */
2049
EC_EX_DATA_free_data(&group->extra_data, nistp256_pre_comp_dup,
2050
nistp256_pre_comp_free, nistp256_pre_comp_clear_free);
2052
if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
2054
if (((x = BN_CTX_get(ctx)) == NULL) ||
2055
((y = BN_CTX_get(ctx)) == NULL))
2057
/* get the generator */
2058
if (group->generator == NULL) goto err;
2059
generator = EC_POINT_new(group);
2060
if (generator == NULL)
2062
BN_bin2bn(nistp256_curve_params[3], sizeof (felem_bytearray), x);
2063
BN_bin2bn(nistp256_curve_params[4], sizeof (felem_bytearray), y);
2064
if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
2066
if ((pre = nistp256_pre_comp_new()) == NULL)
2068
/* if the generator is the standard one, use built-in precomputation */
2069
if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
2071
memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
2075
if ((!BN_to_felem(x_tmp, &group->generator->X)) ||
2076
(!BN_to_felem(y_tmp, &group->generator->Y)) ||
2077
(!BN_to_felem(z_tmp, &group->generator->Z)))
2079
felem_shrink(pre->g_pre_comp[0][1][0], x_tmp);
2080
felem_shrink(pre->g_pre_comp[0][1][1], y_tmp);
2081
felem_shrink(pre->g_pre_comp[0][1][2], z_tmp);
2082
/* compute 2^64*G, 2^128*G, 2^192*G for the first table,
2083
* 2^32*G, 2^96*G, 2^160*G, 2^224*G for the second one
2085
for (i = 1; i <= 8; i <<= 1)
2088
pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
2089
pre->g_pre_comp[0][i][0], pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
2090
for (j = 0; j < 31; ++j)
2093
pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
2094
pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
2099
pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2],
2100
pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
2101
for (j = 0; j < 31; ++j)
2104
pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2],
2105
pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2]);
2108
for (i = 0; i < 2; i++)
2110
/* g_pre_comp[i][0] is the point at infinity */
2111
memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
2112
/* the remaining multiples */
2113
/* 2^64*G + 2^128*G resp. 2^96*G + 2^160*G */
2115
pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1], pre->g_pre_comp[i][6][2],
2116
pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
2117
pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]);
2118
/* 2^64*G + 2^192*G resp. 2^96*G + 2^224*G */
2120
pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1], pre->g_pre_comp[i][10][2],
2121
pre->g_pre_comp[i][8][0], pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
2122
pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]);
2123
/* 2^128*G + 2^192*G resp. 2^160*G + 2^224*G */
2125
pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
2126
pre->g_pre_comp[i][8][0], pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
2127
pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2]);
2128
/* 2^64*G + 2^128*G + 2^192*G resp. 2^96*G + 2^160*G + 2^224*G */
2130
pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1], pre->g_pre_comp[i][14][2],
2131
pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
2132
pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]);
2133
for (j = 1; j < 8; ++j)
2135
/* odd multiples: add G resp. 2^32*G */
2137
pre->g_pre_comp[i][2*j+1][0], pre->g_pre_comp[i][2*j+1][1], pre->g_pre_comp[i][2*j+1][2],
2138
pre->g_pre_comp[i][2*j][0], pre->g_pre_comp[i][2*j][1], pre->g_pre_comp[i][2*j][2],
2139
pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1], pre->g_pre_comp[i][1][2]);
2142
make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_smallfelems);
2144
if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp256_pre_comp_dup,
2145
nistp256_pre_comp_free, nistp256_pre_comp_clear_free))
2151
if (generator != NULL)
2152
EC_POINT_free(generator);
2153
if (new_ctx != NULL)
2154
BN_CTX_free(new_ctx);
2156
nistp256_pre_comp_free(pre);
2160
int ec_GFp_nistp256_have_precompute_mult(const EC_GROUP *group)
2162
if (EC_EX_DATA_get_data(group->extra_data, nistp256_pre_comp_dup,
2163
nistp256_pre_comp_free, nistp256_pre_comp_clear_free)
2170
static void *dummy=&dummy;