113
112
#include "cryptlib.h"
114
113
#include "bn_lcl.h"
119
# define alloca _alloca
121
#elif defined(__GNUC__)
123
# define alloca(s) __builtin_alloca((s))
129
#include "rsaz_exp.h"
132
#if defined(OPENSSL_BN_ASM_MONT) && (defined(__sparc__) || defined(__sparc))
133
# include "sparc_arch.h"
134
extern unsigned int OPENSSL_sparcv9cap_P[];
135
# define SPARC_T4_MONT
116
138
/* maximum precomputation table size for *variable* sliding windows */
117
#define TABLE_SIZE 32
139
#define TABLE_SIZE 32
119
141
/* this one works - simple but works */
120
142
int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
125
if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
127
/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
128
BNerr(BN_F_BN_EXP,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
133
if ((r == a) || (r == p))
134
rr = BN_CTX_get(ctx);
138
if (rr == NULL || v == NULL) goto err;
140
if (BN_copy(v,a) == NULL) goto err;
144
{ if (BN_copy(rr,a) == NULL) goto err; }
145
else { if (!BN_one(rr)) goto err; }
147
for (i=1; i<bits; i++)
149
if (!BN_sqr(v,v,ctx)) goto err;
150
if (BN_is_bit_set(p,i))
152
if (!BN_mul(rr,rr,v,ctx)) goto err;
157
if (r != rr) BN_copy(r,rr);
144
int i, bits, ret = 0;
147
if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
148
/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
149
BNerr(BN_F_BN_EXP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
154
if ((r == a) || (r == p))
155
rr = BN_CTX_get(ctx);
159
if (rr == NULL || v == NULL)
162
if (BN_copy(v, a) == NULL)
164
bits = BN_num_bits(p);
167
if (BN_copy(rr, a) == NULL)
174
for (i = 1; i < bits; i++) {
175
if (!BN_sqr(v, v, ctx))
177
if (BN_is_bit_set(p, i)) {
178
if (!BN_mul(rr, rr, v, ctx))
164
191
int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
173
/* For even modulus m = 2^k*m_odd, it might make sense to compute
174
* a^p mod m_odd and a^p mod 2^k separately (with Montgomery
175
* exponentiation for the odd part), using appropriate exponent
176
* reductions, and combine the results using the CRT.
178
* For now, we use Montgomery only if the modulus is odd; otherwise,
179
* exponentiation using the reciprocal-based quick remaindering
182
* (Timing obtained with expspeed.c [computations a^p mod m
183
* where a, p, m are of the same length: 256, 512, 1024, 2048,
184
* 4096, 8192 bits], compared to the running time of the
185
* standard algorithm:
187
* BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration]
188
* 55 .. 77 % [UltraSparc processor, but
189
* debug-solaris-sparcv8-gcc conf.]
191
* BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration]
192
* 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
194
* On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
195
* at 2048 and more bits, but at 512 and 1024 bits, it was
196
* slower even than the standard algorithm!
198
* "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
199
* should be obtained when the new Montgomery reduction code
200
* has been integrated into OpenSSL.)
201
* For even modulus m = 2^k*m_odd, it might make sense to compute
202
* a^p mod m_odd and a^p mod 2^k separately (with Montgomery
203
* exponentiation for the odd part), using appropriate exponent
204
* reductions, and combine the results using the CRT.
206
* For now, we use Montgomery only if the modulus is odd; otherwise,
207
* exponentiation using the reciprocal-based quick remaindering
210
* (Timing obtained with expspeed.c [computations a^p mod m
211
* where a, p, m are of the same length: 256, 512, 1024, 2048,
212
* 4096, 8192 bits], compared to the running time of the
213
* standard algorithm:
215
* BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration]
216
* 55 .. 77 % [UltraSparc processor, but
217
* debug-solaris-sparcv8-gcc conf.]
219
* BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration]
220
* 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
222
* On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
223
* at 2048 and more bits, but at 512 and 1024 bits, it was
224
* slower even than the standard algorithm!
226
* "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
227
* should be obtained when the new Montgomery reduction code
228
* has been integrated into OpenSSL.)
203
231
#define MONT_MUL_MOD
204
232
#define MONT_EXP_WORD
205
233
#define RECP_MUL_MOD
207
235
#ifdef MONT_MUL_MOD
208
/* I have finally been able to take out this pre-condition of
209
* the top bit being set. It was caused by an error in BN_div
210
* with negatives. There was also another problem when for a^b%m
211
* a >= m. eay 07-May-97 */
212
/* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */
237
* I have finally been able to take out this pre-condition of the top bit
238
* being set. It was caused by an error in BN_div with negatives. There
239
* was also another problem when for a^b%m a >= m. eay 07-May-97
241
/* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */
216
# ifdef MONT_EXP_WORD
217
if (a->top == 1 && !a->neg && (BN_get_flags(p, BN_FLG_CONSTTIME) == 0))
219
BN_ULONG A = a->d[0];
220
ret=BN_mod_exp_mont_word(r,A,p,m,ctx,NULL);
224
ret=BN_mod_exp_mont(r,a,p,m,ctx,NULL);
244
# ifdef MONT_EXP_WORD
245
if (a->top == 1 && !a->neg
246
&& (BN_get_flags(p, BN_FLG_CONSTTIME) == 0)) {
247
BN_ULONG A = a->d[0];
248
ret = BN_mod_exp_mont_word(r, A, p, m, ctx, NULL);
251
ret = BN_mod_exp_mont(r, a, p, m, ctx, NULL);
228
254
#ifdef RECP_MUL_MOD
229
{ ret=BN_mod_exp_recp(r,a,p,m,ctx); }
256
ret = BN_mod_exp_recp(r, a, p, m, ctx);
231
{ ret=BN_mod_exp_simple(r,a,p,m,ctx); }
260
ret = BN_mod_exp_simple(r, a, p, m, ctx);
239
268
int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
240
const BIGNUM *m, BN_CTX *ctx)
242
int i,j,bits,ret=0,wstart,wend,window,wvalue;
245
/* Table of variables obtained from 'ctx' */
246
BIGNUM *val[TABLE_SIZE];
249
if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
251
/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
252
BNerr(BN_F_BN_MOD_EXP_RECP,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
265
aa = BN_CTX_get(ctx);
266
val[0] = BN_CTX_get(ctx);
267
if(!aa || !val[0]) goto err;
269
BN_RECP_CTX_init(&recp);
272
/* ignore sign of 'm' */
273
if (!BN_copy(aa, m)) goto err;
275
if (BN_RECP_CTX_set(&recp,aa,ctx) <= 0) goto err;
279
if (BN_RECP_CTX_set(&recp,m,ctx) <= 0) goto err;
282
if (!BN_nnmod(val[0],a,m,ctx)) goto err; /* 1 */
283
if (BN_is_zero(val[0]))
290
window = BN_window_bits_for_exponent_size(bits);
293
if (!BN_mod_mul_reciprocal(aa,val[0],val[0],&recp,ctx))
298
if(((val[i] = BN_CTX_get(ctx)) == NULL) ||
299
!BN_mod_mul_reciprocal(val[i],val[i-1],
305
start=1; /* This is used to avoid multiplication etc
306
* when there is only the value '1' in the
308
wvalue=0; /* The 'value' of the window */
309
wstart=bits-1; /* The top bit of the window */
310
wend=0; /* The bottom bit of the window */
312
if (!BN_one(r)) goto err;
316
if (BN_is_bit_set(p,wstart) == 0)
319
if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx))
321
if (wstart == 0) break;
325
/* We now have wstart on a 'set' bit, we now need to work out
326
* how bit a window to do. To do this we need to scan
327
* forward until the last set bit before the end of the
332
for (i=1; i<window; i++)
334
if (wstart-i < 0) break;
335
if (BN_is_bit_set(p,wstart-i))
343
/* wend is the size of the current window */
345
/* add the 'bytes above' */
349
if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx))
353
/* wvalue will be an odd number < 2^window */
354
if (!BN_mod_mul_reciprocal(r,r,val[wvalue>>1],&recp,ctx))
357
/* move the 'window' down further */
361
if (wstart < 0) break;
366
BN_RECP_CTX_free(&recp);
269
const BIGNUM *m, BN_CTX *ctx)
271
int i, j, bits, ret = 0, wstart, wend, window, wvalue;
274
/* Table of variables obtained from 'ctx' */
275
BIGNUM *val[TABLE_SIZE];
278
if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
279
/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
280
BNerr(BN_F_BN_MOD_EXP_RECP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
284
bits = BN_num_bits(p);
292
aa = BN_CTX_get(ctx);
293
val[0] = BN_CTX_get(ctx);
297
BN_RECP_CTX_init(&recp);
299
/* ignore sign of 'm' */
303
if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0)
306
if (BN_RECP_CTX_set(&recp, m, ctx) <= 0)
310
if (!BN_nnmod(val[0], a, m, ctx))
312
if (BN_is_zero(val[0])) {
318
window = BN_window_bits_for_exponent_size(bits);
320
if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx))
322
j = 1 << (window - 1);
323
for (i = 1; i < j; i++) {
324
if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
325
!BN_mod_mul_reciprocal(val[i], val[i - 1], aa, &recp, ctx))
330
start = 1; /* This is used to avoid multiplication etc
331
* when there is only the value '1' in the
333
wvalue = 0; /* The 'value' of the window */
334
wstart = bits - 1; /* The top bit of the window */
335
wend = 0; /* The bottom bit of the window */
341
if (BN_is_bit_set(p, wstart) == 0) {
343
if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx))
351
* We now have wstart on a 'set' bit, we now need to work out how bit
352
* a window to do. To do this we need to scan forward until the last
353
* set bit before the end of the window
358
for (i = 1; i < window; i++) {
361
if (BN_is_bit_set(p, wstart - i)) {
362
wvalue <<= (i - wend);
368
/* wend is the size of the current window */
370
/* add the 'bytes above' */
372
for (i = 0; i < j; i++) {
373
if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx))
377
/* wvalue will be an odd number < 2^window */
378
if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], &recp, ctx))
381
/* move the 'window' down further */
391
BN_RECP_CTX_free(&recp);
372
396
int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
373
const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
375
int i,j,bits,ret=0,wstart,wend,window,wvalue;
379
/* Table of variables obtained from 'ctx' */
380
BIGNUM *val[TABLE_SIZE];
381
BN_MONT_CTX *mont=NULL;
383
if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
385
return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
394
BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS);
407
val[0] = BN_CTX_get(ctx);
408
if (!d || !r || !val[0]) goto err;
410
/* If this is not done, things will break in the montgomery
417
if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
418
if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
421
if (a->neg || BN_ucmp(a,m) >= 0)
423
if (!BN_nnmod(val[0],a,m,ctx))
435
if (!BN_to_montgomery(val[0],aa,mont,ctx)) goto err; /* 1 */
437
window = BN_window_bits_for_exponent_size(bits);
440
if (!BN_mod_mul_montgomery(d,val[0],val[0],mont,ctx)) goto err; /* 2 */
444
if(((val[i] = BN_CTX_get(ctx)) == NULL) ||
445
!BN_mod_mul_montgomery(val[i],val[i-1],
451
start=1; /* This is used to avoid multiplication etc
452
* when there is only the value '1' in the
454
wvalue=0; /* The 'value' of the window */
455
wstart=bits-1; /* The top bit of the window */
456
wend=0; /* The bottom bit of the window */
458
if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
461
if (BN_is_bit_set(p,wstart) == 0)
465
if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
468
if (wstart == 0) break;
472
/* We now have wstart on a 'set' bit, we now need to work out
473
* how bit a window to do. To do this we need to scan
474
* forward until the last set bit before the end of the
479
for (i=1; i<window; i++)
481
if (wstart-i < 0) break;
482
if (BN_is_bit_set(p,wstart-i))
490
/* wend is the size of the current window */
492
/* add the 'bytes above' */
496
if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
500
/* wvalue will be an odd number < 2^window */
501
if (!BN_mod_mul_montgomery(r,r,val[wvalue>>1],mont,ctx))
504
/* move the 'window' down further */
508
if (wstart < 0) break;
510
if (!BN_from_montgomery(rr,r,mont,ctx)) goto err;
513
if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
520
/* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout
521
* so that accessing any of these table values shows the same access pattern as far
522
* as cache lines are concerned. The following functions are used to transfer a BIGNUM
523
* from/to that table. */
525
static int MOD_EXP_CTIME_COPY_TO_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width)
529
if (bn_wexpand(b, top) == NULL)
536
for (i = 0, j=idx; i < top * sizeof b->d[0]; i++, j+=width)
538
buf[j] = ((unsigned char*)b->d)[i];
545
static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width)
549
if (bn_wexpand(b, top) == NULL)
552
for (i=0, j=idx; i < top * sizeof b->d[0]; i++, j+=width)
554
((unsigned char*)b->d)[i] = buf[j];
562
/* Given a pointer value, compute the next address that is a cache line multiple. */
397
const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
399
int i, j, bits, ret = 0, wstart, wend, window, wvalue;
403
/* Table of variables obtained from 'ctx' */
404
BIGNUM *val[TABLE_SIZE];
405
BN_MONT_CTX *mont = NULL;
407
if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
408
return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
416
BNerr(BN_F_BN_MOD_EXP_MONT, BN_R_CALLED_WITH_EVEN_MODULUS);
419
bits = BN_num_bits(p);
428
val[0] = BN_CTX_get(ctx);
429
if (!d || !r || !val[0])
433
* If this is not done, things will break in the montgomery part
439
if ((mont = BN_MONT_CTX_new()) == NULL)
441
if (!BN_MONT_CTX_set(mont, m, ctx))
445
if (a->neg || BN_ucmp(a, m) >= 0) {
446
if (!BN_nnmod(val[0], a, m, ctx))
451
if (BN_is_zero(aa)) {
456
if (!BN_to_montgomery(val[0], aa, mont, ctx))
459
window = BN_window_bits_for_exponent_size(bits);
461
if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx))
463
j = 1 << (window - 1);
464
for (i = 1; i < j; i++) {
465
if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
466
!BN_mod_mul_montgomery(val[i], val[i - 1], d, mont, ctx))
471
start = 1; /* This is used to avoid multiplication etc
472
* when there is only the value '1' in the
474
wvalue = 0; /* The 'value' of the window */
475
wstart = bits - 1; /* The top bit of the window */
476
wend = 0; /* The bottom bit of the window */
478
#if 1 /* by Shay Gueron's suggestion */
479
j = m->top; /* borrow j */
480
if (m->d[j - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) {
481
if (bn_wexpand(r, j) == NULL)
483
/* 2^(top*BN_BITS2) - m */
484
r->d[0] = (0 - m->d[0]) & BN_MASK2;
485
for (i = 1; i < j; i++)
486
r->d[i] = (~m->d[i]) & BN_MASK2;
489
* Upper words will be zero if the corresponding words of 'm' were
490
* 0xfff[...], so decrement r->top accordingly.
495
if (!BN_to_montgomery(r, BN_value_one(), mont, ctx))
498
if (BN_is_bit_set(p, wstart) == 0) {
500
if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
509
* We now have wstart on a 'set' bit, we now need to work out how bit
510
* a window to do. To do this we need to scan forward until the last
511
* set bit before the end of the window
516
for (i = 1; i < window; i++) {
519
if (BN_is_bit_set(p, wstart - i)) {
520
wvalue <<= (i - wend);
526
/* wend is the size of the current window */
528
/* add the 'bytes above' */
530
for (i = 0; i < j; i++) {
531
if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
535
/* wvalue will be an odd number < 2^window */
536
if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx))
539
/* move the 'window' down further */
546
#if defined(SPARC_T4_MONT)
547
if (OPENSSL_sparcv9cap_P[0] & (SPARCV9_VIS3 | SPARCV9_PREFER_FPU)) {
548
j = mont->N.top; /* borrow j */
549
val[0]->d[0] = 1; /* borrow val[0] */
550
for (i = 1; i < j; i++)
553
if (!BN_mod_mul_montgomery(rr, r, val[0], mont, ctx))
557
if (!BN_from_montgomery(rr, r, mont, ctx))
561
if ((in_mont == NULL) && (mont != NULL))
562
BN_MONT_CTX_free(mont);
568
#if defined(SPARC_T4_MONT)
569
static BN_ULONG bn_get_bits(const BIGNUM *a, int bitpos)
574
wordpos = bitpos / BN_BITS2;
576
if (wordpos >= 0 && wordpos < a->top) {
577
ret = a->d[wordpos] & BN_MASK2;
580
if (++wordpos < a->top)
581
ret |= a->d[wordpos] << (BN_BITS2 - bitpos);
585
return ret & BN_MASK2;
590
* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific
591
* layout so that accessing any of these table values shows the same access
592
* pattern as far as cache lines are concerned. The following functions are
593
* used to transfer a BIGNUM from/to that table.
596
static int MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top,
597
unsigned char *buf, int idx,
603
top = b->top; /* this works because 'buf' is explicitly
605
for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) {
606
buf[j] = ((unsigned char *)b->d)[i];
612
static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top,
613
unsigned char *buf, int idx,
618
if (bn_wexpand(b, top) == NULL)
621
for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) {
622
((unsigned char *)b->d)[i] = buf[j];
631
* Given a pointer value, compute the next address that is a cache line
563
634
#define MOD_EXP_CTIME_ALIGN(x_) \
564
((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((BN_ULONG)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
635
((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
566
/* This variant of BN_mod_exp_mont() uses fixed windows and the special
567
* precomputation memory layout to limit data-dependency to a minimum
568
* to protect secret exponents (cf. the hyper-threading timing attacks
569
* pointed out by Colin Percival,
570
* http://www.daemonology.net/hyperthreading-considered-harmful/)
638
* This variant of BN_mod_exp_mont() uses fixed windows and the special
639
* precomputation memory layout to limit data-dependency to a minimum to
640
* protect secret exponents (cf. the hyper-threading timing attacks pointed
641
* out by Colin Percival,
642
* http://www.daemong-consideredperthreading-considered-harmful/)
572
644
int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
573
const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
575
int i,bits,ret=0,idx,window,wvalue;
579
BN_MONT_CTX *mont=NULL;
582
unsigned char *powerbufFree=NULL;
584
unsigned char *powerbuf=NULL;
585
BIGNUM *computeTemp=NULL, *am=NULL;
595
BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME,BN_R_CALLED_WITH_EVEN_MODULUS);
605
/* Initialize BIGNUM context and allocate intermediate result */
608
if (r == NULL) goto err;
610
/* Allocate a montgomery context if it was not supplied by the caller.
611
* If this is not done, things will break in the montgomery part.
617
if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
618
if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
621
/* Get the window size to use with size of p. */
622
window = BN_window_bits_for_ctime_exponent_size(bits);
624
/* Allocate a buffer large enough to hold all of the pre-computed
627
numPowers = 1 << window;
628
powerbufLen = sizeof(m->d[0])*top*numPowers;
629
if ((powerbufFree=(unsigned char*)OPENSSL_malloc(powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL)
632
powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
633
memset(powerbuf, 0, powerbufLen);
635
/* Initialize the intermediate result. Do this early to save double conversion,
636
* once each for a^0 and intermediate result.
638
if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
639
if (!MOD_EXP_CTIME_COPY_TO_PREBUF(r, top, powerbuf, 0, numPowers)) goto err;
641
/* Initialize computeTemp as a^1 with montgomery precalcs */
642
computeTemp = BN_CTX_get(ctx);
643
am = BN_CTX_get(ctx);
644
if (computeTemp==NULL || am==NULL) goto err;
646
if (a->neg || BN_ucmp(a,m) >= 0)
648
if (!BN_mod(am,a,m,ctx))
654
if (!BN_to_montgomery(am,aa,mont,ctx)) goto err;
655
if (!BN_copy(computeTemp, am)) goto err;
656
if (!MOD_EXP_CTIME_COPY_TO_PREBUF(am, top, powerbuf, 1, numPowers)) goto err;
658
/* If the window size is greater than 1, then calculate
659
* val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
660
* (even powers could instead be computed as (a^(i/2))^2
661
* to use the slight performance advantage of sqr over mul).
665
for (i=2; i<numPowers; i++)
667
/* Calculate a^i = a^(i-1) * a */
668
if (!BN_mod_mul_montgomery(computeTemp,am,computeTemp,mont,ctx))
670
if (!MOD_EXP_CTIME_COPY_TO_PREBUF(computeTemp, top, powerbuf, i, numPowers)) goto err;
674
/* Adjust the number of bits up to a multiple of the window size.
675
* If the exponent length is not a multiple of the window size, then
676
* this pads the most significant bits with zeros to normalize the
677
* scanning loop to there's no special cases.
679
* * NOTE: Making the window size a power of two less than the native
680
* * word size ensures that the padded bits won't go past the last
681
* * word in the internal BIGNUM structure. Going past the end will
682
* * still produce the correct result, but causes a different branch
683
* * to be taken in the BN_is_bit_set function.
685
bits = ((bits+window-1)/window)*window;
686
idx=bits-1; /* The top bit of the window */
688
/* Scan the exponent one window at a time starting from the most
693
wvalue=0; /* The 'value' of the window */
695
/* Scan the window, squaring the result as we go */
696
for (i=0; i<window; i++,idx--)
698
if (!BN_mod_mul_montgomery(r,r,r,mont,ctx)) goto err;
699
wvalue = (wvalue<<1)+BN_is_bit_set(p,idx);
702
/* Fetch the appropriate pre-computed value from the pre-buf */
703
if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(computeTemp, top, powerbuf, wvalue, numPowers)) goto err;
705
/* Multiply the result into the intermediate result */
706
if (!BN_mod_mul_montgomery(r,r,computeTemp,mont,ctx)) goto err;
709
/* Convert the final result from montgomery to standard format */
710
if (!BN_from_montgomery(rr,r,mont,ctx)) goto err;
713
if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
716
OPENSSL_cleanse(powerbuf,powerbufLen);
717
OPENSSL_free(powerbufFree);
719
if (am!=NULL) BN_clear(am);
720
if (computeTemp!=NULL) BN_clear(computeTemp);
645
const BIGNUM *m, BN_CTX *ctx,
646
BN_MONT_CTX *in_mont)
648
int i, bits, ret = 0, window, wvalue;
650
BN_MONT_CTX *mont = NULL;
653
unsigned char *powerbufFree = NULL;
655
unsigned char *powerbuf = NULL;
657
#if defined(SPARC_T4_MONT)
667
if (!(m->d[0] & 1)) {
668
BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME, BN_R_CALLED_WITH_EVEN_MODULUS);
671
bits = BN_num_bits(p);
680
* Allocate a montgomery context if it was not supplied by the caller. If
681
* this is not done, things will break in the montgomery part.
686
if ((mont = BN_MONT_CTX_new()) == NULL)
688
if (!BN_MONT_CTX_set(mont, m, ctx))
694
* If the size of the operands allow it, perform the optimized
695
* RSAZ exponentiation. For further information see
696
* crypto/bn/rsaz_exp.c and accompanying assembly modules.
698
if ((16 == a->top) && (16 == p->top) && (BN_num_bits(m) == 1024)
699
&& rsaz_avx2_eligible()) {
700
if (NULL == bn_wexpand(rr, 16))
702
RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d,
709
} else if ((8 == a->top) && (8 == p->top) && (BN_num_bits(m) == 512)) {
710
if (NULL == bn_wexpand(rr, 8))
712
RSAZ_512_mod_exp(rr->d, a->d, p->d, m->d, mont->n0[0], mont->RR.d);
721
/* Get the window size to use with size of p. */
722
window = BN_window_bits_for_ctime_exponent_size(bits);
723
#if defined(SPARC_T4_MONT)
724
if (window >= 5 && (top & 15) == 0 && top <= 64 &&
725
(OPENSSL_sparcv9cap_P[1] & (CFR_MONTMUL | CFR_MONTSQR)) ==
726
(CFR_MONTMUL | CFR_MONTSQR) && (t4 = OPENSSL_sparcv9cap_P[0]))
730
#if defined(OPENSSL_BN_ASM_MONT5)
732
window = 5; /* ~5% improvement for RSA2048 sign, and even
735
powerbufLen += 2 * top * sizeof(m->d[0]);
741
* Allocate a buffer large enough to hold all of the pre-computed powers
742
* of am, am itself and tmp.
744
numPowers = 1 << window;
745
powerbufLen += sizeof(m->d[0]) * (top * numPowers +
747
numPowers ? (2 * top) : numPowers));
749
if (powerbufLen < 3072)
751
alloca(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH);
755
(unsigned char *)OPENSSL_malloc(powerbufLen +
756
MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH))
760
powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
761
memset(powerbuf, 0, powerbufLen);
764
if (powerbufLen < 3072)
768
/* lay down tmp and am right after powers table */
769
tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers);
771
tmp.top = am.top = 0;
772
tmp.dmax = am.dmax = top;
773
tmp.neg = am.neg = 0;
774
tmp.flags = am.flags = BN_FLG_STATIC_DATA;
776
/* prepare a^0 in Montgomery domain */
777
#if 1 /* by Shay Gueron's suggestion */
778
if (m->d[top - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) {
779
/* 2^(top*BN_BITS2) - m */
780
tmp.d[0] = (0 - m->d[0]) & BN_MASK2;
781
for (i = 1; i < top; i++)
782
tmp.d[i] = (~m->d[i]) & BN_MASK2;
786
if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx))
789
/* prepare a^1 in Montgomery domain */
790
if (a->neg || BN_ucmp(a, m) >= 0) {
791
if (!BN_mod(&am, a, m, ctx))
793
if (!BN_to_montgomery(&am, &am, mont, ctx))
795
} else if (!BN_to_montgomery(&am, a, mont, ctx))
798
#if defined(SPARC_T4_MONT)
800
typedef int (*bn_pwr5_mont_f) (BN_ULONG *tp, const BN_ULONG *np,
801
const BN_ULONG *n0, const void *table,
802
int power, int bits);
803
int bn_pwr5_mont_t4_8(BN_ULONG *tp, const BN_ULONG *np,
804
const BN_ULONG *n0, const void *table,
805
int power, int bits);
806
int bn_pwr5_mont_t4_16(BN_ULONG *tp, const BN_ULONG *np,
807
const BN_ULONG *n0, const void *table,
808
int power, int bits);
809
int bn_pwr5_mont_t4_24(BN_ULONG *tp, const BN_ULONG *np,
810
const BN_ULONG *n0, const void *table,
811
int power, int bits);
812
int bn_pwr5_mont_t4_32(BN_ULONG *tp, const BN_ULONG *np,
813
const BN_ULONG *n0, const void *table,
814
int power, int bits);
815
static const bn_pwr5_mont_f pwr5_funcs[4] = {
816
bn_pwr5_mont_t4_8, bn_pwr5_mont_t4_16,
817
bn_pwr5_mont_t4_24, bn_pwr5_mont_t4_32
819
bn_pwr5_mont_f pwr5_worker = pwr5_funcs[top / 16 - 1];
821
typedef int (*bn_mul_mont_f) (BN_ULONG *rp, const BN_ULONG *ap,
822
const void *bp, const BN_ULONG *np,
824
int bn_mul_mont_t4_8(BN_ULONG *rp, const BN_ULONG *ap, const void *bp,
825
const BN_ULONG *np, const BN_ULONG *n0);
826
int bn_mul_mont_t4_16(BN_ULONG *rp, const BN_ULONG *ap,
827
const void *bp, const BN_ULONG *np,
829
int bn_mul_mont_t4_24(BN_ULONG *rp, const BN_ULONG *ap,
830
const void *bp, const BN_ULONG *np,
832
int bn_mul_mont_t4_32(BN_ULONG *rp, const BN_ULONG *ap,
833
const void *bp, const BN_ULONG *np,
835
static const bn_mul_mont_f mul_funcs[4] = {
836
bn_mul_mont_t4_8, bn_mul_mont_t4_16,
837
bn_mul_mont_t4_24, bn_mul_mont_t4_32
839
bn_mul_mont_f mul_worker = mul_funcs[top / 16 - 1];
841
void bn_mul_mont_vis3(BN_ULONG *rp, const BN_ULONG *ap,
842
const void *bp, const BN_ULONG *np,
843
const BN_ULONG *n0, int num);
844
void bn_mul_mont_t4(BN_ULONG *rp, const BN_ULONG *ap,
845
const void *bp, const BN_ULONG *np,
846
const BN_ULONG *n0, int num);
847
void bn_mul_mont_gather5_t4(BN_ULONG *rp, const BN_ULONG *ap,
848
const void *table, const BN_ULONG *np,
849
const BN_ULONG *n0, int num, int power);
850
void bn_flip_n_scatter5_t4(const BN_ULONG *inp, size_t num,
851
void *table, size_t power);
852
void bn_gather5_t4(BN_ULONG *out, size_t num,
853
void *table, size_t power);
854
void bn_flip_t4(BN_ULONG *dst, BN_ULONG *src, size_t num);
856
BN_ULONG *np = mont->N.d, *n0 = mont->n0;
857
int stride = 5 * (6 - (top / 16 - 1)); /* multiple of 5, but less
861
* BN_to_montgomery can contaminate words above .top [in
862
* BN_DEBUG[_DEBUG] build]...
864
for (i = am.top; i < top; i++)
866
for (i = tmp.top; i < top; i++)
869
bn_flip_n_scatter5_t4(tmp.d, top, powerbuf, 0);
870
bn_flip_n_scatter5_t4(am.d, top, powerbuf, 1);
871
if (!(*mul_worker) (tmp.d, am.d, am.d, np, n0) &&
872
!(*mul_worker) (tmp.d, am.d, am.d, np, n0))
873
bn_mul_mont_vis3(tmp.d, am.d, am.d, np, n0, top);
874
bn_flip_n_scatter5_t4(tmp.d, top, powerbuf, 2);
876
for (i = 3; i < 32; i++) {
877
/* Calculate a^i = a^(i-1) * a */
878
if (!(*mul_worker) (tmp.d, tmp.d, am.d, np, n0) &&
879
!(*mul_worker) (tmp.d, tmp.d, am.d, np, n0))
880
bn_mul_mont_vis3(tmp.d, tmp.d, am.d, np, n0, top);
881
bn_flip_n_scatter5_t4(tmp.d, top, powerbuf, i);
884
/* switch to 64-bit domain */
885
np = alloca(top * sizeof(BN_ULONG));
887
bn_flip_t4(np, mont->N.d, top);
890
for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--)
891
wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
892
bn_gather5_t4(tmp.d, top, powerbuf, wvalue);
895
* Scan the exponent one window at a time starting from the most
902
wvalue = bn_get_bits(p, bits + 1);
904
if ((*pwr5_worker) (tmp.d, np, n0, powerbuf, wvalue, stride))
906
/* retry once and fall back */
907
if ((*pwr5_worker) (tmp.d, np, n0, powerbuf, wvalue, stride))
911
wvalue >>= stride - 5;
913
bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top);
914
bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top);
915
bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top);
916
bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top);
917
bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top);
918
bn_mul_mont_gather5_t4(tmp.d, tmp.d, powerbuf, np, n0, top,
922
bn_flip_t4(tmp.d, tmp.d, top);
924
/* back to 32-bit domain */
926
bn_correct_top(&tmp);
927
OPENSSL_cleanse(np, top * sizeof(BN_ULONG));
930
#if defined(OPENSSL_BN_ASM_MONT5)
931
if (window == 5 && top > 1) {
933
* This optimization uses ideas from http://eprint.iacr.org/2011/239,
934
* specifically optimization of cache-timing attack countermeasures
935
* and pre-computation optimization.
939
* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as
940
* 512-bit RSA is hardly relevant, we omit it to spare size...
942
void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap,
943
const void *table, const BN_ULONG *np,
944
const BN_ULONG *n0, int num, int power);
945
void bn_scatter5(const BN_ULONG *inp, size_t num,
946
void *table, size_t power);
947
void bn_gather5(BN_ULONG *out, size_t num, void *table, size_t power);
948
void bn_power5(BN_ULONG *rp, const BN_ULONG *ap,
949
const void *table, const BN_ULONG *np,
950
const BN_ULONG *n0, int num, int power);
951
int bn_get_bits5(const BN_ULONG *ap, int off);
952
int bn_from_montgomery(BN_ULONG *rp, const BN_ULONG *ap,
953
const BN_ULONG *not_used, const BN_ULONG *np,
954
const BN_ULONG *n0, int num);
956
BN_ULONG *np = mont->N.d, *n0 = mont->n0, *np2;
959
* BN_to_montgomery can contaminate words above .top [in
960
* BN_DEBUG[_DEBUG] build]...
962
for (i = am.top; i < top; i++)
964
for (i = tmp.top; i < top; i++)
970
for (np2 = am.d + top, i = 0; i < top; i++)
973
bn_scatter5(tmp.d, top, powerbuf, 0);
974
bn_scatter5(am.d, am.top, powerbuf, 1);
975
bn_mul_mont(tmp.d, am.d, am.d, np, n0, top);
976
bn_scatter5(tmp.d, top, powerbuf, 2);
979
for (i = 3; i < 32; i++) {
980
/* Calculate a^i = a^(i-1) * a */
981
bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
982
bn_scatter5(tmp.d, top, powerbuf, i);
985
/* same as above, but uses squaring for 1/2 of operations */
986
for (i = 4; i < 32; i *= 2) {
987
bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
988
bn_scatter5(tmp.d, top, powerbuf, i);
990
for (i = 3; i < 8; i += 2) {
992
bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
993
bn_scatter5(tmp.d, top, powerbuf, i);
994
for (j = 2 * i; j < 32; j *= 2) {
995
bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
996
bn_scatter5(tmp.d, top, powerbuf, j);
999
for (; i < 16; i += 2) {
1000
bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
1001
bn_scatter5(tmp.d, top, powerbuf, i);
1002
bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
1003
bn_scatter5(tmp.d, top, powerbuf, 2 * i);
1005
for (; i < 32; i += 2) {
1006
bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
1007
bn_scatter5(tmp.d, top, powerbuf, i);
1011
for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--)
1012
wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
1013
bn_gather5(tmp.d, top, powerbuf, wvalue);
1016
* Scan the exponent one window at a time starting from the most
1021
for (wvalue = 0, i = 0; i < 5; i++, bits--)
1022
wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
1024
bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
1025
bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
1026
bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
1027
bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
1028
bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
1029
bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top,
1033
wvalue = bn_get_bits5(p->d, bits - 4);
1035
bn_power5(tmp.d, tmp.d, powerbuf, np2, n0, top, wvalue);
1039
ret = bn_from_montgomery(tmp.d, tmp.d, NULL, np2, n0, top);
1041
bn_correct_top(&tmp);
1043
if (!BN_copy(rr, &tmp))
1045
goto err; /* non-zero ret means it's not error */
1050
if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, numPowers))
1052
if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, numPowers))
1056
* If the window size is greater than 1, then calculate
1057
* val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) (even
1058
* powers could instead be computed as (a^(i/2))^2 to use the slight
1059
* performance advantage of sqr over mul).
1062
if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx))
1064
if (!MOD_EXP_CTIME_COPY_TO_PREBUF
1065
(&tmp, top, powerbuf, 2, numPowers))
1067
for (i = 3; i < numPowers; i++) {
1068
/* Calculate a^i = a^(i-1) * a */
1069
if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx))
1071
if (!MOD_EXP_CTIME_COPY_TO_PREBUF
1072
(&tmp, top, powerbuf, i, numPowers))
1078
for (wvalue = 0, i = bits % window; i >= 0; i--, bits--)
1079
wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
1080
if (!MOD_EXP_CTIME_COPY_FROM_PREBUF
1081
(&tmp, top, powerbuf, wvalue, numPowers))
1085
* Scan the exponent one window at a time starting from the most
1089
wvalue = 0; /* The 'value' of the window */
1091
/* Scan the window, squaring the result as we go */
1092
for (i = 0; i < window; i++, bits--) {
1093
if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, mont, ctx))
1095
wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
1099
* Fetch the appropriate pre-computed value from the pre-buf
1101
if (!MOD_EXP_CTIME_COPY_FROM_PREBUF
1102
(&am, top, powerbuf, wvalue, numPowers))
1105
/* Multiply the result into the intermediate result */
1106
if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx))
1111
/* Convert the final result from montgomery to standard format */
1112
#if defined(SPARC_T4_MONT)
1113
if (OPENSSL_sparcv9cap_P[0] & (SPARCV9_VIS3 | SPARCV9_PREFER_FPU)) {
1114
am.d[0] = 1; /* borrow am */
1115
for (i = 1; i < top; i++)
1117
if (!BN_mod_mul_montgomery(rr, &tmp, &am, mont, ctx))
1121
if (!BN_from_montgomery(rr, &tmp, mont, ctx))
1125
if ((in_mont == NULL) && (mont != NULL))
1126
BN_MONT_CTX_free(mont);
1127
if (powerbuf != NULL) {
1128
OPENSSL_cleanse(powerbuf, powerbufLen);
1130
OPENSSL_free(powerbufFree);
725
1136
int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
726
1137
const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
728
BN_MONT_CTX *mont = NULL;
1139
BN_MONT_CTX *mont = NULL;
1140
int b, bits, ret = 0;
734
1145
#define BN_MOD_MUL_WORD(r, w, m) \
735
(BN_mul_word(r, (w)) && \
736
(/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \
737
(BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
738
/* BN_MOD_MUL_WORD is only used with 'w' large,
739
* so the BN_ucmp test is probably more overhead
740
* than always using BN_mod (which uses BN_copy if
741
* a similar test returns true). */
742
/* We can use BN_mod and do not need BN_nnmod because our
743
* accumulator is never negative (the result of BN_mod does
744
* not depend on the sign of the modulus).
1146
(BN_mul_word(r, (w)) && \
1147
(/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \
1148
(BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
1150
* BN_MOD_MUL_WORD is only used with 'w' large, so the BN_ucmp test is
1151
* probably more overhead than always using BN_mod (which uses BN_copy if
1152
* a similar test returns true).
1155
* We can use BN_mod and do not need BN_nnmod because our accumulator is
1156
* never negative (the result of BN_mod does not depend on the sign of
746
1159
#define BN_TO_MONTGOMERY_WORD(r, w, mont) \
747
(BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
749
if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
751
/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
752
BNerr(BN_F_BN_MOD_EXP_MONT_WORD,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
761
BNerr(BN_F_BN_MOD_EXP_MONT_WORD,BN_R_CALLED_WITH_EVEN_MODULUS);
765
a %= m->d[0]; /* make sure that 'a' is reduced */
767
bits = BN_num_bits(p);
784
if (d == NULL || r == NULL || t == NULL) goto err;
790
if ((mont = BN_MONT_CTX_new()) == NULL) goto err;
791
if (!BN_MONT_CTX_set(mont, m, ctx)) goto err;
794
r_is_one = 1; /* except for Montgomery factor */
798
/* The result is accumulated in the product r*w. */
799
w = a; /* bit 'bits-1' of 'p' is always set */
800
for (b = bits-2; b >= 0; b--)
802
/* First, square r*w. */
804
if ((next_w/w) != w) /* overflow */
808
if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
813
if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
820
if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) goto err;
823
/* Second, multiply r*w by 'a' if exponent bit is set. */
824
if (BN_is_bit_set(p, b))
827
if ((next_w/a) != w) /* overflow */
831
if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
836
if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
844
/* Finally, set r:=r*w. */
849
if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
854
if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
858
if (r_is_one) /* can happen only if a == 1*/
860
if (!BN_one(rr)) goto err;
864
if (!BN_from_montgomery(rr, r, mont, ctx)) goto err;
868
if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
1160
(BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
1162
if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
1163
/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
1164
BNerr(BN_F_BN_MOD_EXP_MONT_WORD, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
1171
if (!BN_is_odd(m)) {
1172
BNerr(BN_F_BN_MOD_EXP_MONT_WORD, BN_R_CALLED_WITH_EVEN_MODULUS);
1176
a %= m->d[0]; /* make sure that 'a' is reduced */
1178
bits = BN_num_bits(p);
1180
/* x**0 mod 1 is still zero. */
1195
d = BN_CTX_get(ctx);
1196
r = BN_CTX_get(ctx);
1197
t = BN_CTX_get(ctx);
1198
if (d == NULL || r == NULL || t == NULL)
1201
if (in_mont != NULL)
1204
if ((mont = BN_MONT_CTX_new()) == NULL)
1206
if (!BN_MONT_CTX_set(mont, m, ctx))
1210
r_is_one = 1; /* except for Montgomery factor */
1214
/* The result is accumulated in the product r*w. */
1215
w = a; /* bit 'bits-1' of 'p' is always set */
1216
for (b = bits - 2; b >= 0; b--) {
1217
/* First, square r*w. */
1219
if ((next_w / w) != w) { /* overflow */
1221
if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
1225
if (!BN_MOD_MUL_WORD(r, w, m))
1232
if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
1236
/* Second, multiply r*w by 'a' if exponent bit is set. */
1237
if (BN_is_bit_set(p, b)) {
1239
if ((next_w / a) != w) { /* overflow */
1241
if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
1245
if (!BN_MOD_MUL_WORD(r, w, m))
1254
/* Finally, set r:=r*w. */
1257
if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
1261
if (!BN_MOD_MUL_WORD(r, w, m))
1266
if (r_is_one) { /* can happen only if a == 1 */
1270
if (!BN_from_montgomery(rr, r, mont, ctx))
1275
if ((in_mont == NULL) && (mont != NULL))
1276
BN_MONT_CTX_free(mont);
875
1282
/* The old fallback, simple version :-) */
876
1283
int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
877
const BIGNUM *m, BN_CTX *ctx)
879
int i,j,bits,ret=0,wstart,wend,window,wvalue;
882
/* Table of variables obtained from 'ctx' */
883
BIGNUM *val[TABLE_SIZE];
885
if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
887
/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
888
BNerr(BN_F_BN_MOD_EXP_SIMPLE,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
902
val[0] = BN_CTX_get(ctx);
903
if(!d || !val[0]) goto err;
905
if (!BN_nnmod(val[0],a,m,ctx)) goto err; /* 1 */
906
if (BN_is_zero(val[0]))
913
window = BN_window_bits_for_exponent_size(bits);
916
if (!BN_mod_mul(d,val[0],val[0],m,ctx))
921
if(((val[i] = BN_CTX_get(ctx)) == NULL) ||
922
!BN_mod_mul(val[i],val[i-1],d,m,ctx))
927
start=1; /* This is used to avoid multiplication etc
928
* when there is only the value '1' in the
930
wvalue=0; /* The 'value' of the window */
931
wstart=bits-1; /* The top bit of the window */
932
wend=0; /* The bottom bit of the window */
934
if (!BN_one(r)) goto err;
938
if (BN_is_bit_set(p,wstart) == 0)
941
if (!BN_mod_mul(r,r,r,m,ctx))
943
if (wstart == 0) break;
947
/* We now have wstart on a 'set' bit, we now need to work out
948
* how bit a window to do. To do this we need to scan
949
* forward until the last set bit before the end of the
954
for (i=1; i<window; i++)
956
if (wstart-i < 0) break;
957
if (BN_is_bit_set(p,wstart-i))
965
/* wend is the size of the current window */
967
/* add the 'bytes above' */
971
if (!BN_mod_mul(r,r,r,m,ctx))
975
/* wvalue will be an odd number < 2^window */
976
if (!BN_mod_mul(r,r,val[wvalue>>1],m,ctx))
979
/* move the 'window' down further */
983
if (wstart < 0) break;
1284
const BIGNUM *m, BN_CTX *ctx)
1286
int i, j, bits, ret = 0, wstart, wend, window, wvalue;
1289
/* Table of variables obtained from 'ctx' */
1290
BIGNUM *val[TABLE_SIZE];
1292
if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
1293
/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
1294
BNerr(BN_F_BN_MOD_EXP_SIMPLE, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
1298
bits = BN_num_bits(p);
1306
d = BN_CTX_get(ctx);
1307
val[0] = BN_CTX_get(ctx);
1311
if (!BN_nnmod(val[0], a, m, ctx))
1313
if (BN_is_zero(val[0])) {
1319
window = BN_window_bits_for_exponent_size(bits);
1321
if (!BN_mod_mul(d, val[0], val[0], m, ctx))
1323
j = 1 << (window - 1);
1324
for (i = 1; i < j; i++) {
1325
if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
1326
!BN_mod_mul(val[i], val[i - 1], d, m, ctx))
1331
start = 1; /* This is used to avoid multiplication etc
1332
* when there is only the value '1' in the
1334
wvalue = 0; /* The 'value' of the window */
1335
wstart = bits - 1; /* The top bit of the window */
1336
wend = 0; /* The bottom bit of the window */
1342
if (BN_is_bit_set(p, wstart) == 0) {
1344
if (!BN_mod_mul(r, r, r, m, ctx))
1352
* We now have wstart on a 'set' bit, we now need to work out how bit
1353
* a window to do. To do this we need to scan forward until the last
1354
* set bit before the end of the window
1359
for (i = 1; i < window; i++) {
1362
if (BN_is_bit_set(p, wstart - i)) {
1363
wvalue <<= (i - wend);
1369
/* wend is the size of the current window */
1371
/* add the 'bytes above' */
1373
for (i = 0; i < j; i++) {
1374
if (!BN_mod_mul(r, r, r, m, ctx))
1378
/* wvalue will be an odd number < 2^window */
1379
if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx))
1382
/* move the 'window' down further */