~mmach/netext73/busybox

« back to all changes in this revision

Viewing changes to networking/tls_pstm_montgomery_reduce.c

  • Committer: mmach
  • Date: 2021-04-14 13:54:24 UTC
  • Revision ID: netbit73@gmail.com-20210414135424-8x3fxf716zs4wflb
1

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*
 
2
 * Copyright (C) 2017 Denys Vlasenko
 
3
 *
 
4
 * Licensed under GPLv2, see file LICENSE in this source tree.
 
5
 */
 
6
#include "tls.h"
 
7
 
 
8
/* The file is taken almost verbatim from matrixssl-3-7-2b-open/crypto/math/.
 
9
 * Changes are flagged with //bbox
 
10
 */
 
11
 
 
12
/**
 
13
 *      @file    pstm_montgomery_reduce.c
 
14
 *      @version 33ef80f (HEAD, tag: MATRIXSSL-3-7-2-OPEN, tag: MATRIXSSL-3-7-2-COMM, origin/master, origin/HEAD, master)
 
15
 *
 
16
 *      Multiprecision Montgomery Reduction.
 
17
 */
 
18
/*
 
19
 *      Copyright (c) 2013-2015 INSIDE Secure Corporation
 
20
 *      Copyright (c) PeerSec Networks, 2002-2011
 
21
 *      All Rights Reserved
 
22
 *
 
23
 *      The latest version of this code is available at http://www.matrixssl.org
 
24
 *
 
25
 *      This software is open source; you can redistribute it and/or modify
 
26
 *      it under the terms of the GNU General Public License as published by
 
27
 *      the Free Software Foundation; either version 2 of the License, or
 
28
 *      (at your option) any later version.
 
29
 *
 
30
 *      This General Public License does NOT permit incorporating this software
 
31
 *      into proprietary programs.  If you are unable to comply with the GPL, a
 
32
 *      commercial license for this software may be purchased from INSIDE at
 
33
 *      http://www.insidesecure.com/eng/Company/Locations
 
34
 *
 
35
 *      This program is distributed in WITHOUT ANY WARRANTY; without even the
 
36
 *      implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 
37
 *      See the GNU General Public License for more details.
 
38
 *
 
39
 *      You should have received a copy of the GNU General Public License
 
40
 *      along with this program; if not, write to the Free Software
 
41
 *      Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
42
 *      http://www.gnu.org/copyleft/gpl.html
 
43
 */
 
44
/******************************************************************************/
 
45
 
 
46
//bbox
 
47
//#include "../cryptoApi.h"
 
48
#ifndef DISABLE_PSTM
 
49
 
 
50
/******************************************************************************/
 
51
 
 
52
#if defined(PSTM_X86)
/* x86-32 optimized for 32 bit platforms. For 64 bit mode use X86_64 instead */
#if !defined(__GNUC__) || !defined(__i386__) || !defined(PSTM_32BIT)
#error "PSTM_X86 option requires GCC and 32 bit mode x86 processor"
#endif
//#pragma message ("Using 32 bit x86 Assembly Optimizations")

#define MONT_START
#define MONT_FINI
#define LOOP_END
/* mu = low digit of c[x] * mp: the factor that zeroes digit x of c */
#define LOOP_START \
	mu = c[x] * mp

/* One multiply-accumulate step: _c[LO] += cy + mu * (*tmpm++),
 * with the high 32 bits of the result left in cy.
 * mull computes edx:eax = mu * *tmpm; the addl/adcl chain folds in
 * cy and the current digit, capturing carries into edx. */
#define INNERMUL                                          \
asm(                                                      \
	"movl %5,%%eax \n\t"                                  \
	"mull %4       \n\t"                                  \
	"addl %1,%%eax \n\t"                                  \
	"adcl $0,%%edx \n\t"                                  \
	"addl %%eax,%0 \n\t"                                  \
	"adcl $0,%%edx \n\t"                                  \
	"movl %%edx,%1 \n\t"                                  \
:"=g"(_c[LO]), "=r"(cy)                                   \
:"0"(_c[LO]), "1"(cy), "g"(mu), "g"(*tmpm++)              \
: "%eax", "%edx", "cc")

/* Propagate a carry: _c[LO] += cy; cy = 1 if that wrapped, else 0
 * (setb captures the x86 carry flag). */
/* bbox: clobber was the nonstandard spelling "%cc"; "cc" matches the
 * x86-64 section below and is what GCC documents */
#define PROPCARRY                           \
asm(                                        \
	"addl   %1,%0    \n\t"                  \
	"setb   %%al     \n\t"                  \
	"movzbl %%al,%1 \n\t"                   \
:"=g"(_c[LO]), "=r"(cy)                     \
:"0"(_c[LO]), "1"(cy)                       \
: "%eax", "cc")
 
86
 
 
87
/******************************************************************************/
 
88
#elif defined(PSTM_X86_64)
 
89
/* x86-64 optimized */
 
90
#if !defined(__GNUC__) || !defined(__x86_64__) || !defined(PSTM_64BIT)
 
91
#error "PSTM_X86_64 option requires PSTM_64BIT, GCC and 64 bit mode x86 processor"
 
92
#endif
 
93
//#pragma message ("Using 64 bit x86_64 Assembly Optimizations")
 
94
 
 
95
#define MONT_START
 
96
#define MONT_FINI
 
97
#define LOOP_END
 
98
#define LOOP_START \
 
99
mu = c[x] * mp
 
100
 
 
101
#define INNERMUL                                           \
 
102
asm(                                                       \
 
103
        "movq %5,%%rax \n\t"                                   \
 
104
        "mulq %4       \n\t"                                   \
 
105
        "addq %1,%%rax \n\t"                                   \
 
106
        "adcq $0,%%rdx \n\t"                                   \
 
107
        "addq %%rax,%0 \n\t"                                   \
 
108
        "adcq $0,%%rdx \n\t"                                   \
 
109
        "movq %%rdx,%1 \n\t"                                   \
 
110
        :"=g"(_c[LO]), "=r"(cy)                                \
 
111
        :"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++)           \
 
112
        : "%rax", "%rdx", "cc")
 
113
 
 
114
#define INNERMUL8                               \
 
115
asm(                                                    \
 
116
        "movq 0(%5),%%rax    \n\t"  \
 
117
        "movq 0(%2),%%r10    \n\t"  \
 
118
        "movq 0x8(%5),%%r11  \n\t"  \
 
119
        "mulq %4             \n\t"  \
 
120
        "addq %%r10,%%rax    \n\t"  \
 
121
        "adcq $0,%%rdx       \n\t"  \
 
122
        "movq 0x8(%2),%%r10  \n\t"  \
 
123
        "addq %3,%%rax       \n\t"  \
 
124
        "adcq $0,%%rdx       \n\t"  \
 
125
        "movq %%rax,0(%0)    \n\t"  \
 
126
        "movq %%rdx,%1       \n\t"  \
 
127
        \
 
128
        "movq %%r11,%%rax    \n\t"  \
 
129
        "movq 0x10(%5),%%r11 \n\t"  \
 
130
        "mulq %4             \n\t"  \
 
131
        "addq %%r10,%%rax    \n\t"  \
 
132
        "adcq $0,%%rdx       \n\t"  \
 
133
        "movq 0x10(%2),%%r10 \n\t"  \
 
134
        "addq %3,%%rax       \n\t"  \
 
135
        "adcq $0,%%rdx       \n\t"  \
 
136
        "movq %%rax,0x8(%0)  \n\t"  \
 
137
        "movq %%rdx,%1       \n\t"  \
 
138
        \
 
139
        "movq %%r11,%%rax    \n\t"  \
 
140
        "movq 0x18(%5),%%r11 \n\t"  \
 
141
        "mulq %4             \n\t"  \
 
142
        "addq %%r10,%%rax    \n\t"  \
 
143
        "adcq $0,%%rdx       \n\t"  \
 
144
        "movq 0x18(%2),%%r10 \n\t"  \
 
145
        "addq %3,%%rax       \n\t"  \
 
146
        "adcq $0,%%rdx       \n\t"  \
 
147
        "movq %%rax,0x10(%0) \n\t"  \
 
148
        "movq %%rdx,%1       \n\t"  \
 
149
        \
 
150
        "movq %%r11,%%rax    \n\t"  \
 
151
        "movq 0x20(%5),%%r11 \n\t"  \
 
152
        "mulq %4             \n\t"  \
 
153
        "addq %%r10,%%rax    \n\t"  \
 
154
        "adcq $0,%%rdx       \n\t"  \
 
155
        "movq 0x20(%2),%%r10 \n\t"  \
 
156
        "addq %3,%%rax       \n\t"  \
 
157
        "adcq $0,%%rdx       \n\t"  \
 
158
        "movq %%rax,0x18(%0) \n\t"  \
 
159
        "movq %%rdx,%1       \n\t"  \
 
160
        \
 
161
        "movq %%r11,%%rax    \n\t"  \
 
162
        "movq 0x28(%5),%%r11 \n\t"  \
 
163
        "mulq %4             \n\t"  \
 
164
        "addq %%r10,%%rax    \n\t"  \
 
165
        "adcq $0,%%rdx       \n\t"  \
 
166
        "movq 0x28(%2),%%r10 \n\t"  \
 
167
        "addq %3,%%rax       \n\t"  \
 
168
        "adcq $0,%%rdx       \n\t"  \
 
169
        "movq %%rax,0x20(%0) \n\t"  \
 
170
        "movq %%rdx,%1       \n\t"  \
 
171
        \
 
172
        "movq %%r11,%%rax    \n\t"  \
 
173
        "movq 0x30(%5),%%r11 \n\t"  \
 
174
        "mulq %4             \n\t"  \
 
175
        "addq %%r10,%%rax    \n\t"  \
 
176
        "adcq $0,%%rdx       \n\t"  \
 
177
        "movq 0x30(%2),%%r10 \n\t"  \
 
178
        "addq %3,%%rax       \n\t"  \
 
179
        "adcq $0,%%rdx       \n\t"  \
 
180
        "movq %%rax,0x28(%0) \n\t"  \
 
181
        "movq %%rdx,%1       \n\t"  \
 
182
        \
 
183
        "movq %%r11,%%rax    \n\t"  \
 
184
        "movq 0x38(%5),%%r11 \n\t"  \
 
185
        "mulq %4             \n\t"  \
 
186
        "addq %%r10,%%rax    \n\t"  \
 
187
        "adcq $0,%%rdx       \n\t"  \
 
188
        "movq 0x38(%2),%%r10 \n\t"  \
 
189
        "addq %3,%%rax       \n\t"  \
 
190
        "adcq $0,%%rdx       \n\t"  \
 
191
        "movq %%rax,0x30(%0) \n\t"  \
 
192
        "movq %%rdx,%1       \n\t"  \
 
193
        \
 
194
        "movq %%r11,%%rax    \n\t"  \
 
195
        "mulq %4             \n\t"  \
 
196
        "addq %%r10,%%rax    \n\t"  \
 
197
        "adcq $0,%%rdx       \n\t"  \
 
198
        "addq %3,%%rax       \n\t"  \
 
199
        "adcq $0,%%rdx       \n\t"  \
 
200
        "movq %%rax,0x38(%0) \n\t"  \
 
201
        "movq %%rdx,%1       \n\t"  \
 
202
        \
 
203
        :"=r"(_c), "=r"(cy)                    \
 
204
        : "0"(_c),  "1"(cy), "g"(mu), "r"(tmpm)\
 
205
        : "%rax", "%rdx", "%r10", "%r11", "cc")
 
206
 
 
207
#define PROPCARRY                          \
 
208
asm(                                       \
 
209
        "addq   %1,%0    \n\t"                 \
 
210
        "setb   %%al     \n\t"                 \
 
211
        "movzbq %%al,%1 \n\t"                  \
 
212
        :"=g"(_c[LO]), "=r"(cy)                \
 
213
        :"0"(_c[LO]), "1"(cy)                  \
 
214
        : "%rax", "cc")
 
215
 
 
216
/******************************************************************************/
 
217
#elif defined(PSTM_ARM)

#define MONT_START
#define MONT_FINI
#define LOOP_END
/* mu = low digit of c[x] * mp: the factor that zeroes digit x of c */
#define LOOP_START \
mu = c[x] * mp

/* bbox: clobber was the nonstandard spelling "%cc" in both variants below;
 * "cc" is the form GCC documents (and the x86-64 section already uses) */
#ifdef __thumb2__
//#pragma message ("Using 32 bit ARM Thumb2 Assembly Optimizations")
/* _c[0] += cy + mu * (*tmpm++); cy = high 32 bits of the result.
 * ADDS folds cy into the digit and the ITE/MOVCS/MOVCC pair turns the
 * carry flag into 0/1 in %0 before UMLAL accumulates mu * tmpm into
 * the r0:%0 64-bit pair. ITE is required in Thumb2 for the
 * conditional MOVs. */
#define INNERMUL                    \
asm(                                \
	" LDR    r0,%1            \n\t" \
	" ADDS   r0,r0,%0         \n\t" \
	" ITE CS                  \n\t" \
	" MOVCS  %0,#1            \n\t" \
	" MOVCC  %0,#0            \n\t" \
	" UMLAL  r0,%0,%3,%4      \n\t" \
	" STR    r0,%1            \n\t" \
	:"=r"(cy),"=m"(_c[0])\
	:"0"(cy),"r"(mu),"r"(*tmpm++),"m"(_c[0])\
	:"r0","cc");
/* _c[0] += cy; cy = 1 on wrap-around (carry set), else 0 */
#define PROPCARRY                  \
asm(                               \
	" LDR   r0,%1            \n\t" \
	" ADDS  r0,r0,%0         \n\t" \
	" STR   r0,%1            \n\t" \
	" ITE CS                 \n\t" \
	" MOVCS %0,#1            \n\t" \
	" MOVCC %0,#0            \n\t" \
	:"=r"(cy),"=m"(_c[0])\
	:"0"(cy),"m"(_c[0])\
	:"r0","cc");
#else /* Non-Thumb2 code */
//#pragma message ("Using 32 bit ARM Assembly Optimizations")
/* Same contract as the Thumb2 INNERMUL above, using classic ARM
 * conditional execution (no ITE needed). */
#define INNERMUL                    \
asm(                                \
	" LDR    r0,%1            \n\t" \
	" ADDS   r0,r0,%0         \n\t" \
	" MOVCS  %0,#1            \n\t" \
	" MOVCC  %0,#0            \n\t" \
	" UMLAL  r0,%0,%3,%4      \n\t" \
	" STR    r0,%1            \n\t" \
	:"=r"(cy),"=m"(_c[0])\
	:"0"(cy),"r"(mu),"r"(*tmpm++),"m"(_c[0])\
	:"r0","cc");
/* _c[0] += cy; cy = 1 on wrap-around (carry set), else 0 */
#define PROPCARRY                  \
asm(                               \
	" LDR   r0,%1            \n\t" \
	" ADDS  r0,r0,%0         \n\t" \
	" STR   r0,%1            \n\t" \
	" MOVCS %0,#1            \n\t" \
	" MOVCC %0,#0            \n\t" \
	:"=r"(cy),"=m"(_c[0])\
	:"0"(cy),"m"(_c[0])\
	:"r0","cc");
#endif /* __thumb2__ */
 
274
 
 
275
 
 
276
/******************************************************************************/
 
277
#elif defined(PSTM_MIPS)
/* MIPS32 */
//#pragma message ("Using 32 bit MIPS Assembly Optimizations")
#define MONT_START
#define MONT_FINI
#define LOOP_END
/* mu = low digit of c[x] * mp: the factor that zeroes digit x of c */
#define LOOP_START \
mu = c[x] * mp

/* _c[0] += cy + mu * tmpm[0]; cy = high 32 bits of the result.
 * multu leaves the 64-bit product in HI:LO ($13:$12 after mflo/mfhi);
 * the addu/sltu pairs add cy and the current digit, recovering each
 * carry by the "sum < addend" trick. tmpm is advanced by the trailing
 * statement outside the asm, so the macro must be invoked as a
 * statement (INNERMUL;). */
#define INNERMUL                      \
asm(                                                              \
	" multu    %3,%4          \n\t"   \
	" mflo     $12            \n\t"   \
	" mfhi     $13            \n\t"   \
	" addu     $12,$12,%0     \n\t"   \
	" sltu     $10,$12,%0     \n\t"   \
	" addu     $13,$13,$10    \n\t"   \
	" lw       $10,%1         \n\t"   \
	" addu     $12,$12,$10    \n\t"   \
	" sltu     $10,$12,$10    \n\t"   \
	" addu     %0,$13,$10     \n\t"   \
	" sw       $12,%1         \n\t"   \
	:"=r"(cy),"=m"(_c[0])\
	:"r"(cy),"r"(mu),"r"(tmpm[0]),"r"(_c[0])\
	:"$10","$12","$13")\
; ++tmpm;

/* _c[0] += cy; cy = 1 on wrap-around ("sum < addend"), else 0 */
#define PROPCARRY                     \
asm(                                  \
	" lw       $10,%1        \n\t"    \
	" addu     $10,$10,%0    \n\t"    \
	" sw       $10,%1        \n\t"    \
	" sltu     %0,$10,%0     \n\t"    \
	:"=r"(cy),"=m"(_c[0])\
	:"r"(cy),"r"(_c[0])\
	:"$10");
 
313
 
 
314
 
 
315
/******************************************************************************/
 
316
#else
 
317
 
 
318
/* ISO C code */
#define MONT_START
#define MONT_FINI
#define LOOP_END
/* mu = low digit of c[x] * mp: the factor that zeroes digit x of c */
#define LOOP_START \
	mu = c[x] * mp

/* One multiply-accumulate step of the inner reduction loop:
 * _c[0] += cy + mu * (*tmpm++), computed in double-width pstm_word;
 * the low digit goes back into _c[0] and the high digit becomes the
 * carry cy for the next step. This is the reference contract the
 * assembly INNERMUL variants above implement. */
#define INNERMUL                                                 \
	do { pstm_word t;                                        \
		t = ((pstm_word)_c[0] + (pstm_word)cy) +         \
			(((pstm_word)mu) * ((pstm_word)*tmpm++));\
		_c[0] = (pstm_digit)t;                           \
		cy = (pstm_digit)(t >> DIGIT_BIT);               \
	} while (0)

/* Propagate a carry: _c[0] += cy; cy = 1 if the add wrapped
 * (unsigned sum < addend), else 0 */
#define PROPCARRY \
	do { pstm_digit t = _c[0] += cy; cy = (t < cy); } while (0)
 
335
 
 
336
#endif
 
337
 
 
338
/******************************************************************************/
 
339
 
 
340
#define LO 0
 
341
 
 
342
/* computes x/R == x (mod N) via Montgomery Reduction */
 
343
int32 pstm_montgomery_reduce(psPool_t *pool, pstm_int *a, pstm_int *m,
 
344
                pstm_digit mp, pstm_digit *paD, uint32 paDlen)
 
345
{
 
346
        pstm_digit      *c, *_c, *tmpm, mu;
 
347
        int32           oldused, x, y;
 
348
        int             pa; //bbox: was int16
 
349
 
 
350
        pa = m->used;
 
351
        if (pa > a->alloc) {
 
352
                /* Sanity test for bad numbers.  This will confirm no buffer overruns */
 
353
                return PS_LIMIT_FAIL;
 
354
        }
 
355
 
 
356
        if (paD && paDlen >= (uint32)2*pa+1) {
 
357
                c = paD;
 
358
                memset(c, 0x0, paDlen);
 
359
        } else {
 
360
                c = xzalloc(2*pa+1);//bbox
 
361
        }
 
362
        /* copy the input */
 
363
        oldused = a->used;
 
364
        for (x = 0; x < oldused; x++) {
 
365
                c[x] = a->dp[x];
 
366
        }
 
367
 
 
368
        MONT_START;
 
369
 
 
370
        for (x = 0; x < pa; x++) {
 
371
                pstm_digit cy = 0;
 
372
                /* get Mu for this round */
 
373
                LOOP_START;
 
374
                _c   = c + x;
 
375
                tmpm = m->dp;
 
376
                y = 0;
 
377
#ifdef PSTM_X86_64
 
378
                for (; y < (pa & ~7); y += 8) {
 
379
                        INNERMUL8;
 
380
                        _c   += 8;
 
381
                        tmpm += 8;
 
382
                }
 
383
#endif /* PSTM_X86_64 */
 
384
                for (; y < pa; y++) {
 
385
                        INNERMUL;
 
386
                        ++_c;
 
387
                }
 
388
                LOOP_END;
 
389
                while (cy) {
 
390
                        PROPCARRY;
 
391
                        ++_c;
 
392
                }
 
393
        }
 
394
 
 
395
        /* now copy out */
 
396
        _c   = c + pa;
 
397
        tmpm = a->dp;
 
398
        for (x = 0; x < pa+1; x++) {
 
399
                *tmpm++ = *_c++;
 
400
        }
 
401
 
 
402
        for (; x < oldused; x++)   {
 
403
                *tmpm++ = 0;
 
404
        }
 
405
 
 
406
        MONT_FINI;
 
407
 
 
408
        a->used = pa+1;
 
409
        pstm_clamp(a);
 
410
 
 
411
        /* reuse x as return code */
 
412
        x = PSTM_OKAY;
 
413
 
 
414
        /* if A >= m then A = A - m */
 
415
        if (pstm_cmp_mag (a, m) != PSTM_LT) {
 
416
                if (s_pstm_sub (a, m, a) != PSTM_OKAY) {
 
417
                        x = PS_MEM_FAIL;
 
418
                }
 
419
        }
 
420
        if (paDlen < (uint32)2*pa+1) {
 
421
                psFree(c, pool);
 
422
        }
 
423
        return x;
 
424
}
 
425
 
 
426
#endif /* !DISABLE_PSTM */
 
427
/******************************************************************************/