/*
 * Copyright (C) 2017 Denys Vlasenko
 *
 * Licensed under GPLv2, see file LICENSE in this source tree.
 */
/* The file is taken almost verbatim from matrixssl-3-7-2b-open/crypto/math/.
 * Changes are flagged with //bbox
 */

/**
 *	@file    pstm_montgomery_reduce.c
 *	@version 33ef80f (HEAD, tag: MATRIXSSL-3-7-2-OPEN, tag: MATRIXSSL-3-7-2-COMM, origin/master, origin/HEAD, master)
 *
 *	Multiprecision Montgomery Reduction.
 */
/*
 *	Copyright (c) 2013-2015 INSIDE Secure Corporation
 *	Copyright (c) PeerSec Networks, 2002-2011
 *
 *	The latest version of this code is available at http://www.matrixssl.org
 *
 *	This software is open source; you can redistribute it and/or modify
 *	it under the terms of the GNU General Public License as published by
 *	the Free Software Foundation; either version 2 of the License, or
 *	(at your option) any later version.
 *
 *	This General Public License does NOT permit incorporating this software
 *	into proprietary programs. If you are unable to comply with the GPL, a
 *	commercial license for this software may be purchased from INSIDE at
 *	http://www.insidesecure.com/eng/Company/Locations
 *
 *	This program is distributed in WITHOUT ANY WARRANTY; without even the
 *	implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *	See the GNU General Public License for more details.
 *
 *	You should have received a copy of the GNU General Public License
 *	along with this program; if not, write to the Free Software
 *	Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *	http://www.gnu.org/copyleft/gpl.html
 */
/******************************************************************************/
47
//#include "../cryptoApi.h"
50
/******************************************************************************/
53
/* x86-32 optimized for 32 bit platforms. For 64 bit mode use X86_64 instead */
54
#if !defined(__GNUC__) || !defined(__i386__) || !defined(PSTM_32BIT)
55
#error "PSTM_X86 option requires GCC and 32 bit mode x86 processor"
57
//#pragma message ("Using 32 bit x86 Assembly Optimizations")
67
"movl %5,%%eax \n\t" \
69
"addl %1,%%eax \n\t" \
70
"adcl $0,%%edx \n\t" \
71
"addl %%eax,%0 \n\t" \
72
"adcl $0,%%edx \n\t" \
73
"movl %%edx,%1 \n\t" \
74
:"=g"(_c[LO]), "=r"(cy) \
75
:"0"(_c[LO]), "1"(cy), "g"(mu), "g"(*tmpm++) \
76
: "%eax", "%edx", "%cc")
82
"movzbl %%al,%1 \n\t" \
83
:"=g"(_c[LO]), "=r"(cy) \
84
:"0"(_c[LO]), "1"(cy) \
87
/******************************************************************************/
88
#elif defined(PSTM_X86_64)
89
/* x86-64 optimized */
90
#if !defined(__GNUC__) || !defined(__x86_64__) || !defined(PSTM_64BIT)
91
#error "PSTM_X86_64 option requires PSTM_64BIT, GCC and 64 bit mode x86 processor"
93
//#pragma message ("Using 64 bit x86_64 Assembly Optimizations")
103
"movq %5,%%rax \n\t" \
105
"addq %1,%%rax \n\t" \
106
"adcq $0,%%rdx \n\t" \
107
"addq %%rax,%0 \n\t" \
108
"adcq $0,%%rdx \n\t" \
109
"movq %%rdx,%1 \n\t" \
110
:"=g"(_c[LO]), "=r"(cy) \
111
:"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \
112
: "%rax", "%rdx", "cc")
116
"movq 0(%5),%%rax \n\t" \
117
"movq 0(%2),%%r10 \n\t" \
118
"movq 0x8(%5),%%r11 \n\t" \
120
"addq %%r10,%%rax \n\t" \
121
"adcq $0,%%rdx \n\t" \
122
"movq 0x8(%2),%%r10 \n\t" \
123
"addq %3,%%rax \n\t" \
124
"adcq $0,%%rdx \n\t" \
125
"movq %%rax,0(%0) \n\t" \
126
"movq %%rdx,%1 \n\t" \
128
"movq %%r11,%%rax \n\t" \
129
"movq 0x10(%5),%%r11 \n\t" \
131
"addq %%r10,%%rax \n\t" \
132
"adcq $0,%%rdx \n\t" \
133
"movq 0x10(%2),%%r10 \n\t" \
134
"addq %3,%%rax \n\t" \
135
"adcq $0,%%rdx \n\t" \
136
"movq %%rax,0x8(%0) \n\t" \
137
"movq %%rdx,%1 \n\t" \
139
"movq %%r11,%%rax \n\t" \
140
"movq 0x18(%5),%%r11 \n\t" \
142
"addq %%r10,%%rax \n\t" \
143
"adcq $0,%%rdx \n\t" \
144
"movq 0x18(%2),%%r10 \n\t" \
145
"addq %3,%%rax \n\t" \
146
"adcq $0,%%rdx \n\t" \
147
"movq %%rax,0x10(%0) \n\t" \
148
"movq %%rdx,%1 \n\t" \
150
"movq %%r11,%%rax \n\t" \
151
"movq 0x20(%5),%%r11 \n\t" \
153
"addq %%r10,%%rax \n\t" \
154
"adcq $0,%%rdx \n\t" \
155
"movq 0x20(%2),%%r10 \n\t" \
156
"addq %3,%%rax \n\t" \
157
"adcq $0,%%rdx \n\t" \
158
"movq %%rax,0x18(%0) \n\t" \
159
"movq %%rdx,%1 \n\t" \
161
"movq %%r11,%%rax \n\t" \
162
"movq 0x28(%5),%%r11 \n\t" \
164
"addq %%r10,%%rax \n\t" \
165
"adcq $0,%%rdx \n\t" \
166
"movq 0x28(%2),%%r10 \n\t" \
167
"addq %3,%%rax \n\t" \
168
"adcq $0,%%rdx \n\t" \
169
"movq %%rax,0x20(%0) \n\t" \
170
"movq %%rdx,%1 \n\t" \
172
"movq %%r11,%%rax \n\t" \
173
"movq 0x30(%5),%%r11 \n\t" \
175
"addq %%r10,%%rax \n\t" \
176
"adcq $0,%%rdx \n\t" \
177
"movq 0x30(%2),%%r10 \n\t" \
178
"addq %3,%%rax \n\t" \
179
"adcq $0,%%rdx \n\t" \
180
"movq %%rax,0x28(%0) \n\t" \
181
"movq %%rdx,%1 \n\t" \
183
"movq %%r11,%%rax \n\t" \
184
"movq 0x38(%5),%%r11 \n\t" \
186
"addq %%r10,%%rax \n\t" \
187
"adcq $0,%%rdx \n\t" \
188
"movq 0x38(%2),%%r10 \n\t" \
189
"addq %3,%%rax \n\t" \
190
"adcq $0,%%rdx \n\t" \
191
"movq %%rax,0x30(%0) \n\t" \
192
"movq %%rdx,%1 \n\t" \
194
"movq %%r11,%%rax \n\t" \
196
"addq %%r10,%%rax \n\t" \
197
"adcq $0,%%rdx \n\t" \
198
"addq %3,%%rax \n\t" \
199
"adcq $0,%%rdx \n\t" \
200
"movq %%rax,0x38(%0) \n\t" \
201
"movq %%rdx,%1 \n\t" \
203
:"=r"(_c), "=r"(cy) \
204
: "0"(_c), "1"(cy), "g"(mu), "r"(tmpm)\
205
: "%rax", "%rdx", "%r10", "%r11", "cc")
211
"movzbq %%al,%1 \n\t" \
212
:"=g"(_c[LO]), "=r"(cy) \
213
:"0"(_c[LO]), "1"(cy) \
216
/******************************************************************************/
217
#elif defined(PSTM_ARM)
226
//#pragma message ("Using 32 bit ARM Thumb2 Assembly Optimizations")
230
" ADDS r0,r0,%0 \n\t" \
232
" MOVCS %0,#1 \n\t" \
233
" MOVCC %0,#0 \n\t" \
234
" UMLAL r0,%0,%3,%4 \n\t" \
236
:"=r"(cy),"=m"(_c[0])\
237
:"0"(cy),"r"(mu),"r"(*tmpm++),"m"(_c[0])\
242
" ADDS r0,r0,%0 \n\t" \
245
" MOVCS %0,#1 \n\t" \
246
" MOVCC %0,#0 \n\t" \
247
:"=r"(cy),"=m"(_c[0])\
250
#else /* Non-Thumb2 code */
251
//#pragma message ("Using 32 bit ARM Assembly Optimizations")
255
" ADDS r0,r0,%0 \n\t" \
256
" MOVCS %0,#1 \n\t" \
257
" MOVCC %0,#0 \n\t" \
258
" UMLAL r0,%0,%3,%4 \n\t" \
260
:"=r"(cy),"=m"(_c[0])\
261
:"0"(cy),"r"(mu),"r"(*tmpm++),"m"(_c[0])\
266
" ADDS r0,r0,%0 \n\t" \
268
" MOVCS %0,#1 \n\t" \
269
" MOVCC %0,#0 \n\t" \
270
:"=r"(cy),"=m"(_c[0])\
273
#endif /* __thumb2__ */
276
/******************************************************************************/
277
#elif defined(PSTM_MIPS)
279
//#pragma message ("Using 32 bit MIPS Assembly Optimizations")
288
" multu %3,%4 \n\t" \
291
" addu $12,$12,%0 \n\t" \
292
" sltu $10,$12,%0 \n\t" \
293
" addu $13,$13,$10 \n\t" \
295
" addu $12,$12,$10 \n\t" \
296
" sltu $10,$12,$10 \n\t" \
297
" addu %0,$13,$10 \n\t" \
299
:"=r"(cy),"=m"(_c[0])\
300
:"r"(cy),"r"(mu),"r"(tmpm[0]),"r"(_c[0])\
307
" addu $10,$10,%0 \n\t" \
309
" sltu %0,$10,%0 \n\t" \
310
:"=r"(cy),"=m"(_c[0])\
315
/******************************************************************************/
327
t = ((pstm_word)_c[0] + (pstm_word)cy) + \
328
(((pstm_word)mu) * ((pstm_word)*tmpm++)); \
329
_c[0] = (pstm_digit)t; \
330
cy = (pstm_digit)(t >> DIGIT_BIT); \
334
do { pstm_digit t = _c[0] += cy; cy = (t < cy); } while (0)
338
/******************************************************************************/
342
/* computes x/R == x (mod N) via Montgomery Reduction */
343
int32 pstm_montgomery_reduce(psPool_t *pool, pstm_int *a, pstm_int *m,
344
pstm_digit mp, pstm_digit *paD, uint32 paDlen)
346
pstm_digit *c, *_c, *tmpm, mu;
348
int pa; //bbox: was int16
352
/* Sanity test for bad numbers. This will confirm no buffer overruns */
353
return PS_LIMIT_FAIL;
356
if (paD && paDlen >= (uint32)2*pa+1) {
358
memset(c, 0x0, paDlen);
360
c = xzalloc(2*pa+1);//bbox
364
for (x = 0; x < oldused; x++) {
370
for (x = 0; x < pa; x++) {
372
/* get Mu for this round */
378
for (; y < (pa & ~7); y += 8) {
383
#endif /* PSTM_X86_64 */
384
for (; y < pa; y++) {
398
for (x = 0; x < pa+1; x++) {
402
for (; x < oldused; x++) {
411
/* reuse x as return code */
414
/* if A >= m then A = A - m */
415
if (pstm_cmp_mag (a, m) != PSTM_LT) {
416
if (s_pstm_sub (a, m, a) != PSTM_OKAY) {
420
if (paDlen < (uint32)2*pa+1) {
426
#endif /* !DISABLE_PSTM */
427
/******************************************************************************/