/*
Copyright (C) 1999-2007 The Botan Project. All rights reserved.

Redistribution and use in source and binary forms, for any use, with or without
modification, is permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
list of conditions, and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions, and the following disclaimer in the documentation
and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) "AS IS" AND ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED.

IN NO EVENT SHALL THE AUTHOR(S) OR CONTRIBUTOR(S) BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
27
namespace QCA { // WRAPNS_LINE
/*************************************************
* Comba Multiplication and Squaring Source File  *
* (C) 1999-2007 The Botan Project                *
*************************************************/
34
#include <botan/mp_core.h>
35
namespace QCA { // WRAPNS_LINE
37
#include <botan/mp_asmi.h>
38
namespace QCA { // WRAPNS_LINE
/*************************************************
* Comba 4x4 Multiplication                       *
*************************************************/
/*
* Multiply the 4-word integers x and y into the 8-word product z using
* column-wise (Comba) accumulation.
*
* z:    output, 8 words; z[k] receives the column built from every
*       x[i] * y[j] with i + j == k, so z[0] is the least significant word.
* x, y: inputs, 4 words each; only read.
*
* Each product of a column is handed to word3_muladd together with the
* three-word accumulator (w2, w1, w0). When a column is complete, w0 is
* stored to z[k] and the accumulator is shifted down by one word so that
* the remainder carries into column k + 1.
*
* NOTE(review): word3_muladd is declared outside this block (the file
* includes botan/mp_asmi.h). It is assumed to add the double-word product
* of its last two arguments into w2:w1:w0 -- confirm against its definition.
*
* z must not overlap x or y: low words of z are stored while x and y are
* still being read for the later columns.
*/
void bigint_comba_mul4(word z[8], const word x[4], const word y[4])
   {
   word w2 = 0, w1 = 0, w0 = 0;

   word3_muladd(&w2, &w1, &w0, x[0], y[0]);
   z[0] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd(&w2, &w1, &w0, x[0], y[1]);
   word3_muladd(&w2, &w1, &w0, x[1], y[0]);
   z[1] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd(&w2, &w1, &w0, x[0], y[2]);
   word3_muladd(&w2, &w1, &w0, x[1], y[1]);
   word3_muladd(&w2, &w1, &w0, x[2], y[0]);
   z[2] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd(&w2, &w1, &w0, x[0], y[3]);
   word3_muladd(&w2, &w1, &w0, x[1], y[2]);
   word3_muladd(&w2, &w1, &w0, x[2], y[1]);
   word3_muladd(&w2, &w1, &w0, x[3], y[0]);
   z[3] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd(&w2, &w1, &w0, x[1], y[3]);
   word3_muladd(&w2, &w1, &w0, x[2], y[2]);
   word3_muladd(&w2, &w1, &w0, x[3], y[1]);
   z[4] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd(&w2, &w1, &w0, x[2], y[3]);
   word3_muladd(&w2, &w1, &w0, x[3], y[2]);
   z[5] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd(&w2, &w1, &w0, x[3], y[3]);
   /* The last column leaves the two most significant result words in w0
      and w1; store both so that all 8 words of z are written. */
   z[6] = w0;
   z[7] = w1;
   }
/*************************************************
* Comba 6x6 Multiplication                       *
*************************************************/
/*
* Multiply the 6-word integers x and y into the 12-word product z using
* column-wise (Comba) accumulation.
*
* z:    output, 12 words; z[k] receives the column built from every
*       x[i] * y[j] with i + j == k, so z[0] is the least significant word.
* x, y: inputs, 6 words each; only read.
*
* Each product of a column is handed to word3_muladd together with the
* three-word accumulator (w2, w1, w0). When a column is complete, w0 is
* stored to z[k] and the accumulator is shifted down by one word so that
* the remainder carries into column k + 1.
*
* NOTE(review): word3_muladd is declared outside this block (the file
* includes botan/mp_asmi.h). It is assumed to add the double-word product
* of its last two arguments into w2:w1:w0 -- confirm against its definition.
*
* z must not overlap x or y: low words of z are stored while x and y are
* still being read for the later columns.
*/
void bigint_comba_mul6(word z[12], const word x[6], const word y[6])
   {
   word w2 = 0, w1 = 0, w0 = 0;

   word3_muladd(&w2, &w1, &w0, x[0], y[0]);
   z[0] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd(&w2, &w1, &w0, x[0], y[1]);
   word3_muladd(&w2, &w1, &w0, x[1], y[0]);
   z[1] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd(&w2, &w1, &w0, x[0], y[2]);
   word3_muladd(&w2, &w1, &w0, x[1], y[1]);
   word3_muladd(&w2, &w1, &w0, x[2], y[0]);
   z[2] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd(&w2, &w1, &w0, x[0], y[3]);
   word3_muladd(&w2, &w1, &w0, x[1], y[2]);
   word3_muladd(&w2, &w1, &w0, x[2], y[1]);
   word3_muladd(&w2, &w1, &w0, x[3], y[0]);
   z[3] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd(&w2, &w1, &w0, x[0], y[4]);
   word3_muladd(&w2, &w1, &w0, x[1], y[3]);
   word3_muladd(&w2, &w1, &w0, x[2], y[2]);
   word3_muladd(&w2, &w1, &w0, x[3], y[1]);
   word3_muladd(&w2, &w1, &w0, x[4], y[0]);
   z[4] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd(&w2, &w1, &w0, x[0], y[5]);
   word3_muladd(&w2, &w1, &w0, x[1], y[4]);
   word3_muladd(&w2, &w1, &w0, x[2], y[3]);
   word3_muladd(&w2, &w1, &w0, x[3], y[2]);
   word3_muladd(&w2, &w1, &w0, x[4], y[1]);
   word3_muladd(&w2, &w1, &w0, x[5], y[0]);
   z[5] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd(&w2, &w1, &w0, x[1], y[5]);
   word3_muladd(&w2, &w1, &w0, x[2], y[4]);
   word3_muladd(&w2, &w1, &w0, x[3], y[3]);
   word3_muladd(&w2, &w1, &w0, x[4], y[2]);
   word3_muladd(&w2, &w1, &w0, x[5], y[1]);
   z[6] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd(&w2, &w1, &w0, x[2], y[5]);
   word3_muladd(&w2, &w1, &w0, x[3], y[4]);
   word3_muladd(&w2, &w1, &w0, x[4], y[3]);
   word3_muladd(&w2, &w1, &w0, x[5], y[2]);
   z[7] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd(&w2, &w1, &w0, x[3], y[5]);
   word3_muladd(&w2, &w1, &w0, x[4], y[4]);
   word3_muladd(&w2, &w1, &w0, x[5], y[3]);
   z[8] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd(&w2, &w1, &w0, x[4], y[5]);
   word3_muladd(&w2, &w1, &w0, x[5], y[4]);
   z[9] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd(&w2, &w1, &w0, x[5], y[5]);
   /* The last column leaves the two most significant result words in w0
      and w1; store both so that all 12 words of z are written. */
   z[10] = w0;
   z[11] = w1;
   }
/*************************************************
* Comba 8x8 Multiplication                       *
*************************************************/
/*
* Multiply the 8-word integers x and y into the 16-word product z using
* column-wise (Comba) accumulation.
*
* z:    output, 16 words; z[k] receives the column built from every
*       x[i] * y[j] with i + j == k, so z[0] is the least significant word.
* x, y: inputs, 8 words each; only read.
*
* Each product of a column is handed to word3_muladd together with the
* three-word accumulator (w2, w1, w0). When a column is complete, w0 is
* stored to z[k] and the accumulator is shifted down by one word so that
* the remainder carries into column k + 1.
*
* NOTE(review): word3_muladd is declared outside this block (the file
* includes botan/mp_asmi.h). It is assumed to add the double-word product
* of its last two arguments into w2:w1:w0 -- confirm against its definition.
*
* z must not overlap x or y: low words of z are stored while x and y are
* still being read for the later columns.
*/
void bigint_comba_mul8(word z[16], const word x[8], const word y[8])
   {
   word w2 = 0, w1 = 0, w0 = 0;

   word3_muladd(&w2, &w1, &w0, x[0], y[0]);
   z[0] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd(&w2, &w1, &w0, x[0], y[1]);
   word3_muladd(&w2, &w1, &w0, x[1], y[0]);
   z[1] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd(&w2, &w1, &w0, x[0], y[2]);
   word3_muladd(&w2, &w1, &w0, x[1], y[1]);
   word3_muladd(&w2, &w1, &w0, x[2], y[0]);
   z[2] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd(&w2, &w1, &w0, x[0], y[3]);
   word3_muladd(&w2, &w1, &w0, x[1], y[2]);
   word3_muladd(&w2, &w1, &w0, x[2], y[1]);
   word3_muladd(&w2, &w1, &w0, x[3], y[0]);
   z[3] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd(&w2, &w1, &w0, x[0], y[4]);
   word3_muladd(&w2, &w1, &w0, x[1], y[3]);
   word3_muladd(&w2, &w1, &w0, x[2], y[2]);
   word3_muladd(&w2, &w1, &w0, x[3], y[1]);
   word3_muladd(&w2, &w1, &w0, x[4], y[0]);
   z[4] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd(&w2, &w1, &w0, x[0], y[5]);
   word3_muladd(&w2, &w1, &w0, x[1], y[4]);
   word3_muladd(&w2, &w1, &w0, x[2], y[3]);
   word3_muladd(&w2, &w1, &w0, x[3], y[2]);
   word3_muladd(&w2, &w1, &w0, x[4], y[1]);
   word3_muladd(&w2, &w1, &w0, x[5], y[0]);
   z[5] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd(&w2, &w1, &w0, x[0], y[6]);
   word3_muladd(&w2, &w1, &w0, x[1], y[5]);
   word3_muladd(&w2, &w1, &w0, x[2], y[4]);
   word3_muladd(&w2, &w1, &w0, x[3], y[3]);
   word3_muladd(&w2, &w1, &w0, x[4], y[2]);
   word3_muladd(&w2, &w1, &w0, x[5], y[1]);
   word3_muladd(&w2, &w1, &w0, x[6], y[0]);
   z[6] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd(&w2, &w1, &w0, x[0], y[7]);
   word3_muladd(&w2, &w1, &w0, x[1], y[6]);
   word3_muladd(&w2, &w1, &w0, x[2], y[5]);
   word3_muladd(&w2, &w1, &w0, x[3], y[4]);
   word3_muladd(&w2, &w1, &w0, x[4], y[3]);
   word3_muladd(&w2, &w1, &w0, x[5], y[2]);
   word3_muladd(&w2, &w1, &w0, x[6], y[1]);
   word3_muladd(&w2, &w1, &w0, x[7], y[0]);
   z[7] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd(&w2, &w1, &w0, x[1], y[7]);
   word3_muladd(&w2, &w1, &w0, x[2], y[6]);
   word3_muladd(&w2, &w1, &w0, x[3], y[5]);
   word3_muladd(&w2, &w1, &w0, x[4], y[4]);
   word3_muladd(&w2, &w1, &w0, x[5], y[3]);
   word3_muladd(&w2, &w1, &w0, x[6], y[2]);
   word3_muladd(&w2, &w1, &w0, x[7], y[1]);
   z[8] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd(&w2, &w1, &w0, x[2], y[7]);
   word3_muladd(&w2, &w1, &w0, x[3], y[6]);
   word3_muladd(&w2, &w1, &w0, x[4], y[5]);
   word3_muladd(&w2, &w1, &w0, x[5], y[4]);
   word3_muladd(&w2, &w1, &w0, x[6], y[3]);
   word3_muladd(&w2, &w1, &w0, x[7], y[2]);
   z[9] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd(&w2, &w1, &w0, x[3], y[7]);
   word3_muladd(&w2, &w1, &w0, x[4], y[6]);
   word3_muladd(&w2, &w1, &w0, x[5], y[5]);
   word3_muladd(&w2, &w1, &w0, x[6], y[4]);
   word3_muladd(&w2, &w1, &w0, x[7], y[3]);
   z[10] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd(&w2, &w1, &w0, x[4], y[7]);
   word3_muladd(&w2, &w1, &w0, x[5], y[6]);
   word3_muladd(&w2, &w1, &w0, x[6], y[5]);
   word3_muladd(&w2, &w1, &w0, x[7], y[4]);
   z[11] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd(&w2, &w1, &w0, x[5], y[7]);
   word3_muladd(&w2, &w1, &w0, x[6], y[6]);
   word3_muladd(&w2, &w1, &w0, x[7], y[5]);
   z[12] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd(&w2, &w1, &w0, x[6], y[7]);
   word3_muladd(&w2, &w1, &w0, x[7], y[6]);
   z[13] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd(&w2, &w1, &w0, x[7], y[7]);
   /* The last column leaves the two most significant result words in w0
      and w1; store both so that all 16 words of z are written. */
   z[14] = w0;
   z[15] = w1;
   }
/*************************************************
* Comba 4x4 Squaring                             *
*************************************************/
/*
* Square the 4-word integer x into the 8-word result z using column-wise
* (Comba) accumulation.
*
* z: output, 8 words; z[k] receives the column built from the products
*    x[i] * x[j] with i + j == k, so z[0] is the least significant word.
* x: input, 4 words; only read.
*
* Within a column each pair i < j is passed once to word3_muladd_2 and the
* diagonal term x[i] * x[i] (even columns only) once to word3_muladd, all
* sharing the three-word accumulator (w2, w1, w0). When a column is
* complete, w0 is stored to z[k] and the accumulator is shifted down by
* one word so that the remainder carries into column k + 1.
*
* NOTE(review): word3_muladd and word3_muladd_2 are declared outside this
* block (the file includes botan/mp_asmi.h). They are assumed to add the
* product, respectively twice the product, of their last two arguments
* into w2:w1:w0 -- confirm against their definitions.
*
* z must not overlap x: low words of z are stored while x is still being
* read for the later columns.
*/
void bigint_comba_sqr4(word z[8], const word x[4])
   {
   word w2 = 0, w1 = 0, w0 = 0;

   word3_muladd(&w2, &w1, &w0, x[0], x[0]);
   z[0] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd_2(&w2, &w1, &w0, x[0], x[1]);
   z[1] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd_2(&w2, &w1, &w0, x[0], x[2]);
   word3_muladd(&w2, &w1, &w0, x[1], x[1]);
   z[2] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd_2(&w2, &w1, &w0, x[0], x[3]);
   word3_muladd_2(&w2, &w1, &w0, x[1], x[2]);
   z[3] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd_2(&w2, &w1, &w0, x[1], x[3]);
   word3_muladd(&w2, &w1, &w0, x[2], x[2]);
   z[4] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd_2(&w2, &w1, &w0, x[2], x[3]);
   z[5] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd(&w2, &w1, &w0, x[3], x[3]);
   /* The last column leaves the two most significant result words in w0
      and w1; store both so that all 8 words of z are written. */
   z[6] = w0;
   z[7] = w1;
   }
/*************************************************
* Comba 6x6 Squaring                             *
*************************************************/
/*
* Square the 6-word integer x into the 12-word result z using column-wise
* (Comba) accumulation.
*
* z: output, 12 words; z[k] receives the column built from the products
*    x[i] * x[j] with i + j == k, so z[0] is the least significant word.
* x: input, 6 words; only read.
*
* Within a column each pair i < j is passed once to word3_muladd_2 and the
* diagonal term x[i] * x[i] (even columns only) once to word3_muladd, all
* sharing the three-word accumulator (w2, w1, w0). When a column is
* complete, w0 is stored to z[k] and the accumulator is shifted down by
* one word so that the remainder carries into column k + 1.
*
* NOTE(review): word3_muladd and word3_muladd_2 are declared outside this
* block (the file includes botan/mp_asmi.h). They are assumed to add the
* product, respectively twice the product, of their last two arguments
* into w2:w1:w0 -- confirm against their definitions.
*
* z must not overlap x: low words of z are stored while x is still being
* read for the later columns.
*/
void bigint_comba_sqr6(word z[12], const word x[6])
   {
   word w2 = 0, w1 = 0, w0 = 0;

   word3_muladd(&w2, &w1, &w0, x[0], x[0]);
   z[0] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd_2(&w2, &w1, &w0, x[0], x[1]);
   z[1] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd_2(&w2, &w1, &w0, x[0], x[2]);
   word3_muladd(&w2, &w1, &w0, x[1], x[1]);
   z[2] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd_2(&w2, &w1, &w0, x[0], x[3]);
   word3_muladd_2(&w2, &w1, &w0, x[1], x[2]);
   z[3] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd_2(&w2, &w1, &w0, x[0], x[4]);
   word3_muladd_2(&w2, &w1, &w0, x[1], x[3]);
   word3_muladd(&w2, &w1, &w0, x[2], x[2]);
   z[4] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd_2(&w2, &w1, &w0, x[0], x[5]);
   word3_muladd_2(&w2, &w1, &w0, x[1], x[4]);
   word3_muladd_2(&w2, &w1, &w0, x[2], x[3]);
   z[5] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd_2(&w2, &w1, &w0, x[1], x[5]);
   word3_muladd_2(&w2, &w1, &w0, x[2], x[4]);
   word3_muladd(&w2, &w1, &w0, x[3], x[3]);
   z[6] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd_2(&w2, &w1, &w0, x[2], x[5]);
   word3_muladd_2(&w2, &w1, &w0, x[3], x[4]);
   z[7] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd_2(&w2, &w1, &w0, x[3], x[5]);
   word3_muladd(&w2, &w1, &w0, x[4], x[4]);
   z[8] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd_2(&w2, &w1, &w0, x[4], x[5]);
   z[9] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd(&w2, &w1, &w0, x[5], x[5]);
   /* The last column leaves the two most significant result words in w0
      and w1; store both so that all 12 words of z are written. */
   z[10] = w0;
   z[11] = w1;
   }
/*************************************************
* Comba 8x8 Squaring                             *
*************************************************/
/*
* Square the 8-word integer x into the 16-word result z using column-wise
* (Comba) accumulation.
*
* z: output, 16 words; z[k] receives the column built from the products
*    x[i] * x[j] with i + j == k, so z[0] is the least significant word.
* x: input, 8 words; only read.
*
* Within a column each pair i < j is passed once to word3_muladd_2 and the
* diagonal term x[i] * x[i] (even columns only) once to word3_muladd, all
* sharing the three-word accumulator (w2, w1, w0). When a column is
* complete, w0 is stored to z[k] and the accumulator is shifted down by
* one word so that the remainder carries into column k + 1.
*
* NOTE(review): word3_muladd and word3_muladd_2 are declared outside this
* block (the file includes botan/mp_asmi.h). They are assumed to add the
* product, respectively twice the product, of their last two arguments
* into w2:w1:w0 -- confirm against their definitions.
*
* z must not overlap x: low words of z are stored while x is still being
* read for the later columns.
*/
void bigint_comba_sqr8(word z[16], const word x[8])
   {
   word w2 = 0, w1 = 0, w0 = 0;

   word3_muladd(&w2, &w1, &w0, x[0], x[0]);
   z[0] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd_2(&w2, &w1, &w0, x[0], x[1]);
   z[1] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd_2(&w2, &w1, &w0, x[0], x[2]);
   word3_muladd(&w2, &w1, &w0, x[1], x[1]);
   z[2] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd_2(&w2, &w1, &w0, x[0], x[3]);
   word3_muladd_2(&w2, &w1, &w0, x[1], x[2]);
   z[3] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd_2(&w2, &w1, &w0, x[0], x[4]);
   word3_muladd_2(&w2, &w1, &w0, x[1], x[3]);
   word3_muladd(&w2, &w1, &w0, x[2], x[2]);
   z[4] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd_2(&w2, &w1, &w0, x[0], x[5]);
   word3_muladd_2(&w2, &w1, &w0, x[1], x[4]);
   word3_muladd_2(&w2, &w1, &w0, x[2], x[3]);
   z[5] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd_2(&w2, &w1, &w0, x[0], x[6]);
   word3_muladd_2(&w2, &w1, &w0, x[1], x[5]);
   word3_muladd_2(&w2, &w1, &w0, x[2], x[4]);
   word3_muladd(&w2, &w1, &w0, x[3], x[3]);
   z[6] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd_2(&w2, &w1, &w0, x[0], x[7]);
   word3_muladd_2(&w2, &w1, &w0, x[1], x[6]);
   word3_muladd_2(&w2, &w1, &w0, x[2], x[5]);
   word3_muladd_2(&w2, &w1, &w0, x[3], x[4]);
   z[7] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd_2(&w2, &w1, &w0, x[1], x[7]);
   word3_muladd_2(&w2, &w1, &w0, x[2], x[6]);
   word3_muladd_2(&w2, &w1, &w0, x[3], x[5]);
   word3_muladd(&w2, &w1, &w0, x[4], x[4]);
   z[8] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd_2(&w2, &w1, &w0, x[2], x[7]);
   word3_muladd_2(&w2, &w1, &w0, x[3], x[6]);
   word3_muladd_2(&w2, &w1, &w0, x[4], x[5]);
   z[9] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd_2(&w2, &w1, &w0, x[3], x[7]);
   word3_muladd_2(&w2, &w1, &w0, x[4], x[6]);
   word3_muladd(&w2, &w1, &w0, x[5], x[5]);
   z[10] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd_2(&w2, &w1, &w0, x[4], x[7]);
   word3_muladd_2(&w2, &w1, &w0, x[5], x[6]);
   z[11] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd_2(&w2, &w1, &w0, x[5], x[7]);
   word3_muladd(&w2, &w1, &w0, x[6], x[6]);
   z[12] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd_2(&w2, &w1, &w0, x[6], x[7]);
   z[13] = w0; w0 = w1; w1 = w2; w2 = 0;

   word3_muladd(&w2, &w1, &w0, x[7], x[7]);
   /* The last column leaves the two most significant result words in w0
      and w1; store both so that all 16 words of z are written. */
   z[14] = w0;
   z[15] = w1;
   }