1
by weidai
Initial revision |
1 |
// integer.cpp - written and placed in the public domain by Wei Dai
|
2 |
// contains public domain code contributed by Alister Lee and Leonard Janke
|
|
3 |
||
4 |
#include "pch.h" |
|
75
by weidai
create DLL version, fix GetNextIV() bug in CTR and OFB modes |
5 |
|
6 |
#ifndef CRYPTOPP_IMPORTS
|
|
7 |
||
1
by weidai
Initial revision |
8 |
#include "integer.h" |
9 |
#include "modarith.h" |
|
10 |
#include "nbtheory.h" |
|
11 |
#include "asn.h" |
|
12 |
#include "oids.h" |
|
13 |
#include "words.h" |
|
14 |
#include "algparam.h" |
|
15 |
#include "pubkey.h" // for P1363_KDF2 |
|
16 |
#include "sha.h" |
|
270
by weidai
MMX/SSE2 optimizations |
17 |
#include "cpu.h" |
1
by weidai
Initial revision |
18 |
|
19 |
#include <iostream> |
|
20 |
||
315
by weidai
fix compile for x64, DLL and VC 6 |
21 |
#if _MSC_VER >= 1400
|
270
by weidai
MMX/SSE2 optimizations |
22 |
#include <intrin.h> |
23 |
#endif
|
|
24 |
||
25 |
#ifdef __DECCXX
|
|
26 |
#include <c_asm.h> |
|
27 |
#endif
|
|
28 |
||
29 |
#ifdef CRYPTOPP_MSVC6_NO_PP
|
|
30 |
#pragma message("You do not seem to have the Visual C++ Processor Pack installed, so use of SSE2 instructions will be disabled.")
|
|
1
by weidai
Initial revision |
31 |
#endif
|
32 |
||
532
by noloader
Added GCC_DIAGNOSTIC_AWARE to help suppress some warnings on contemporary compilers. The macro was needed to help with managing old compilers, like GCC 4.2.1, present on OpenBSD |
33 |
#if GCC_DIAGNOSTIC_AWARE
|
34 |
# pragma GCC diagnostic ignored "-Wunused-value"
|
|
35 |
# pragma GCC diagnostic ignored "-Wunused-variable"
|
|
36 |
#endif
|
|
37 |
||
315
by weidai
fix compile for x64, DLL and VC 6 |
38 |
#define CRYPTOPP_INTEGER_SSE2 (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86)
|
39 |
||
1
by weidai
Initial revision |
40 |
NAMESPACE_BEGIN(CryptoPP) |
41 |
||
181
by weidai
changes done for FIPS-140 lab code drop |
42 |
bool AssignIntToInteger(const std::type_info &valueType, void *pInteger, const void *pInt) |
27
by weidai
various changes for 5.1 |
43 |
{
|
44 |
if (valueType != typeid(Integer)) |
|
45 |
return false; |
|
46 |
*reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt); |
|
47 |
return true; |
|
48 |
}
|
|
49 |
||
270
by weidai
MMX/SSE2 optimizations |
50 |
inline static int Compare(const word *A, const word *B, size_t N) |
1
by weidai
Initial revision |
51 |
{
|
52 |
while (N--) |
|
53 |
if (A[N] > B[N]) |
|
54 |
return 1; |
|
55 |
else if (A[N] < B[N]) |
|
56 |
return -1; |
|
57 |
||
58 |
return 0; |
|
59 |
}
|
|
60 |
||
270
by weidai
MMX/SSE2 optimizations |
61 |
inline static int Increment(word *A, size_t N, word B=1) |
1
by weidai
Initial revision |
62 |
{
|
63 |
assert(N); |
|
64 |
word t = A[0]; |
|
65 |
A[0] = t+B; |
|
66 |
if (A[0] >= t) |
|
67 |
return 0; |
|
68 |
for (unsigned i=1; i<N; i++) |
|
69 |
if (++A[i]) |
|
70 |
return 0; |
|
71 |
return 1; |
|
72 |
}
|
|
73 |
||
270
by weidai
MMX/SSE2 optimizations |
74 |
inline static int Decrement(word *A, size_t N, word B=1) |
1
by weidai
Initial revision |
75 |
{
|
76 |
assert(N); |
|
77 |
word t = A[0]; |
|
78 |
A[0] = t-B; |
|
79 |
if (A[0] <= t) |
|
80 |
return 0; |
|
81 |
for (unsigned i=1; i<N; i++) |
|
82 |
if (A[i]--) |
|
83 |
return 0; |
|
84 |
return 1; |
|
85 |
}
|
|
86 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
87 |
static void TwosComplement(word *A, size_t N) |
1
by weidai
Initial revision |
88 |
{
|
89 |
Decrement(A, N); |
|
90 |
for (unsigned i=0; i<N; i++) |
|
91 |
A[i] = ~A[i]; |
|
92 |
}
|
|
93 |
||
100
by weidai
fix bugs in 64-bit CPU support |
94 |
static word AtomicInverseModPower2(word A) |
95 |
{
|
|
96 |
assert(A%2==1); |
|
97 |
||
98 |
word R=A%8; |
|
99 |
||
100 |
for (unsigned i=3; i<WORD_BITS; i*=2) |
|
1
by weidai
Initial revision |
101 |
R = R*(2-R*A); |
102 |
||
103 |
assert(R*A==1); |
|
100
by weidai
fix bugs in 64-bit CPU support |
104 |
return R; |
105 |
}
|
|
106 |
||
107 |
// ********************************************************
|
|
108 |
||
409
by weidai
fix compile with GCC 4.0.1 on MacOS X 64-bit |
109 |
#if !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) || (defined(__x86_64__) && defined(CRYPTOPP_WORD128_AVAILABLE))
|
315
by weidai
fix compile for x64, DLL and VC 6 |
110 |
#define Declare2Words(x) word x##0, x##1;
|
111 |
#define AssignWord(a, b) a##0 = b; a##1 = 0;
|
|
112 |
#define Add2WordsBy1(a, b, c) a##0 = b##0 + c; a##1 = b##1 + (a##0 < c);
|
|
113 |
#define LowWord(a) a##0
|
|
114 |
#define HighWord(a) a##1
|
|
115 |
#ifdef _MSC_VER
|
|
411
by weidai
port to Sun Studio 12's 64-bit C++ Compiler 5.9 Patch 124864-09 2008/12/16 |
116 |
#define MultiplyWordsLoHi(p0, p1, a, b) p0 = _umul128(a, b, &p1);
|
387
by weidai
fix compile with ICC 10 |
117 |
#ifndef __INTEL_COMPILER
|
118 |
#define Double3Words(c, d) d##1 = __shiftleft128(d##0, d##1, 1); d##0 = __shiftleft128(c, d##0, 1); c *= 2;
|
|
119 |
#endif
|
|
315
by weidai
fix compile for x64, DLL and VC 6 |
120 |
#elif defined(__DECCXX)
|
411
by weidai
port to Sun Studio 12's 64-bit C++ Compiler 5.9 Patch 124864-09 2008/12/16 |
121 |
#define MultiplyWordsLoHi(p0, p1, a, b) p0 = a*b; p1 = asm("umulh %a0, %a1, %v0", a, b);
|
387
by weidai
fix compile with ICC 10 |
122 |
#elif defined(__x86_64__)
|
477
by weidai
port to Sun Studio 12u1 Sun C++ 5.10 SunOS_i386 128229-02 2009/09/21 |
123 |
#if defined(__SUNPRO_CC) && __SUNPRO_CC < 0x5100
|
411
by weidai
port to Sun Studio 12's 64-bit C++ Compiler 5.9 Patch 124864-09 2008/12/16 |
124 |
// Sun Studio's gcc-style inline assembly is heavily bugged as of version 5.9 Patch 124864-09 2008/12/16, but this one works
|
125 |
#define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "r"(b) : "cc");
|
|
126 |
#else
|
|
127 |
#define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
|
|
128 |
#define MulAcc(c, d, a, b) asm ("mulq %6; addq %3, %0; adcq %4, %1; adcq $0, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1), "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
|
|
129 |
#define Double3Words(c, d) asm ("addq %0, %0; adcq %1, %1; adcq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc");
|
|
130 |
#define Acc2WordsBy1(a, b) asm ("addq %2, %0; adcq $0, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b) : "cc");
|
|
131 |
#define Acc2WordsBy2(a, b) asm ("addq %2, %0; adcq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc");
|
|
132 |
#define Acc3WordsBy2(c, d, e) asm ("addq %5, %0; adcq %6, %1; adcq $0, %2;" : "+r"(c), "=r"(e##0), "=r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1) : "cc");
|
|
133 |
#endif
|
|
315
by weidai
fix compile for x64, DLL and VC 6 |
134 |
#endif
|
411
by weidai
port to Sun Studio 12's 64-bit C++ Compiler 5.9 Patch 124864-09 2008/12/16 |
135 |
#define MultiplyWords(p, a, b) MultiplyWordsLoHi(p##0, p##1, a, b)
|
315
by weidai
fix compile for x64, DLL and VC 6 |
136 |
#ifndef Double3Words
|
137 |
#define Double3Words(c, d) d##1 = 2*d##1 + (d##0>>(WORD_BITS-1)); d##0 = 2*d##0 + (c>>(WORD_BITS-1)); c *= 2;
|
|
138 |
#endif
|
|
139 |
#ifndef Acc2WordsBy2
|
|
140 |
#define Acc2WordsBy2(a, b) a##0 += b##0; a##1 += a##0 < b##0; a##1 += b##1;
|
|
141 |
#endif
|
|
142 |
#define AddWithCarry(u, a, b) {word t = a+b; u##0 = t + u##1; u##1 = (t<a) + (u##0<t);}
|
|
143 |
#define SubtractWithBorrow(u, a, b) {word t = a-b; u##0 = t - u##1; u##1 = (t>a) + (u##0>t);}
|
|
144 |
#define GetCarry(u) u##1
|
|
145 |
#define GetBorrow(u) u##1
|
|
146 |
#else
|
|
270
by weidai
MMX/SSE2 optimizations |
147 |
#define Declare2Words(x) dword x;
|
148 |
#if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER)
|
|
149 |
#define MultiplyWords(p, a, b) p = __emulu(a, b);
|
|
150 |
#else
|
|
151 |
#define MultiplyWords(p, a, b) p = (dword)a*b;
|
|
152 |
#endif
|
|
153 |
#define AssignWord(a, b) a = b;
|
|
154 |
#define Add2WordsBy1(a, b, c) a = b + c;
|
|
155 |
#define Acc2WordsBy2(a, b) a += b;
|
|
315
by weidai
fix compile for x64, DLL and VC 6 |
156 |
#define LowWord(a) word(a)
|
157 |
#define HighWord(a) word(a>>WORD_BITS)
|
|
158 |
#define Double3Words(c, d) d = 2*d + (c>>(WORD_BITS-1)); c *= 2;
|
|
270
by weidai
MMX/SSE2 optimizations |
159 |
#define AddWithCarry(u, a, b) u = dword(a) + b + GetCarry(u);
|
160 |
#define SubtractWithBorrow(u, a, b) u = dword(a) - b - GetBorrow(u);
|
|
161 |
#define GetCarry(u) HighWord(u)
|
|
162 |
#define GetBorrow(u) word(u>>(WORD_BITS*2-1))
|
|
315
by weidai
fix compile for x64, DLL and VC 6 |
163 |
#endif
|
164 |
#ifndef MulAcc
|
|
165 |
#define MulAcc(c, d, a, b) MultiplyWords(p, a, b); Acc2WordsBy1(p, c); c = LowWord(p); Acc2WordsBy1(d, HighWord(p));
|
|
166 |
#endif
|
|
167 |
#ifndef Acc2WordsBy1
|
|
270
by weidai
MMX/SSE2 optimizations |
168 |
#define Acc2WordsBy1(a, b) Add2WordsBy1(a, a, b)
|
315
by weidai
fix compile for x64, DLL and VC 6 |
169 |
#endif
|
170 |
#ifndef Acc3WordsBy2
|
|
171 |
#define Acc3WordsBy2(c, d, e) Acc2WordsBy1(e, c); c = LowWord(e); Add2WordsBy1(e, d, HighWord(e));
|
|
270
by weidai
MMX/SSE2 optimizations |
172 |
#endif
|
173 |
||
100
by weidai
fix bugs in 64-bit CPU support |
174 |
class DWord |
175 |
{
|
|
176 |
public: |
|
177 |
DWord() {} |
|
178 |
||
179 |
#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
|
|
180 |
explicit DWord(word low) |
|
181 |
{
|
|
182 |
m_whole = low; |
|
183 |
}
|
|
184 |
#else
|
|
185 |
explicit DWord(word low) |
|
186 |
{
|
|
187 |
m_halfs.low = low; |
|
188 |
m_halfs.high = 0; |
|
189 |
}
|
|
190 |
#endif
|
|
191 |
||
192 |
DWord(word low, word high) |
|
193 |
{
|
|
194 |
m_halfs.low = low; |
|
195 |
m_halfs.high = high; |
|
196 |
}
|
|
197 |
||
198 |
static DWord Multiply(word a, word b) |
|
199 |
{
|
|
200 |
DWord r; |
|
201 |
#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
|
|
202 |
r.m_whole = (dword)a * b; |
|
411
by weidai
port to Sun Studio 12's 64-bit C++ Compiler 5.9 Patch 124864-09 2008/12/16 |
203 |
#elif defined(MultiplyWordsLoHi)
|
204 |
MultiplyWordsLoHi(r.m_halfs.low, r.m_halfs.high, a, b); |
|
100
by weidai
fix bugs in 64-bit CPU support |
205 |
#endif
|
206 |
return r; |
|
207 |
}
|
|
208 |
||
209 |
static DWord MultiplyAndAdd(word a, word b, word c) |
|
210 |
{
|
|
211 |
DWord r = Multiply(a, b); |
|
212 |
return r += c; |
|
213 |
}
|
|
214 |
||
215 |
DWord & operator+=(word a) |
|
216 |
{
|
|
217 |
#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
|
|
218 |
m_whole = m_whole + a; |
|
219 |
#else
|
|
220 |
m_halfs.low += a; |
|
221 |
m_halfs.high += (m_halfs.low < a); |
|
222 |
#endif
|
|
223 |
return *this; |
|
224 |
}
|
|
225 |
||
226 |
DWord operator+(word a) |
|
227 |
{
|
|
228 |
DWord r; |
|
229 |
#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
|
|
230 |
r.m_whole = m_whole + a; |
|
231 |
#else
|
|
232 |
r.m_halfs.low = m_halfs.low + a; |
|
233 |
r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a); |
|
234 |
#endif
|
|
235 |
return r; |
|
236 |
}
|
|
237 |
||
238 |
DWord operator-(DWord a) |
|
239 |
{
|
|
240 |
DWord r; |
|
241 |
#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
|
|
242 |
r.m_whole = m_whole - a.m_whole; |
|
243 |
#else
|
|
244 |
r.m_halfs.low = m_halfs.low - a.m_halfs.low; |
|
245 |
r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low); |
|
246 |
#endif
|
|
247 |
return r; |
|
248 |
}
|
|
249 |
||
250 |
DWord operator-(word a) |
|
251 |
{
|
|
252 |
DWord r; |
|
253 |
#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
|
|
254 |
r.m_whole = m_whole - a; |
|
255 |
#else
|
|
256 |
r.m_halfs.low = m_halfs.low - a; |
|
257 |
r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low); |
|
258 |
#endif
|
|
259 |
return r; |
|
260 |
}
|
|
261 |
||
262 |
// returns quotient, which must fit in a word
|
|
263 |
word operator/(word divisor); |
|
264 |
||
265 |
word operator%(word a); |
|
266 |
||
267 |
bool operator!() const |
|
268 |
{
|
|
269 |
#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
|
|
270 |
return !m_whole; |
|
271 |
#else
|
|
272 |
return !m_halfs.high && !m_halfs.low; |
|
273 |
#endif
|
|
274 |
}
|
|
275 |
||
276 |
word GetLowHalf() const {return m_halfs.low;} |
|
277 |
word GetHighHalf() const {return m_halfs.high;} |
|
278 |
word GetHighHalfAsBorrow() const {return 0-m_halfs.high;} |
|
279 |
||
280 |
private: |
|
281 |
union
|
|
282 |
{
|
|
283 |
#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
|
|
284 |
dword m_whole; |
|
285 |
#endif
|
|
286 |
struct
|
|
287 |
{
|
|
288 |
#ifdef IS_LITTLE_ENDIAN
|
|
289 |
word low; |
|
290 |
word high; |
|
291 |
#else
|
|
292 |
word high; |
|
293 |
word low; |
|
294 |
#endif
|
|
295 |
} m_halfs; |
|
296 |
};
|
|
297 |
};
|
|
298 |
||
299 |
class Word |
|
300 |
{
|
|
301 |
public: |
|
302 |
Word() {} |
|
303 |
||
304 |
Word(word value) |
|
305 |
{
|
|
306 |
m_whole = value; |
|
307 |
}
|
|
308 |
||
309 |
Word(hword low, hword high) |
|
310 |
{
|
|
311 |
m_whole = low | (word(high) << (WORD_BITS/2)); |
|
312 |
}
|
|
313 |
||
314 |
static Word Multiply(hword a, hword b) |
|
315 |
{
|
|
316 |
Word r; |
|
317 |
r.m_whole = (word)a * b; |
|
318 |
return r; |
|
319 |
}
|
|
320 |
||
321 |
Word operator-(Word a) |
|
322 |
{
|
|
323 |
Word r; |
|
324 |
r.m_whole = m_whole - a.m_whole; |
|
325 |
return r; |
|
326 |
}
|
|
327 |
||
328 |
Word operator-(hword a) |
|
329 |
{
|
|
330 |
Word r; |
|
331 |
r.m_whole = m_whole - a; |
|
332 |
return r; |
|
333 |
}
|
|
334 |
||
335 |
// returns quotient, which must fit in a word
|
|
336 |
hword operator/(hword divisor) |
|
337 |
{
|
|
338 |
return hword(m_whole / divisor); |
|
339 |
}
|
|
340 |
||
341 |
bool operator!() const |
|
342 |
{
|
|
343 |
return !m_whole; |
|
344 |
}
|
|
345 |
||
346 |
word GetWhole() const {return m_whole;} |
|
347 |
hword GetLowHalf() const {return hword(m_whole);} |
|
348 |
hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));} |
|
349 |
hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));} |
|
350 |
||
351 |
private: |
|
352 |
word m_whole; |
|
353 |
};
|
|
354 |
||
355 |
// do a 3 word by 2 word divide, returns quotient and leaves remainder in A
|
|
356 |
template <class S, class D> |
|
357 |
S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULL) |
|
358 |
{
|
|
359 |
// assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a S
|
|
360 |
assert(A[2] < B1 || (A[2]==B1 && A[1] < B0)); |
|
361 |
||
362 |
// estimate the quotient: do a 2 S by 1 S divide
|
|
363 |
S Q; |
|
364 |
if (S(B1+1) == 0) |
|
365 |
Q = A[2]; |
|
443
by weidai
fix Integer operator<< output on Windows x64 |
366 |
else if (B1 > 0) |
100
by weidai
fix bugs in 64-bit CPU support |
367 |
Q = D(A[1], A[2]) / S(B1+1); |
443
by weidai
fix Integer operator<< output on Windows x64 |
368 |
else
|
369 |
Q = D(A[0], A[1]) / B0; |
|
100
by weidai
fix bugs in 64-bit CPU support |
370 |
|
371 |
// now subtract Q*B from A
|
|
372 |
D p = D::Multiply(B0, Q); |
|
373 |
D u = (D) A[0] - p.GetLowHalf(); |
|
374 |
A[0] = u.GetLowHalf(); |
|
375 |
u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q); |
|
376 |
A[1] = u.GetLowHalf(); |
|
377 |
A[2] += u.GetHighHalf(); |
|
378 |
||
379 |
// Q <= actual quotient, so fix it
|
|
380 |
while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0)) |
|
381 |
{
|
|
382 |
u = (D) A[0] - B0; |
|
383 |
A[0] = u.GetLowHalf(); |
|
384 |
u = (D) A[1] - B1 - u.GetHighHalfAsBorrow(); |
|
385 |
A[1] = u.GetLowHalf(); |
|
386 |
A[2] += u.GetHighHalf(); |
|
387 |
Q++; |
|
388 |
assert(Q); // shouldn't overflow |
|
389 |
}
|
|
390 |
||
391 |
return Q; |
|
392 |
}
|
|
393 |
||
394 |
// do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
|
|
395 |
template <class S, class D> |
|
396 |
inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B) |
|
397 |
{
|
|
398 |
if (!B) // if divisor is 0, we assume divisor==2**(2*WORD_BITS) |
|
399 |
return D(Ah.GetLowHalf(), Ah.GetHighHalf()); |
|
400 |
else
|
|
401 |
{
|
|
402 |
S Q[2]; |
|
403 |
T[0] = Al.GetLowHalf(); |
|
404 |
T[1] = Al.GetHighHalf(); |
|
405 |
T[2] = Ah.GetLowHalf(); |
|
406 |
T[3] = Ah.GetHighHalf(); |
|
407 |
Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf()); |
|
408 |
Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf()); |
|
409 |
return D(Q[0], Q[1]); |
|
410 |
}
|
|
411 |
}
|
|
412 |
||
413 |
// returns quotient, which must fit in a word
|
|
414 |
inline word DWord::operator/(word a) |
|
415 |
{
|
|
416 |
#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
|
|
417 |
return word(m_whole / a); |
|
418 |
#else
|
|
419 |
hword r[4]; |
|
420 |
return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole(); |
|
421 |
#endif
|
|
422 |
}
|
|
423 |
||
424 |
inline word DWord::operator%(word a) |
|
425 |
{
|
|
426 |
#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
|
|
427 |
return word(m_whole % a); |
|
428 |
#else
|
|
429 |
if (a < (word(1) << (WORD_BITS/2))) |
|
430 |
{
|
|
431 |
hword h = hword(a); |
|
432 |
word r = m_halfs.high % h; |
|
433 |
r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h; |
|
434 |
return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h); |
|
435 |
}
|
|
436 |
else
|
|
437 |
{
|
|
438 |
hword r[4]; |
|
439 |
DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a); |
|
440 |
return Word(r[0], r[1]).GetWhole(); |
|
441 |
}
|
|
442 |
#endif
|
|
1
by weidai
Initial revision |
443 |
}
|
444 |
||
445 |
// ********************************************************
|
|
446 |
||
109
by weidai
enable SSE2 intrinsics on GCC 3.3 or later |
447 |
// use some tricks to share assembly code between MSVC and GCC
|
270
by weidai
MMX/SSE2 optimizations |
448 |
#if defined(__GNUC__)
|
113
by weidai
unify GCC and MSVC multiplication code |
449 |
#define AddPrologue \
|
351
by weidai
revert to int return value for Add and Sub |
450 |
int result; \
|
113
by weidai
unify GCC and MSVC multiplication code |
451 |
__asm__ __volatile__ \
|
109
by weidai
enable SSE2 intrinsics on GCC 3.3 or later |
452 |
( \
|
270
by weidai
MMX/SSE2 optimizations |
453 |
".intel_syntax noprefix;"
|
113
by weidai
unify GCC and MSVC multiplication code |
454 |
#define AddEpilogue \
|
455 |
".att_syntax prefix;" \
|
|
307
by weidai
fix compile with Intel compiler |
456 |
: "=a" (result)\
|
457 |
: "d" (C), "a" (A), "D" (B), "c" (N) \
|
|
270
by weidai
MMX/SSE2 optimizations |
458 |
: "%esi", "memory", "cc" \
|
307
by weidai
fix compile with Intel compiler |
459 |
);\
|
460 |
return result;
|
|
113
by weidai
unify GCC and MSVC multiplication code |
461 |
#define MulPrologue \
|
462 |
__asm__ __volatile__ \
|
|
463 |
( \
|
|
270
by weidai
MMX/SSE2 optimizations |
464 |
".intel_syntax noprefix;" \
|
465 |
AS1( push ebx) \
|
|
466 |
AS2( mov ebx, edx)
|
|
113
by weidai
unify GCC and MSVC multiplication code |
467 |
#define MulEpilogue \
|
270
by weidai
MMX/SSE2 optimizations |
468 |
AS1( pop ebx) \
|
469 |
".att_syntax prefix;" \
|
|
470 |
: \
|
|
471 |
: "d" (s_maskLow16), "c" (C), "a" (A), "D" (B) \
|
|
472 |
: "%esi", "memory", "cc" \
|
|
473 |
);
|
|
474 |
#define SquPrologue MulPrologue
|
|
475 |
#define SquEpilogue \
|
|
476 |
AS1( pop ebx) \
|
|
477 |
".att_syntax prefix;" \
|
|
478 |
: \
|
|
479 |
: "d" (s_maskLow16), "c" (C), "a" (A) \
|
|
480 |
: "%esi", "%edi", "memory", "cc" \
|
|
481 |
);
|
|
482 |
#define TopPrologue MulPrologue
|
|
483 |
#define TopEpilogue \
|
|
484 |
AS1( pop ebx) \
|
|
485 |
".att_syntax prefix;" \
|
|
486 |
: \
|
|
487 |
: "d" (s_maskLow16), "c" (C), "a" (A), "D" (B), "S" (L) \
|
|
488 |
: "memory", "cc" \
|
|
489 |
);
|
|
490 |
#else
|
|
491 |
#define AddPrologue \
|
|
307
by weidai
fix compile with Intel compiler |
492 |
__asm push edi \
|
270
by weidai
MMX/SSE2 optimizations |
493 |
__asm push esi \
|
494 |
__asm mov eax, [esp+12] \
|
|
307
by weidai
fix compile with Intel compiler |
495 |
__asm mov edi, [esp+16]
|
270
by weidai
MMX/SSE2 optimizations |
496 |
#define AddEpilogue \
|
497 |
__asm pop esi \
|
|
307
by weidai
fix compile with Intel compiler |
498 |
__asm pop edi \
|
270
by weidai
MMX/SSE2 optimizations |
499 |
__asm ret 8
|
315
by weidai
fix compile for x64, DLL and VC 6 |
500 |
#if _MSC_VER < 1300
|
501 |
#define SaveEBX __asm push ebx
|
|
502 |
#define RestoreEBX __asm pop ebx
|
|
503 |
#else
|
|
504 |
#define SaveEBX
|
|
505 |
#define RestoreEBX
|
|
506 |
#endif
|
|
270
by weidai
MMX/SSE2 optimizations |
507 |
#define SquPrologue \
|
508 |
AS2( mov eax, A) \
|
|
509 |
AS2( mov ecx, C) \
|
|
315
by weidai
fix compile for x64, DLL and VC 6 |
510 |
SaveEBX \
|
270
by weidai
MMX/SSE2 optimizations |
511 |
AS2( lea ebx, s_maskLow16)
|
512 |
#define MulPrologue \
|
|
513 |
AS2( mov eax, A) \
|
|
514 |
AS2( mov edi, B) \
|
|
515 |
AS2( mov ecx, C) \
|
|
315
by weidai
fix compile for x64, DLL and VC 6 |
516 |
SaveEBX \
|
270
by weidai
MMX/SSE2 optimizations |
517 |
AS2( lea ebx, s_maskLow16)
|
518 |
#define TopPrologue \
|
|
519 |
AS2( mov eax, A) \
|
|
520 |
AS2( mov edi, B) \
|
|
521 |
AS2( mov ecx, C) \
|
|
522 |
AS2( mov esi, L) \
|
|
315
by weidai
fix compile for x64, DLL and VC 6 |
523 |
SaveEBX \
|
270
by weidai
MMX/SSE2 optimizations |
524 |
AS2( lea ebx, s_maskLow16)
|
315
by weidai
fix compile for x64, DLL and VC 6 |
525 |
#define SquEpilogue RestoreEBX
|
526 |
#define MulEpilogue RestoreEBX
|
|
527 |
#define TopEpilogue RestoreEBX
|
|
270
by weidai
MMX/SSE2 optimizations |
528 |
#endif
|
529 |
||
315
by weidai
fix compile for x64, DLL and VC 6 |
530 |
#ifdef CRYPTOPP_X64_MASM_AVAILABLE
|
270
by weidai
MMX/SSE2 optimizations |
531 |
extern "C" { |
351
by weidai
revert to int return value for Add and Sub |
532 |
int Baseline_Add(size_t N, word *C, const word *A, const word *B); |
533 |
int Baseline_Sub(size_t N, word *C, const word *A, const word *B); |
|
315
by weidai
fix compile for x64, DLL and VC 6 |
534 |
}
|
432
by weidai
fix compile on OpenBSD 4.4 |
535 |
#elif defined(CRYPTOPP_X64_ASM_AVAILABLE) && defined(__GNUC__) && defined(CRYPTOPP_WORD128_AVAILABLE)
|
351
by weidai
revert to int return value for Add and Sub |
536 |
int Baseline_Add(size_t N, word *C, const word *A, const word *B) |
315
by weidai
fix compile for x64, DLL and VC 6 |
537 |
{
|
538 |
word result; |
|
539 |
__asm__ __volatile__ |
|
540 |
(
|
|
541 |
".intel_syntax;"
|
|
542 |
AS1( neg %1) |
|
543 |
ASJ( jz, 1, f) |
|
544 |
AS2( mov %0,[%3+8*%1]) |
|
545 |
AS2( add %0,[%4+8*%1]) |
|
546 |
AS2( mov [%2+8*%1],%0) |
|
547 |
ASL(0) |
|
548 |
AS2( mov %0,[%3+8*%1+8]) |
|
549 |
AS2( adc %0,[%4+8*%1+8]) |
|
550 |
AS2( mov [%2+8*%1+8],%0) |
|
551 |
AS2( lea %1,[%1+2]) |
|
552 |
ASJ( jrcxz, 1, f) |
|
553 |
AS2( mov %0,[%3+8*%1]) |
|
554 |
AS2( adc %0,[%4+8*%1]) |
|
555 |
AS2( mov [%2+8*%1],%0) |
|
556 |
ASJ( jmp, 0, b) |
|
557 |
ASL(1) |
|
558 |
AS2( mov %0, 0) |
|
559 |
AS2( adc %0, %0) |
|
560 |
".att_syntax;"
|
|
402
by weidai
fixes for GCC 4.3.2 (reports from Chris Morgan and DiegoT) |
561 |
: "=&r" (result), "+c" (N) |
562 |
: "r" (C+N), "r" (A+N), "r" (B+N) |
|
315
by weidai
fix compile for x64, DLL and VC 6 |
563 |
: "memory", "cc" |
564 |
);
|
|
351
by weidai
revert to int return value for Add and Sub |
565 |
return (int)result; |
315
by weidai
fix compile for x64, DLL and VC 6 |
566 |
}
|
567 |
||
351
by weidai
revert to int return value for Add and Sub |
568 |
int Baseline_Sub(size_t N, word *C, const word *A, const word *B) |
315
by weidai
fix compile for x64, DLL and VC 6 |
569 |
{
|
570 |
word result; |
|
571 |
__asm__ __volatile__ |
|
572 |
(
|
|
573 |
".intel_syntax;"
|
|
574 |
AS1( neg %1) |
|
575 |
ASJ( jz, 1, f) |
|
576 |
AS2( mov %0,[%3+8*%1]) |
|
577 |
AS2( sub %0,[%4+8*%1]) |
|
578 |
AS2( mov [%2+8*%1],%0) |
|
579 |
ASL(0) |
|
580 |
AS2( mov %0,[%3+8*%1+8]) |
|
581 |
AS2( sbb %0,[%4+8*%1+8]) |
|
582 |
AS2( mov [%2+8*%1+8],%0) |
|
583 |
AS2( lea %1,[%1+2]) |
|
584 |
ASJ( jrcxz, 1, f) |
|
585 |
AS2( mov %0,[%3+8*%1]) |
|
586 |
AS2( sbb %0,[%4+8*%1]) |
|
587 |
AS2( mov [%2+8*%1],%0) |
|
588 |
ASJ( jmp, 0, b) |
|
589 |
ASL(1) |
|
590 |
AS2( mov %0, 0) |
|
591 |
AS2( adc %0, %0) |
|
592 |
".att_syntax;"
|
|
402
by weidai
fixes for GCC 4.3.2 (reports from Chris Morgan and DiegoT) |
593 |
: "=&r" (result), "+c" (N) |
594 |
: "r" (C+N), "r" (A+N), "r" (B+N) |
|
315
by weidai
fix compile for x64, DLL and VC 6 |
595 |
: "memory", "cc" |
596 |
);
|
|
351
by weidai
revert to int return value for Add and Sub |
597 |
return (int)result; |
315
by weidai
fix compile for x64, DLL and VC 6 |
598 |
}
|
599 |
#elif defined(CRYPTOPP_X86_ASM_AVAILABLE) && CRYPTOPP_BOOL_X86
|
|
351
by weidai
revert to int return value for Add and Sub |
600 |
CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B) |
270
by weidai
MMX/SSE2 optimizations |
601 |
{
|
602 |
AddPrologue
|
|
603 |
||
307
by weidai
fix compile with Intel compiler |
604 |
// now: eax = A, edi = B, edx = C, ecx = N
|
270
by weidai
MMX/SSE2 optimizations |
605 |
AS2( lea eax, [eax+4*ecx]) |
307
by weidai
fix compile with Intel compiler |
606 |
AS2( lea edi, [edi+4*ecx]) |
270
by weidai
MMX/SSE2 optimizations |
607 |
AS2( lea edx, [edx+4*ecx]) |
608 |
||
609 |
AS1( neg ecx) // ecx is negative index |
|
610 |
AS2( test ecx, 2) // this clears carry flag |
|
611 |
ASJ( jz, 0, f) |
|
612 |
AS2( sub ecx, 2) |
|
613 |
ASJ( jmp, 1, f) |
|
614 |
||
615 |
ASL(0) |
|
616 |
ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero |
|
617 |
AS2( mov esi,[eax+4*ecx]) |
|
307
by weidai
fix compile with Intel compiler |
618 |
AS2( adc esi,[edi+4*ecx]) |
270
by weidai
MMX/SSE2 optimizations |
619 |
AS2( mov [edx+4*ecx],esi) |
620 |
AS2( mov esi,[eax+4*ecx+4]) |
|
307
by weidai
fix compile with Intel compiler |
621 |
AS2( adc esi,[edi+4*ecx+4]) |
270
by weidai
MMX/SSE2 optimizations |
622 |
AS2( mov [edx+4*ecx+4],esi) |
623 |
ASL(1) |
|
624 |
AS2( mov esi,[eax+4*ecx+8]) |
|
307
by weidai
fix compile with Intel compiler |
625 |
AS2( adc esi,[edi+4*ecx+8]) |
270
by weidai
MMX/SSE2 optimizations |
626 |
AS2( mov [edx+4*ecx+8],esi) |
627 |
AS2( mov esi,[eax+4*ecx+12]) |
|
307
by weidai
fix compile with Intel compiler |
628 |
AS2( adc esi,[edi+4*ecx+12]) |
270
by weidai
MMX/SSE2 optimizations |
629 |
AS2( mov [edx+4*ecx+12],esi) |
630 |
||
631 |
AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2 |
|
632 |
ASJ( jmp, 0, b) |
|
633 |
||
634 |
ASL(2) |
|
635 |
AS2( mov eax, 0) |
|
636 |
AS1( setc al) // store carry into eax (return result register) |
|
637 |
||
638 |
AddEpilogue
|
|
639 |
}
|
|
640 |
||
351
by weidai
revert to int return value for Add and Sub |
641 |
CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B) |
270
by weidai
MMX/SSE2 optimizations |
642 |
{
|
643 |
AddPrologue
|
|
644 |
||
307
by weidai
fix compile with Intel compiler |
645 |
// now: eax = A, edi = B, edx = C, ecx = N
|
270
by weidai
MMX/SSE2 optimizations |
646 |
AS2( lea eax, [eax+4*ecx]) |
307
by weidai
fix compile with Intel compiler |
647 |
AS2( lea edi, [edi+4*ecx]) |
270
by weidai
MMX/SSE2 optimizations |
648 |
AS2( lea edx, [edx+4*ecx]) |
649 |
||
650 |
AS1( neg ecx) // ecx is negative index |
|
651 |
AS2( test ecx, 2) // this clears carry flag |
|
652 |
ASJ( jz, 0, f) |
|
653 |
AS2( sub ecx, 2) |
|
654 |
ASJ( jmp, 1, f) |
|
655 |
||
656 |
ASL(0) |
|
657 |
ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero |
|
658 |
AS2( mov esi,[eax+4*ecx]) |
|
307
by weidai
fix compile with Intel compiler |
659 |
AS2( sbb esi,[edi+4*ecx]) |
270
by weidai
MMX/SSE2 optimizations |
660 |
AS2( mov [edx+4*ecx],esi) |
661 |
AS2( mov esi,[eax+4*ecx+4]) |
|
307
by weidai
fix compile with Intel compiler |
662 |
AS2( sbb esi,[edi+4*ecx+4]) |
270
by weidai
MMX/SSE2 optimizations |
663 |
AS2( mov [edx+4*ecx+4],esi) |
664 |
ASL(1) |
|
665 |
AS2( mov esi,[eax+4*ecx+8]) |
|
307
by weidai
fix compile with Intel compiler |
666 |
AS2( sbb esi,[edi+4*ecx+8]) |
270
by weidai
MMX/SSE2 optimizations |
667 |
AS2( mov [edx+4*ecx+8],esi) |
668 |
AS2( mov esi,[eax+4*ecx+12]) |
|
307
by weidai
fix compile with Intel compiler |
669 |
AS2( sbb esi,[edi+4*ecx+12]) |
270
by weidai
MMX/SSE2 optimizations |
670 |
AS2( mov [edx+4*ecx+12],esi) |
671 |
||
672 |
AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2 |
|
673 |
ASJ( jmp, 0, b) |
|
674 |
||
675 |
ASL(2) |
|
676 |
AS2( mov eax, 0) |
|
677 |
AS1( setc al) // store carry into eax (return result register) |
|
678 |
||
679 |
AddEpilogue
|
|
680 |
}
|
|
681 |
||
315
by weidai
fix compile for x64, DLL and VC 6 |
682 |
#if CRYPTOPP_INTEGER_SSE2
|
351
by weidai
revert to int return value for Add and Sub |
683 |
CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Add(size_t N, word *C, const word *A, const word *B) |
270
by weidai
MMX/SSE2 optimizations |
684 |
{
|
685 |
AddPrologue
|
|
686 |
||
307
by weidai
fix compile with Intel compiler |
687 |
// now: eax = A, edi = B, edx = C, ecx = N
|
270
by weidai
MMX/SSE2 optimizations |
688 |
AS2( lea eax, [eax+4*ecx]) |
307
by weidai
fix compile with Intel compiler |
689 |
AS2( lea edi, [edi+4*ecx]) |
270
by weidai
MMX/SSE2 optimizations |
690 |
AS2( lea edx, [edx+4*ecx]) |
691 |
||
692 |
AS1( neg ecx) // ecx is negative index |
|
693 |
AS2( pxor mm2, mm2) |
|
694 |
ASJ( jz, 2, f) |
|
695 |
AS2( test ecx, 2) // this clears carry flag |
|
696 |
ASJ( jz, 0, f) |
|
697 |
AS2( sub ecx, 2) |
|
698 |
ASJ( jmp, 1, f) |
|
699 |
||
700 |
ASL(0) |
|
701 |
AS2( movd mm0, DWORD PTR [eax+4*ecx]) |
|
307
by weidai
fix compile with Intel compiler |
702 |
AS2( movd mm1, DWORD PTR [edi+4*ecx]) |
270
by weidai
MMX/SSE2 optimizations |
703 |
AS2( paddq mm0, mm1) |
704 |
AS2( paddq mm2, mm0) |
|
705 |
AS2( movd DWORD PTR [edx+4*ecx], mm2) |
|
706 |
AS2( psrlq mm2, 32) |
|
707 |
||
708 |
AS2( movd mm0, DWORD PTR [eax+4*ecx+4]) |
|
307
by weidai
fix compile with Intel compiler |
709 |
AS2( movd mm1, DWORD PTR [edi+4*ecx+4]) |
270
by weidai
MMX/SSE2 optimizations |
710 |
AS2( paddq mm0, mm1) |
711 |
AS2( paddq mm2, mm0) |
|
712 |
AS2( movd DWORD PTR [edx+4*ecx+4], mm2) |
|
713 |
AS2( psrlq mm2, 32) |
|
714 |
||
715 |
ASL(1) |
|
716 |
AS2( movd mm0, DWORD PTR [eax+4*ecx+8]) |
|
307
by weidai
fix compile with Intel compiler |
717 |
AS2( movd mm1, DWORD PTR [edi+4*ecx+8]) |
270
by weidai
MMX/SSE2 optimizations |
718 |
AS2( paddq mm0, mm1) |
719 |
AS2( paddq mm2, mm0) |
|
720 |
AS2( movd DWORD PTR [edx+4*ecx+8], mm2) |
|
721 |
AS2( psrlq mm2, 32) |
|
722 |
||
723 |
AS2( movd mm0, DWORD PTR [eax+4*ecx+12]) |
|
307
by weidai
fix compile with Intel compiler |
724 |
AS2( movd mm1, DWORD PTR [edi+4*ecx+12]) |
270
by weidai
MMX/SSE2 optimizations |
725 |
AS2( paddq mm0, mm1) |
726 |
AS2( paddq mm2, mm0) |
|
727 |
AS2( movd DWORD PTR [edx+4*ecx+12], mm2) |
|
728 |
AS2( psrlq mm2, 32) |
|
729 |
||
730 |
AS2( add ecx, 4) |
|
731 |
ASJ( jnz, 0, b) |
|
732 |
||
733 |
ASL(2) |
|
734 |
AS2( movd eax, mm2) |
|
735 |
AS1( emms) |
|
736 |
||
737 |
AddEpilogue
|
|
738 |
}
|
|
351
by weidai
revert to int return value for Add and Sub |
739 |
CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Sub(size_t N, word *C, const word *A, const word *B) |
270
by weidai
MMX/SSE2 optimizations |
740 |
{
|
741 |
AddPrologue
|
|
742 |
||
307
by weidai
fix compile with Intel compiler |
743 |
// now: eax = A, edi = B, edx = C, ecx = N
|
270
by weidai
MMX/SSE2 optimizations |
744 |
AS2( lea eax, [eax+4*ecx]) |
307
by weidai
fix compile with Intel compiler |
745 |
AS2( lea edi, [edi+4*ecx]) |
270
by weidai
MMX/SSE2 optimizations |
746 |
AS2( lea edx, [edx+4*ecx]) |
747 |
||
748 |
AS1( neg ecx) // ecx is negative index |
|
749 |
AS2( pxor mm2, mm2) |
|
750 |
ASJ( jz, 2, f) |
|
751 |
AS2( test ecx, 2) // this clears carry flag |
|
752 |
ASJ( jz, 0, f) |
|
753 |
AS2( sub ecx, 2) |
|
754 |
ASJ( jmp, 1, f) |
|
755 |
||
756 |
ASL(0) |
|
757 |
AS2( movd mm0, DWORD PTR [eax+4*ecx]) |
|
307
by weidai
fix compile with Intel compiler |
758 |
AS2( movd mm1, DWORD PTR [edi+4*ecx]) |
270
by weidai
MMX/SSE2 optimizations |
759 |
AS2( psubq mm0, mm1) |
760 |
AS2( psubq mm0, mm2) |
|
761 |
AS2( movd DWORD PTR [edx+4*ecx], mm0) |
|
762 |
AS2( psrlq mm0, 63) |
|
763 |
||
764 |
AS2( movd mm2, DWORD PTR [eax+4*ecx+4]) |
|
307
by weidai
fix compile with Intel compiler |
765 |
AS2( movd mm1, DWORD PTR [edi+4*ecx+4]) |
270
by weidai
MMX/SSE2 optimizations |
766 |
AS2( psubq mm2, mm1) |
767 |
AS2( psubq mm2, mm0) |
|
768 |
AS2( movd DWORD PTR [edx+4*ecx+4], mm2) |
|
769 |
AS2( psrlq mm2, 63) |
|
770 |
||
771 |
ASL(1) |
|
772 |
AS2( movd mm0, DWORD PTR [eax+4*ecx+8]) |
|
307
by weidai
fix compile with Intel compiler |
773 |
AS2( movd mm1, DWORD PTR [edi+4*ecx+8]) |
270
by weidai
MMX/SSE2 optimizations |
774 |
AS2( psubq mm0, mm1) |
775 |
AS2( psubq mm0, mm2) |
|
776 |
AS2( movd DWORD PTR [edx+4*ecx+8], mm0) |
|
777 |
AS2( psrlq mm0, 63) |
|
778 |
||
779 |
AS2( movd mm2, DWORD PTR [eax+4*ecx+12]) |
|
307
by weidai
fix compile with Intel compiler |
780 |
AS2( movd mm1, DWORD PTR [edi+4*ecx+12]) |
270
by weidai
MMX/SSE2 optimizations |
781 |
AS2( psubq mm2, mm1) |
782 |
AS2( psubq mm2, mm0) |
|
783 |
AS2( movd DWORD PTR [edx+4*ecx+12], mm2) |
|
784 |
AS2( psrlq mm2, 63) |
|
785 |
||
786 |
AS2( add ecx, 4) |
|
787 |
ASJ( jnz, 0, b) |
|
788 |
||
789 |
ASL(2) |
|
790 |
AS2( movd eax, mm2) |
|
791 |
AS1( emms) |
|
792 |
||
793 |
AddEpilogue
|
|
794 |
}
|
|
307
by weidai
fix compile with Intel compiler |
795 |
#endif // #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE |
270
by weidai
MMX/SSE2 optimizations |
796 |
#else
|
351
by weidai
revert to int return value for Add and Sub |
797 |
int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B) |
270
by weidai
MMX/SSE2 optimizations |
798 |
{
|
799 |
assert (N%2 == 0); |
|
800 |
||
801 |
Declare2Words(u); |
|
308
by weidai
fix compile on Sun CC |
802 |
AssignWord(u, 0); |
270
by weidai
MMX/SSE2 optimizations |
803 |
for (size_t i=0; i<N; i+=2) |
804 |
{
|
|
805 |
AddWithCarry(u, A[i], B[i]); |
|
806 |
C[i] = LowWord(u); |
|
807 |
AddWithCarry(u, A[i+1], B[i+1]); |
|
808 |
C[i+1] = LowWord(u); |
|
809 |
}
|
|
810 |
return int(GetCarry(u)); |
|
811 |
}
|
|
812 |
||
351
by weidai
revert to int return value for Add and Sub |
813 |
int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B) |
270
by weidai
MMX/SSE2 optimizations |
814 |
{
|
815 |
assert (N%2 == 0); |
|
816 |
||
817 |
Declare2Words(u); |
|
308
by weidai
fix compile on Sun CC |
818 |
AssignWord(u, 0); |
270
by weidai
MMX/SSE2 optimizations |
819 |
for (size_t i=0; i<N; i+=2) |
820 |
{
|
|
821 |
SubtractWithBorrow(u, A[i], B[i]); |
|
822 |
C[i] = LowWord(u); |
|
823 |
SubtractWithBorrow(u, A[i+1], B[i+1]); |
|
824 |
C[i+1] = LowWord(u); |
|
825 |
}
|
|
826 |
return int(GetBorrow(u)); |
|
827 |
}
|
|
828 |
#endif
|
|
829 |
||
830 |
static word LinearMultiply(word *C, const word *A, word B, size_t N) |
|
831 |
{
|
|
832 |
word carry=0; |
|
833 |
for(unsigned i=0; i<N; i++) |
|
834 |
{
|
|
835 |
Declare2Words(p); |
|
836 |
MultiplyWords(p, A[i], B); |
|
837 |
Acc2WordsBy1(p, carry); |
|
838 |
C[i] = LowWord(p); |
|
839 |
carry = HighWord(p); |
|
840 |
}
|
|
841 |
return carry; |
|
842 |
}
|
|
843 |
||
363
by weidai
fix possible branch prediction analysis (BPA) vulnerability |
844 |
#ifndef CRYPTOPP_DOXYGEN_PROCESSING
|
845 |
||
270
by weidai
MMX/SSE2 optimizations |
846 |
#define Mul_2 \
|
847 |
Mul_Begin(2) \
|
|
848 |
Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
|
|
315
by weidai
fix compile for x64, DLL and VC 6 |
849 |
Mul_End(1, 1)
|
270
by weidai
MMX/SSE2 optimizations |
850 |
|
851 |
#define Mul_4 \
|
|
852 |
Mul_Begin(4) \
|
|
853 |
Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
|
|
854 |
Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
|
|
855 |
Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
|
|
856 |
Mul_SaveAcc(3, 1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
|
|
857 |
Mul_SaveAcc(4, 2, 3) Mul_Acc(3, 2) \
|
|
315
by weidai
fix compile for x64, DLL and VC 6 |
858 |
Mul_End(5, 3)
|
270
by weidai
MMX/SSE2 optimizations |
859 |
|
860 |
#define Mul_8 \
|
|
861 |
Mul_Begin(8) \
|
|
862 |
Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
|
|
863 |
Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
|
|
864 |
Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
|
|
865 |
Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
|
|
866 |
Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
|
|
867 |
Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
|
|
868 |
Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
|
|
869 |
Mul_SaveAcc(7, 1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
|
|
870 |
Mul_SaveAcc(8, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
|
|
871 |
Mul_SaveAcc(9, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
|
|
872 |
Mul_SaveAcc(10, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
|
|
873 |
Mul_SaveAcc(11, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
|
|
874 |
Mul_SaveAcc(12, 6, 7) Mul_Acc(7, 6) \
|
|
315
by weidai
fix compile for x64, DLL and VC 6 |
875 |
Mul_End(13, 7)
|
270
by weidai
MMX/SSE2 optimizations |
876 |
|
877 |
#define Mul_16 \
|
|
878 |
Mul_Begin(16) \
|
|
879 |
Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
|
|
880 |
Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
|
|
881 |
Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
|
|
882 |
Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
|
|
883 |
Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
|
|
884 |
Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
|
|
885 |
Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
|
|
886 |
Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
|
|
887 |
Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
|
|
888 |
Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
|
|
889 |
Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
|
|
890 |
Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
|
|
891 |
Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
|
|
892 |
Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
|
|
893 |
Mul_SaveAcc(14, 0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
|
|
894 |
Mul_SaveAcc(15, 1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
|
|
895 |
Mul_SaveAcc(16, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
|
|
896 |
Mul_SaveAcc(17, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
|
|
897 |
Mul_SaveAcc(18, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
|
|
898 |
Mul_SaveAcc(19, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
|
|
899 |
Mul_SaveAcc(20, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
|
|
900 |
Mul_SaveAcc(21, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
|
|
901 |
Mul_SaveAcc(22, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
|
|
902 |
Mul_SaveAcc(23, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
|
|
903 |
Mul_SaveAcc(24, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
|
|
904 |
Mul_SaveAcc(25, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
|
|
905 |
Mul_SaveAcc(26, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
|
|
906 |
Mul_SaveAcc(27, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
|
|
907 |
Mul_SaveAcc(28, 14, 15) Mul_Acc(15, 14) \
|
|
315
by weidai
fix compile for x64, DLL and VC 6 |
908 |
Mul_End(29, 15)
|
270
by weidai
MMX/SSE2 optimizations |
909 |
|
910 |
#define Squ_2 \
|
|
911 |
Squ_Begin(2) \
|
|
912 |
Squ_End(2)
|
|
913 |
||
914 |
#define Squ_4 \
|
|
915 |
Squ_Begin(4) \
|
|
916 |
Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
|
|
917 |
Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
|
|
918 |
Squ_SaveAcc(3, 1, 3) Squ_Diag(2) \
|
|
919 |
Squ_SaveAcc(4, 2, 3) Squ_NonDiag \
|
|
920 |
Squ_End(4)
|
|
921 |
||
922 |
#define Squ_8 \
|
|
923 |
Squ_Begin(8) \
|
|
924 |
Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
|
|
925 |
Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
|
|
926 |
Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
|
|
927 |
Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
|
|
928 |
Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
|
|
929 |
Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
|
|
930 |
Squ_SaveAcc(7, 1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
|
|
931 |
Squ_SaveAcc(8, 2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
|
|
932 |
Squ_SaveAcc(9, 3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
|
|
933 |
Squ_SaveAcc(10, 4, 7) Squ_Acc(5, 6) Squ_NonDiag \
|
|
934 |
Squ_SaveAcc(11, 5, 7) Squ_Diag(6) \
|
|
935 |
Squ_SaveAcc(12, 6, 7) Squ_NonDiag \
|
|
936 |
Squ_End(8)
|
|
937 |
||
938 |
#define Squ_16 \
|
|
939 |
Squ_Begin(16) \
|
|
940 |
Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
|
|
941 |
Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
|
|
942 |
Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
|
|
943 |
Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
|
|
944 |
Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
|
|
945 |
Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
|
|
946 |
Squ_SaveAcc(7, 0, 8) Squ_Acc(1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
|
|
947 |
Squ_SaveAcc(8, 0, 9) Squ_Acc(1, 8) Squ_Acc(2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
|
|
948 |
Squ_SaveAcc(9, 0, 10) Squ_Acc(1, 9) Squ_Acc(2, 8) Squ_Acc(3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
|
|
949 |
Squ_SaveAcc(10, 0, 11) Squ_Acc(1, 10) Squ_Acc(2, 9) Squ_Acc(3, 8) Squ_Acc(4, 7) Squ_Acc(5, 6) Squ_NonDiag \
|
|
950 |
Squ_SaveAcc(11, 0, 12) Squ_Acc(1, 11) Squ_Acc(2, 10) Squ_Acc(3, 9) Squ_Acc(4, 8) Squ_Acc(5, 7) Squ_Diag(6) \
|
|
951 |
Squ_SaveAcc(12, 0, 13) Squ_Acc(1, 12) Squ_Acc(2, 11) Squ_Acc(3, 10) Squ_Acc(4, 9) Squ_Acc(5, 8) Squ_Acc(6, 7) Squ_NonDiag \
|
|
952 |
Squ_SaveAcc(13, 0, 14) Squ_Acc(1, 13) Squ_Acc(2, 12) Squ_Acc(3, 11) Squ_Acc(4, 10) Squ_Acc(5, 9) Squ_Acc(6, 8) Squ_Diag(7) \
|
|
953 |
Squ_SaveAcc(14, 0, 15) Squ_Acc(1, 14) Squ_Acc(2, 13) Squ_Acc(3, 12) Squ_Acc(4, 11) Squ_Acc(5, 10) Squ_Acc(6, 9) Squ_Acc(7, 8) Squ_NonDiag \
|
|
954 |
Squ_SaveAcc(15, 1, 15) Squ_Acc(2, 14) Squ_Acc(3, 13) Squ_Acc(4, 12) Squ_Acc(5, 11) Squ_Acc(6, 10) Squ_Acc(7, 9) Squ_Diag(8) \
|
|
955 |
Squ_SaveAcc(16, 2, 15) Squ_Acc(3, 14) Squ_Acc(4, 13) Squ_Acc(5, 12) Squ_Acc(6, 11) Squ_Acc(7, 10) Squ_Acc(8, 9) Squ_NonDiag \
|
|
956 |
Squ_SaveAcc(17, 3, 15) Squ_Acc(4, 14) Squ_Acc(5, 13) Squ_Acc(6, 12) Squ_Acc(7, 11) Squ_Acc(8, 10) Squ_Diag(9) \
|
|
957 |
Squ_SaveAcc(18, 4, 15) Squ_Acc(5, 14) Squ_Acc(6, 13) Squ_Acc(7, 12) Squ_Acc(8, 11) Squ_Acc(9, 10) Squ_NonDiag \
|
|
958 |
Squ_SaveAcc(19, 5, 15) Squ_Acc(6, 14) Squ_Acc(7, 13) Squ_Acc(8, 12) Squ_Acc(9, 11) Squ_Diag(10) \
|
|
959 |
Squ_SaveAcc(20, 6, 15) Squ_Acc(7, 14) Squ_Acc(8, 13) Squ_Acc(9, 12) Squ_Acc(10, 11) Squ_NonDiag \
|
|
960 |
Squ_SaveAcc(21, 7, 15) Squ_Acc(8, 14) Squ_Acc(9, 13) Squ_Acc(10, 12) Squ_Diag(11) \
|
|
961 |
Squ_SaveAcc(22, 8, 15) Squ_Acc(9, 14) Squ_Acc(10, 13) Squ_Acc(11, 12) Squ_NonDiag \
|
|
962 |
Squ_SaveAcc(23, 9, 15) Squ_Acc(10, 14) Squ_Acc(11, 13) Squ_Diag(12) \
|
|
963 |
Squ_SaveAcc(24, 10, 15) Squ_Acc(11, 14) Squ_Acc(12, 13) Squ_NonDiag \
|
|
964 |
Squ_SaveAcc(25, 11, 15) Squ_Acc(12, 14) Squ_Diag(13) \
|
|
965 |
Squ_SaveAcc(26, 12, 15) Squ_Acc(13, 14) Squ_NonDiag \
|
|
966 |
Squ_SaveAcc(27, 13, 15) Squ_Diag(14) \
|
|
967 |
Squ_SaveAcc(28, 14, 15) Squ_NonDiag \
|
|
968 |
Squ_End(16)
|
|
969 |
||
970 |
#define Bot_2 \
|
|
971 |
Mul_Begin(2) \
|
|
972 |
Bot_SaveAcc(0, 0, 1) Bot_Acc(1, 0) \
|
|
973 |
Bot_End(2)
|
|
974 |
||
975 |
#define Bot_4 \
|
|
976 |
Mul_Begin(4) \
|
|
977 |
Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
|
|
978 |
Mul_SaveAcc(1, 2, 0) Mul_Acc(1, 1) Mul_Acc(0, 2) \
|
|
979 |
Bot_SaveAcc(2, 0, 3) Bot_Acc(1, 2) Bot_Acc(2, 1) Bot_Acc(3, 0) \
|
|
980 |
Bot_End(4)
|
|
981 |
||
982 |
#define Bot_8 \
|
|
983 |
Mul_Begin(8) \
|
|
984 |
Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
|
|
985 |
Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
|
|
986 |
Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
|
|
987 |
Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
|
|
988 |
Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
|
|
989 |
Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
|
|
990 |
Bot_SaveAcc(6, 0, 7) Bot_Acc(1, 6) Bot_Acc(2, 5) Bot_Acc(3, 4) Bot_Acc(4, 3) Bot_Acc(5, 2) Bot_Acc(6, 1) Bot_Acc(7, 0) \
|
|
991 |
Bot_End(8)
|
|
992 |
||
993 |
#define Bot_16 \
|
|
994 |
Mul_Begin(16) \
|
|
995 |
Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
|
|
996 |
Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
|
|
997 |
Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
|
|
998 |
Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
|
|
999 |
Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
|
|
1000 |
Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
|
|
1001 |
Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
|
|
1002 |
Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
|
|
1003 |
Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
|
|
1004 |
Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
|
|
1005 |
Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
|
|
1006 |
Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
|
|
1007 |
Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
|
|
1008 |
Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
|
|
1009 |
Bot_SaveAcc(14, 0, 15) Bot_Acc(1, 14) Bot_Acc(2, 13) Bot_Acc(3, 12) Bot_Acc(4, 11) Bot_Acc(5, 10) Bot_Acc(6, 9) Bot_Acc(7, 8) Bot_Acc(8, 7) Bot_Acc(9, 6) Bot_Acc(10, 5) Bot_Acc(11, 4) Bot_Acc(12, 3) Bot_Acc(13, 2) Bot_Acc(14, 1) Bot_Acc(15, 0) \
|
|
1010 |
Bot_End(16)
|
|
363
by weidai
fix possible branch prediction analysis (BPA) vulnerability |
1011 |
|
1012 |
#endif
|
|
270
by weidai
MMX/SSE2 optimizations |
1013 |
|
315
by weidai
fix compile for x64, DLL and VC 6 |
1014 |
#if 0
|
270
by weidai
MMX/SSE2 optimizations |
1015 |
#define Mul_Begin(n) \
|
1016 |
Declare2Words(p) \
|
|
1017 |
Declare2Words(c) \
|
|
1018 |
Declare2Words(d) \
|
|
1019 |
MultiplyWords(p, A[0], B[0]) \
|
|
1020 |
AssignWord(c, LowWord(p)) \
|
|
1021 |
AssignWord(d, HighWord(p))
|
|
1022 |
||
1023 |
#define Mul_Acc(i, j) \
|
|
1024 |
MultiplyWords(p, A[i], B[j]) \
|
|
1025 |
Acc2WordsBy1(c, LowWord(p)) \
|
|
1026 |
Acc2WordsBy1(d, HighWord(p))
|
|
1027 |
||
1028 |
#define Mul_SaveAcc(k, i, j) \
|
|
1029 |
R[k] = LowWord(c); \
|
|
1030 |
Add2WordsBy1(c, d, HighWord(c)) \
|
|
1031 |
MultiplyWords(p, A[i], B[j]) \
|
|
1032 |
AssignWord(d, HighWord(p)) \
|
|
1033 |
Acc2WordsBy1(c, LowWord(p))
|
|
1034 |
||
1035 |
#define Mul_End(n) \
|
|
1036 |
R[2*n-3] = LowWord(c); \
|
|
1037 |
Acc2WordsBy1(d, HighWord(c)) \
|
|
1038 |
MultiplyWords(p, A[n-1], B[n-1])\
|
|
1039 |
Acc2WordsBy2(d, p) \
|
|
1040 |
R[2*n-2] = LowWord(d); \
|
|
1041 |
R[2*n-1] = HighWord(d);
|
|
1042 |
||
1043 |
#define Bot_SaveAcc(k, i, j) \
|
|
1044 |
R[k] = LowWord(c); \
|
|
1045 |
word e = LowWord(d) + HighWord(c); \
|
|
1046 |
e += A[i] * B[j];
|
|
1047 |
||
1048 |
#define Bot_Acc(i, j) \
|
|
1049 |
e += A[i] * B[j];
|
|
1050 |
||
1051 |
#define Bot_End(n) \
|
|
1052 |
R[n-1] = e;
|
|
315
by weidai
fix compile for x64, DLL and VC 6 |
1053 |
#else
|
270
by weidai
MMX/SSE2 optimizations |
1054 |
#define Mul_Begin(n) \
|
1055 |
Declare2Words(p) \
|
|
1056 |
word c; \
|
|
1057 |
Declare2Words(d) \
|
|
1058 |
MultiplyWords(p, A[0], B[0]) \
|
|
1059 |
c = LowWord(p); \
|
|
1060 |
AssignWord(d, HighWord(p))
|
|
1061 |
||
1062 |
#define Mul_Acc(i, j) \
|
|
315
by weidai
fix compile for x64, DLL and VC 6 |
1063 |
MulAcc(c, d, A[i], B[j])
|
270
by weidai
MMX/SSE2 optimizations |
1064 |
|
1065 |
#define Mul_SaveAcc(k, i, j) \
|
|
1066 |
R[k] = c; \
|
|
315
by weidai
fix compile for x64, DLL and VC 6 |
1067 |
c = LowWord(d); \
|
270
by weidai
MMX/SSE2 optimizations |
1068 |
AssignWord(d, HighWord(d)) \
|
315
by weidai
fix compile for x64, DLL and VC 6 |
1069 |
MulAcc(c, d, A[i], B[j])
|
270
by weidai
MMX/SSE2 optimizations |
1070 |
|
315
by weidai
fix compile for x64, DLL and VC 6 |
1071 |
#define Mul_End(k, i) \
|
1072 |
R[k] = c; \
|
|
1073 |
MultiplyWords(p, A[i], B[i]) \
|
|
1074 |
Acc2WordsBy2(p, d) \
|
|
1075 |
R[k+1] = LowWord(p); \
|
|
1076 |
R[k+2] = HighWord(p);
|
|
270
by weidai
MMX/SSE2 optimizations |
1077 |
|
1078 |
#define Bot_SaveAcc(k, i, j) \
|
|
1079 |
R[k] = c; \
|
|
1080 |
c = LowWord(d); \
|
|
1081 |
c += A[i] * B[j];
|
|
1082 |
||
1083 |
#define Bot_Acc(i, j) \
|
|
1084 |
c += A[i] * B[j];
|
|
1085 |
||
1086 |
#define Bot_End(n) \
|
|
1087 |
R[n-1] = c;
|
|
315
by weidai
fix compile for x64, DLL and VC 6 |
1088 |
#endif
|
270
by weidai
MMX/SSE2 optimizations |
1089 |
|
1090 |
#define Squ_Begin(n) \
|
|
1091 |
Declare2Words(p) \
|
|
315
by weidai
fix compile for x64, DLL and VC 6 |
1092 |
word c; \
|
270
by weidai
MMX/SSE2 optimizations |
1093 |
Declare2Words(d) \
|
1094 |
Declare2Words(e) \
|
|
1095 |
MultiplyWords(p, A[0], A[0]) \
|
|
1096 |
R[0] = LowWord(p); \
|
|
1097 |
AssignWord(e, HighWord(p)) \
|
|
1098 |
MultiplyWords(p, A[0], A[1]) \
|
|
315
by weidai
fix compile for x64, DLL and VC 6 |
1099 |
c = LowWord(p); \
|
270
by weidai
MMX/SSE2 optimizations |
1100 |
AssignWord(d, HighWord(p)) \
|
1101 |
Squ_NonDiag \
|
|
1102 |
||
1103 |
#define Squ_NonDiag \
|
|
315
by weidai
fix compile for x64, DLL and VC 6 |
1104 |
Double3Words(c, d)
|
270
by weidai
MMX/SSE2 optimizations |
1105 |
|
1106 |
#define Squ_SaveAcc(k, i, j) \
|
|
315
by weidai
fix compile for x64, DLL and VC 6 |
1107 |
Acc3WordsBy2(c, d, e) \
|
1108 |
R[k] = c; \
|
|
270
by weidai
MMX/SSE2 optimizations |
1109 |
MultiplyWords(p, A[i], A[j]) \
|
315
by weidai
fix compile for x64, DLL and VC 6 |
1110 |
c = LowWord(p); \
|
270
by weidai
MMX/SSE2 optimizations |
1111 |
AssignWord(d, HighWord(p)) \
|
1112 |
||
1113 |
#define Squ_Acc(i, j) \
|
|
315
by weidai
fix compile for x64, DLL and VC 6 |
1114 |
MulAcc(c, d, A[i], A[j])
|
270
by weidai
MMX/SSE2 optimizations |
1115 |
|
1116 |
#define Squ_Diag(i) \
|
|
1117 |
Squ_NonDiag \
|
|
315
by weidai
fix compile for x64, DLL and VC 6 |
1118 |
MulAcc(c, d, A[i], A[i])
|
270
by weidai
MMX/SSE2 optimizations |
1119 |
|
1120 |
#define Squ_End(n) \
|
|
315
by weidai
fix compile for x64, DLL and VC 6 |
1121 |
Acc3WordsBy2(c, d, e) \
|
1122 |
R[2*n-3] = c; \
|
|
270
by weidai
MMX/SSE2 optimizations |
1123 |
MultiplyWords(p, A[n-1], A[n-1])\
|
315
by weidai
fix compile for x64, DLL and VC 6 |
1124 |
Acc2WordsBy2(p, e) \
|
1125 |
R[2*n-2] = LowWord(p); \
|
|
1126 |
R[2*n-1] = HighWord(p);
|
|
270
by weidai
MMX/SSE2 optimizations |
1127 |
|
1128 |
void Baseline_Multiply2(word *R, const word *A, const word *B) |
|
1129 |
{
|
|
1130 |
Mul_2
|
|
1131 |
}
|
|
1132 |
||
1133 |
void Baseline_Multiply4(word *R, const word *A, const word *B) |
|
1134 |
{
|
|
1135 |
Mul_4
|
|
1136 |
}
|
|
1137 |
||
1138 |
void Baseline_Multiply8(word *R, const word *A, const word *B) |
|
1139 |
{
|
|
1140 |
Mul_8
|
|
1141 |
}
|
|
1142 |
||
1143 |
void Baseline_Square2(word *R, const word *A) |
|
1144 |
{
|
|
1145 |
Squ_2
|
|
1146 |
}
|
|
1147 |
||
1148 |
void Baseline_Square4(word *R, const word *A) |
|
1149 |
{
|
|
1150 |
Squ_4
|
|
1151 |
}
|
|
1152 |
||
1153 |
void Baseline_Square8(word *R, const word *A) |
|
1154 |
{
|
|
1155 |
Squ_8
|
|
1156 |
}
|
|
1157 |
||
1158 |
void Baseline_MultiplyBottom2(word *R, const word *A, const word *B) |
|
1159 |
{
|
|
1160 |
Bot_2
|
|
1161 |
}
|
|
1162 |
||
1163 |
void Baseline_MultiplyBottom4(word *R, const word *A, const word *B) |
|
1164 |
{
|
|
1165 |
Bot_4
|
|
1166 |
}
|
|
1167 |
||
1168 |
void Baseline_MultiplyBottom8(word *R, const word *A, const word *B) |
|
1169 |
{
|
|
1170 |
Bot_8
|
|
1171 |
}
|
|
1172 |
||
315
by weidai
fix compile for x64, DLL and VC 6 |
1173 |
#define Top_Begin(n) \
|
1174 |
Declare2Words(p) \
|
|
1175 |
word c; \
|
|
1176 |
Declare2Words(d) \
|
|
1177 |
MultiplyWords(p, A[0], B[n-2]);\
|
|
1178 |
AssignWord(d, HighWord(p));
|
|
1179 |
||
1180 |
#define Top_Acc(i, j) \
|
|
1181 |
MultiplyWords(p, A[i], B[j]);\
|
|
1182 |
Acc2WordsBy1(d, HighWord(p));
|
|
1183 |
||
1184 |
#define Top_SaveAcc0(i, j) \
|
|
1185 |
c = LowWord(d); \
|
|
1186 |
AssignWord(d, HighWord(d)) \
|
|
1187 |
MulAcc(c, d, A[i], B[j])
|
|
1188 |
||
1189 |
#define Top_SaveAcc1(i, j) \
|
|
1190 |
c = L<c; \
|
|
1191 |
Acc2WordsBy1(d, c); \
|
|
1192 |
c = LowWord(d); \
|
|
1193 |
AssignWord(d, HighWord(d)) \
|
|
1194 |
MulAcc(c, d, A[i], B[j])
|
|
1195 |
||
1196 |
void Baseline_MultiplyTop2(word *R, const word *A, const word *B, word L) |
|
1197 |
{
|
|
1198 |
word T[4]; |
|
1199 |
Baseline_Multiply2(T, A, B); |
|
1200 |
R[0] = T[2]; |
|
1201 |
R[1] = T[3]; |
|
1202 |
}
|
|
1203 |
||
1204 |
void Baseline_MultiplyTop4(word *R, const word *A, const word *B, word L) |
|
1205 |
{
|
|
1206 |
Top_Begin(4) |
|
1207 |
Top_Acc(1, 1) Top_Acc(2, 0) \ |
|
1208 |
Top_SaveAcc0(0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \ |
|
1209 |
Top_SaveAcc1(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \ |
|
1210 |
Mul_SaveAcc(0, 2, 3) Mul_Acc(3, 2) \ |
|
1211 |
Mul_End(1, 3) |
|
1212 |
}
|
|
1213 |
||
1214 |
void Baseline_MultiplyTop8(word *R, const word *A, const word *B, word L) |
|
1215 |
{
|
|
1216 |
Top_Begin(8) |
|
1217 |
Top_Acc(1, 5) Top_Acc(2, 4) Top_Acc(3, 3) Top_Acc(4, 2) Top_Acc(5, 1) Top_Acc(6, 0) \ |
|
1218 |
Top_SaveAcc0(0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \ |
|
1219 |
Top_SaveAcc1(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \ |
|
1220 |
Mul_SaveAcc(0, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \ |
|
1221 |
Mul_SaveAcc(1, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \ |
|
1222 |
Mul_SaveAcc(2, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \ |
|
1223 |
Mul_SaveAcc(3, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \ |
|
1224 |
Mul_SaveAcc(4, 6, 7) Mul_Acc(7, 6) \ |
|
1225 |
Mul_End(5, 7) |
|
1226 |
}
|
|
1227 |
||
1228 |
#if !CRYPTOPP_INTEGER_SSE2 // save memory by not compiling these functions when SSE2 is available |
|
270
by weidai
MMX/SSE2 optimizations |
1229 |
void Baseline_Multiply16(word *R, const word *A, const word *B) |
1230 |
{
|
|
1231 |
Mul_16
|
|
1232 |
}
|
|
1233 |
||
1234 |
void Baseline_Square16(word *R, const word *A) |
|
1235 |
{
|
|
1236 |
Squ_16
|
|
1237 |
}
|
|
1238 |
||
1239 |
void Baseline_MultiplyBottom16(word *R, const word *A, const word *B) |
|
1240 |
{
|
|
1241 |
Bot_16
|
|
1242 |
}
|
|
315
by weidai
fix compile for x64, DLL and VC 6 |
1243 |
|
1244 |
void Baseline_MultiplyTop16(word *R, const word *A, const word *B, word L) |
|
1245 |
{
|
|
1246 |
Top_Begin(16) |
|
1247 |
Top_Acc(1, 13) Top_Acc(2, 12) Top_Acc(3, 11) Top_Acc(4, 10) Top_Acc(5, 9) Top_Acc(6, 8) Top_Acc(7, 7) Top_Acc(8, 6) Top_Acc(9, 5) Top_Acc(10, 4) Top_Acc(11, 3) Top_Acc(12, 2) Top_Acc(13, 1) Top_Acc(14, 0) \ |
|
1248 |
Top_SaveAcc0(0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \ |
|
1249 |
Top_SaveAcc1(1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \ |
|
1250 |
Mul_SaveAcc(0, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \ |
|
1251 |
Mul_SaveAcc(1, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \ |
|
1252 |
Mul_SaveAcc(2, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \ |
|
1253 |
Mul_SaveAcc(3, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \ |
|
1254 |
Mul_SaveAcc(4, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \ |
|
1255 |
Mul_SaveAcc(5, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \ |
|
1256 |
Mul_SaveAcc(6, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \ |
|
1257 |
Mul_SaveAcc(7, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \ |
|
1258 |
Mul_SaveAcc(8, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \ |
|
1259 |
Mul_SaveAcc(9, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \ |
|
1260 |
Mul_SaveAcc(10, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \ |
|
1261 |
Mul_SaveAcc(11, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \ |
|
1262 |
Mul_SaveAcc(12, 14, 15) Mul_Acc(15, 14) \ |
|
1263 |
Mul_End(13, 15) |
|
1264 |
}
|
|
1265 |
#endif
|
|
270
by weidai
MMX/SSE2 optimizations |
1266 |
|
1267 |
// ********************************************************
|
|
1268 |
||
315
by weidai
fix compile for x64, DLL and VC 6 |
1269 |
#if CRYPTOPP_INTEGER_SSE2
|
270
by weidai
MMX/SSE2 optimizations |
1270 |
|
1271 |
CRYPTOPP_ALIGN_DATA(16) static const word32 s_maskLow16[4] CRYPTOPP_SECTION_ALIGN16 = {0xffff,0xffff,0xffff,0xffff}; |
|
1272 |
||
1273 |
#undef Mul_Begin
|
|
1274 |
#undef Mul_Acc
|
|
315
by weidai
fix compile for x64, DLL and VC 6 |
1275 |
#undef Top_Begin
|
1276 |
#undef Top_Acc
|
|
270
by weidai
MMX/SSE2 optimizations |
1277 |
#undef Squ_Acc
|
1278 |
#undef Squ_NonDiag
|
|
1279 |
#undef Squ_Diag
|
|
1280 |
#undef Squ_SaveAcc
|
|
1281 |
#undef Squ_Begin
|
|
1282 |
#undef Mul_SaveAcc
|
|
1283 |
#undef Bot_Acc
|
|
1284 |
#undef Bot_SaveAcc
|
|
1285 |
#undef Bot_End
|
|
1286 |
#undef Squ_End
|
|
1287 |
#undef Mul_End
|
|
1288 |
||
1289 |
#define SSE2_FinalSave(k) \
|
|
1290 |
AS2( psllq xmm5, 16) \
|
|
1291 |
AS2( paddq xmm4, xmm5) \
|
|
1292 |
AS2( movq QWORD PTR [ecx+8*(k)], xmm4)
|
|
1293 |
||
1294 |
#define SSE2_SaveShift(k) \
|
|
1295 |
AS2( movq xmm0, xmm6) \
|
|
1296 |
AS2( punpckhqdq xmm6, xmm0) \
|
|
1297 |
AS2( movq xmm1, xmm7) \
|
|
1298 |
AS2( punpckhqdq xmm7, xmm1) \
|
|
1299 |
AS2( paddd xmm6, xmm0) \
|
|
1300 |
AS2( pslldq xmm6, 4) \
|
|
1301 |
AS2( paddd xmm7, xmm1) \
|
|
1302 |
AS2( paddd xmm4, xmm6) \
|
|
1303 |
AS2( pslldq xmm7, 4) \
|
|
1304 |
AS2( movq xmm6, xmm4) \
|
|
1305 |
AS2( paddd xmm5, xmm7) \
|
|
1306 |
AS2( movq xmm7, xmm5) \
|
|
1307 |
AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
|
|
1308 |
AS2( psrlq xmm6, 16) \
|
|
1309 |
AS2( paddq xmm6, xmm7) \
|
|
1310 |
AS2( punpckhqdq xmm4, xmm0) \
|
|
1311 |
AS2( punpckhqdq xmm5, xmm0) \
|
|
1312 |
AS2( movq QWORD PTR [ecx+8*(k)+2], xmm6) \
|
|
1313 |
AS2( psrlq xmm6, 3*16) \
|
|
1314 |
AS2( paddd xmm4, xmm6) \
|
|
1315 |
||
1316 |
#define Squ_SSE2_SaveShift(k) \
|
|
1317 |
AS2( movq xmm0, xmm6) \
|
|
1318 |
AS2( punpckhqdq xmm6, xmm0) \
|
|
1319 |
AS2( movq xmm1, xmm7) \
|
|
1320 |
AS2( punpckhqdq xmm7, xmm1) \
|
|
1321 |
AS2( paddd xmm6, xmm0) \
|
|
1322 |
AS2( pslldq xmm6, 4) \
|
|
1323 |
AS2( paddd xmm7, xmm1) \
|
|
1324 |
AS2( paddd xmm4, xmm6) \
|
|
1325 |
AS2( pslldq xmm7, 4) \
|
|
1326 |
AS2( movhlps xmm6, xmm4) \
|
|
1327 |
AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
|
|
1328 |
AS2( paddd xmm5, xmm7) \
|
|
1329 |
AS2( movhps QWORD PTR [esp+12], xmm5)\
|
|
1330 |
AS2( psrlq xmm4, 16) \
|
|
1331 |
AS2( paddq xmm4, xmm5) \
|
|
1332 |
AS2( movq QWORD PTR [ecx+8*(k)+2], xmm4) \
|
|
1333 |
AS2( psrlq xmm4, 3*16) \
|
|
1334 |
AS2( paddd xmm4, xmm6) \
|
|
1335 |
AS2( movq QWORD PTR [esp+4], xmm4)\
|
|
1336 |
||
1337 |
#define SSE2_FirstMultiply(i) \
|
|
1338 |
AS2( movdqa xmm7, [esi+(i)*16])\
|
|
1339 |
AS2( movdqa xmm5, [edi-(i)*16])\
|
|
1340 |
AS2( pmuludq xmm5, xmm7) \
|
|
1341 |
AS2( movdqa xmm4, [ebx])\
|
|
1342 |
AS2( movdqa xmm6, xmm4) \
|
|
1343 |
AS2( pand xmm4, xmm5) \
|
|
1344 |
AS2( psrld xmm5, 16) \
|
|
1345 |
AS2( pmuludq xmm7, [edx-(i)*16])\
|
|
1346 |
AS2( pand xmm6, xmm7) \
|
|
1347 |
AS2( psrld xmm7, 16)
|
|
1348 |
||
1349 |
#define Squ_Begin(n) \
|
|
1350 |
SquPrologue \
|
|
1351 |
AS2( mov esi, esp)\
|
|
1352 |
AS2( and esp, 0xfffffff0)\
|
|
1353 |
AS2( lea edi, [esp-32*n])\
|
|
1354 |
AS2( sub esp, 32*n+16)\
|
|
1355 |
AS1( push esi)\
|
|
1356 |
AS2( mov esi, edi) \
|
|
1357 |
AS2( xor edx, edx) \
|
|
1358 |
ASL(1) \
|
|
1359 |
ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
|
|
1360 |
ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
|
|
1361 |
AS2( movdqa [edi+2*edx], xmm0) \
|
|
1362 |
AS2( psrlq xmm0, 32) \
|
|
1363 |
AS2( movdqa [edi+2*edx+16], xmm0) \
|
|
1364 |
AS2( movdqa [edi+16*n+2*edx], xmm1) \
|
|
1365 |
AS2( psrlq xmm1, 32) \
|
|
1366 |
AS2( movdqa [edi+16*n+2*edx+16], xmm1) \
|
|
1367 |
AS2( add edx, 16) \
|
|
1368 |
AS2( cmp edx, 8*(n)) \
|
|
1369 |
ASJ( jne, 1, b) \
|
|
1370 |
AS2( lea edx, [edi+16*n])\
|
|
1371 |
SSE2_FirstMultiply(0) \
|
|
1372 |
||
1373 |
#define Squ_Acc(i) \
|
|
1374 |
ASL(LSqu##i) \
|
|
1375 |
AS2( movdqa xmm1, [esi+(i)*16]) \
|
|
1376 |
AS2( movdqa xmm0, [edi-(i)*16]) \
|
|
1377 |
AS2( movdqa xmm2, [ebx]) \
|
|
1378 |
AS2( pmuludq xmm0, xmm1) \
|
|
1379 |
AS2( pmuludq xmm1, [edx-(i)*16]) \
|
|
1380 |
AS2( movdqa xmm3, xmm2) \
|
|
1381 |
AS2( pand xmm2, xmm0) \
|
|
1382 |
AS2( psrld xmm0, 16) \
|
|
1383 |
AS2( paddd xmm4, xmm2) \
|
|
1384 |
AS2( paddd xmm5, xmm0) \
|
|
1385 |
AS2( pand xmm3, xmm1) \
|
|
1386 |
AS2( psrld xmm1, 16) \
|
|
1387 |
AS2( paddd xmm6, xmm3) \
|
|
1388 |
AS2( paddd xmm7, xmm1) \
|
|
1389 |
||
1390 |
#define Squ_Acc1(i)
|
|
1391 |
#define Squ_Acc2(i) ASC(call, LSqu##i)
|
|
1392 |
#define Squ_Acc3(i) Squ_Acc2(i)
|
|
1393 |
#define Squ_Acc4(i) Squ_Acc2(i)
|
|
1394 |
#define Squ_Acc5(i) Squ_Acc2(i)
|
|
1395 |
#define Squ_Acc6(i) Squ_Acc2(i)
|
|
1396 |
#define Squ_Acc7(i) Squ_Acc2(i)
|
|
1397 |
#define Squ_Acc8(i) Squ_Acc2(i)
|
|
1398 |
||
1399 |
#define SSE2_End(E, n) \
|
|
1400 |
SSE2_SaveShift(2*(n)-3) \
|
|
1401 |
AS2( movdqa xmm7, [esi+16]) \
|
|
1402 |
AS2( movdqa xmm0, [edi]) \
|
|
1403 |
AS2( pmuludq xmm0, xmm7) \
|
|
1404 |
AS2( movdqa xmm2, [ebx]) \
|
|
1405 |
AS2( pmuludq xmm7, [edx]) \
|
|
1406 |
AS2( movdqa xmm6, xmm2) \
|
|
1407 |
AS2( pand xmm2, xmm0) \
|
|
1408 |
AS2( psrld xmm0, 16) \
|
|
1409 |
AS2( paddd xmm4, xmm2) \
|
|
1410 |
AS2( paddd xmm5, xmm0) \
|
|
1411 |
AS2( pand xmm6, xmm7) \
|
|
1412 |
AS2( psrld xmm7, 16) \
|
|
1413 |
SSE2_SaveShift(2*(n)-2) \
|
|
1414 |
SSE2_FinalSave(2*(n)-1) \
|
|
1415 |
AS1( pop esp)\
|
|
1416 |
E
|
|
1417 |
||
1418 |
#define Squ_End(n) SSE2_End(SquEpilogue, n)
|
|
1419 |
#define Mul_End(n) SSE2_End(MulEpilogue, n)
|
|
1420 |
#define Top_End(n) SSE2_End(TopEpilogue, n)
|
|
1421 |
||
1422 |
#define Squ_Column1(k, i) \
|
|
1423 |
Squ_SSE2_SaveShift(k) \
|
|
1424 |
AS2( add esi, 16) \
|
|
1425 |
SSE2_FirstMultiply(1)\
|
|
1426 |
Squ_Acc##i(i) \
|
|
1427 |
AS2( paddd xmm4, xmm4) \
|
|
1428 |
AS2( paddd xmm5, xmm5) \
|
|
1429 |
AS2( movdqa xmm3, [esi]) \
|
|
1430 |
AS2( movq xmm1, QWORD PTR [esi+8]) \
|
|
1431 |
AS2( pmuludq xmm1, xmm3) \
|
|
1432 |
AS2( pmuludq xmm3, xmm3) \
|
|
1433 |
AS2( movdqa xmm0, [ebx])\
|
|
1434 |
AS2( movdqa xmm2, xmm0) \
|
|
1435 |
AS2( pand xmm0, xmm1) \
|
|
1436 |
AS2( psrld xmm1, 16) \
|
|
1437 |
AS2( paddd xmm6, xmm0) \
|
|
1438 |
AS2( paddd xmm7, xmm1) \
|
|
1439 |
AS2( pand xmm2, xmm3) \
|
|
1440 |
AS2( psrld xmm3, 16) \
|
|
1441 |
AS2( paddd xmm6, xmm6) \
|
|
1442 |
AS2( paddd xmm7, xmm7) \
|
|
1443 |
AS2( paddd xmm4, xmm2) \
|
|
1444 |
AS2( paddd xmm5, xmm3) \
|
|
1445 |
AS2( movq xmm0, QWORD PTR [esp+4])\
|
|
1446 |
AS2( movq xmm1, QWORD PTR [esp+12])\
|
|
1447 |
AS2( paddd xmm4, xmm0)\
|
|
1448 |
AS2( paddd xmm5, xmm1)\
|
|
1449 |
||
1450 |
#define Squ_Column0(k, i) \
|
|
1451 |
Squ_SSE2_SaveShift(k) \
|
|
1452 |
AS2( add edi, 16) \
|
|
1453 |
AS2( add edx, 16) \
|
|
1454 |
SSE2_FirstMultiply(1)\
|
|
1455 |
Squ_Acc##i(i) \
|
|
1456 |
AS2( paddd xmm6, xmm6) \
|
|
1457 |
AS2( paddd xmm7, xmm7) \
|
|
1458 |
AS2( paddd xmm4, xmm4) \
|
|
1459 |
AS2( paddd xmm5, xmm5) \
|
|
1460 |
AS2( movq xmm0, QWORD PTR [esp+4])\
|
|
1461 |
AS2( movq xmm1, QWORD PTR [esp+12])\
|
|
1462 |
AS2( paddd xmm4, xmm0)\
|
|
1463 |
AS2( paddd xmm5, xmm1)\
|
|
1464 |
||
1465 |
#define SSE2_MulAdd45 \
|
|
1466 |
AS2( movdqa xmm7, [esi]) \
|
|
1467 |
AS2( movdqa xmm0, [edi]) \
|
|
1468 |
AS2( pmuludq xmm0, xmm7) \
|
|
1469 |
AS2( movdqa xmm2, [ebx]) \
|
|
1470 |
AS2( pmuludq xmm7, [edx]) \
|
|
1471 |
AS2( movdqa xmm6, xmm2) \
|
|
1472 |
AS2( pand xmm2, xmm0) \
|
|
1473 |
AS2( psrld xmm0, 16) \
|
|
1474 |
AS2( paddd xmm4, xmm2) \
|
|
1475 |
AS2( paddd xmm5, xmm0) \
|
|
1476 |
AS2( pand xmm6, xmm7) \
|
|
1477 |
AS2( psrld xmm7, 16)
|
|
1478 |
||
1479 |
#define Mul_Begin(n) \
|
|
1480 |
MulPrologue \
|
|
1481 |
AS2( mov esi, esp)\
|
|
1482 |
AS2( and esp, 0xfffffff0)\
|
|
1483 |
AS2( sub esp, 48*n+16)\
|
|
1484 |
AS1( push esi)\
|
|
1485 |
AS2( xor edx, edx) \
|
|
1486 |
ASL(1) \
|
|
1487 |
ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
|
|
1488 |
ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
|
|
1489 |
ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
|
|
1490 |
AS2( movdqa [esp+20+2*edx], xmm0) \
|
|
1491 |
AS2( psrlq xmm0, 32) \
|
|
1492 |
AS2( movdqa [esp+20+2*edx+16], xmm0) \
|
|
1493 |
AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
|
|
1494 |
AS2( psrlq xmm1, 32) \
|
|
1495 |
AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
|
|
1496 |
AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
|
|
1497 |
AS2( psrlq xmm2, 32) \
|
|
1498 |
AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
|
|
1499 |
AS2( add edx, 16) \
|
|
1500 |
AS2( cmp edx, 8*(n)) \
|
|
1501 |
ASJ( jne, 1, b) \
|
|
1502 |
AS2( lea edi, [esp+20])\
|
|
1503 |
AS2( lea edx, [esp+20+16*n])\
|
|
1504 |
AS2( lea esi, [esp+20+32*n])\
|
|
1505 |
SSE2_FirstMultiply(0) \
|
|
1506 |
||
1507 |
#define Mul_Acc(i) \
|
|
1508 |
ASL(LMul##i) \
|
|
1509 |
AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
|
|
1510 |
AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
|
|
1511 |
AS2( movdqa xmm2, [ebx]) \
|
|
1512 |
AS2( pmuludq xmm0, xmm1) \
|
|
1513 |
AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
|
|
1514 |
AS2( movdqa xmm3, xmm2) \
|
|
1515 |
AS2( pand xmm2, xmm0) \
|
|
1516 |
AS2( psrld xmm0, 16) \
|
|
1517 |
AS2( paddd xmm4, xmm2) \
|
|
1518 |
AS2( paddd xmm5, xmm0) \
|
|
1519 |
AS2( pand xmm3, xmm1) \
|
|
1520 |
AS2( psrld xmm1, 16) \
|
|
1521 |
AS2( paddd xmm6, xmm3) \
|
|
1522 |
AS2( paddd xmm7, xmm1) \
|
|
1523 |
||
1524 |
#define Mul_Acc1(i)
|
|
1525 |
#define Mul_Acc2(i) ASC(call, LMul##i)
|
|
1526 |
#define Mul_Acc3(i) Mul_Acc2(i)
|
|
1527 |
#define Mul_Acc4(i) Mul_Acc2(i)
|
|
1528 |
#define Mul_Acc5(i) Mul_Acc2(i)
|
|
1529 |
#define Mul_Acc6(i) Mul_Acc2(i)
|
|
1530 |
#define Mul_Acc7(i) Mul_Acc2(i)
|
|
1531 |
#define Mul_Acc8(i) Mul_Acc2(i)
|
|
1532 |
#define Mul_Acc9(i) Mul_Acc2(i)
|
|
1533 |
#define Mul_Acc10(i) Mul_Acc2(i)
|
|
1534 |
#define Mul_Acc11(i) Mul_Acc2(i)
|
|
1535 |
#define Mul_Acc12(i) Mul_Acc2(i)
|
|
1536 |
#define Mul_Acc13(i) Mul_Acc2(i)
|
|
1537 |
#define Mul_Acc14(i) Mul_Acc2(i)
|
|
1538 |
#define Mul_Acc15(i) Mul_Acc2(i)
|
|
1539 |
#define Mul_Acc16(i) Mul_Acc2(i)
|
|
1540 |
||
1541 |
#define Mul_Column1(k, i) \
|
|
1542 |
SSE2_SaveShift(k) \
|
|
1543 |
AS2( add esi, 16) \
|
|
1544 |
SSE2_MulAdd45\
|
|
1545 |
Mul_Acc##i(i) \
|
|
1546 |
||
1547 |
#define Mul_Column0(k, i) \
|
|
1548 |
SSE2_SaveShift(k) \
|
|
1549 |
AS2( add edi, 16) \
|
|
1550 |
AS2( add edx, 16) \
|
|
1551 |
SSE2_MulAdd45\
|
|
1552 |
Mul_Acc##i(i) \
|
|
1553 |
||
1554 |
#define Bot_Acc(i) \
|
|
1555 |
AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
|
|
1556 |
AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
|
|
1557 |
AS2( pmuludq xmm0, xmm1) \
|
|
1558 |
AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
|
|
1559 |
AS2( paddq xmm4, xmm0) \
|
|
1560 |
AS2( paddd xmm6, xmm1)
|
|
1561 |
||
1562 |
#define Bot_SaveAcc(k) \
|
|
1563 |
SSE2_SaveShift(k) \
|
|
1564 |
AS2( add edi, 16) \
|
|
1565 |
AS2( add edx, 16) \
|
|
1566 |
AS2( movdqa xmm6, [esi]) \
|
|
1567 |
AS2( movdqa xmm0, [edi]) \
|
|
1568 |
AS2( pmuludq xmm0, xmm6) \
|
|
1569 |
AS2( paddq xmm4, xmm0) \
|
|
1570 |
AS2( psllq xmm5, 16) \
|
|
1571 |
AS2( paddq xmm4, xmm5) \
|
|
1572 |
AS2( pmuludq xmm6, [edx])
|
|
1573 |
||
1574 |
#define Bot_End(n) \
|
|
1575 |
AS2( movhlps xmm7, xmm6) \
|
|
1576 |
AS2( paddd xmm6, xmm7) \
|
|
1577 |
AS2( psllq xmm6, 32) \
|
|
1578 |
AS2( paddd xmm4, xmm6) \
|
|
1579 |
AS2( movq QWORD PTR [ecx+8*((n)-1)], xmm4) \
|
|
1580 |
AS1( pop esp)\
|
|
1581 |
MulEpilogue
|
|
1582 |
||
1583 |
#define Top_Begin(n) \
|
|
1584 |
TopPrologue \
|
|
1585 |
AS2( mov edx, esp)\
|
|
1586 |
AS2( and esp, 0xfffffff0)\
|
|
1587 |
AS2( sub esp, 48*n+16)\
|
|
1588 |
AS1( push edx)\
|
|
1589 |
AS2( xor edx, edx) \
|
|
1590 |
ASL(1) \
|
|
1591 |
ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
|
|
1592 |
ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
|
|
1593 |
ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
|
|
1594 |
AS2( movdqa [esp+20+2*edx], xmm0) \
|
|
1595 |
AS2( psrlq xmm0, 32) \
|
|
1596 |
AS2( movdqa [esp+20+2*edx+16], xmm0) \
|
|
1597 |
AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
|
|
1598 |
AS2( psrlq xmm1, 32) \
|
|
1599 |
AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
|
|
1600 |
AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
|
|
1601 |
AS2( psrlq xmm2, 32) \
|
|
1602 |
AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
|
|
1603 |
AS2( add edx, 16) \
|
|
1604 |
AS2( cmp edx, 8*(n)) \
|
|
1605 |
ASJ( jne, 1, b) \
|
|
1606 |
AS2( mov eax, esi) \
|
|
1607 |
AS2( lea edi, [esp+20+00*n+16*(n/2-1)])\
|
|
1608 |
AS2( lea edx, [esp+20+16*n+16*(n/2-1)])\
|
|
1609 |
AS2( lea esi, [esp+20+32*n+16*(n/2-1)])\
|
|
1610 |
AS2( pxor xmm4, xmm4)\
|
|
1611 |
AS2( pxor xmm5, xmm5)
|
|
1612 |
||
1613 |
#define Top_Acc(i) \
|
|
1614 |
AS2( movq xmm0, QWORD PTR [esi+i/2*(1-(i-2*(i/2))*2)*16+8]) \
|
|
1615 |
AS2( pmuludq xmm0, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
|
|
1616 |
AS2( psrlq xmm0, 48) \
|
|
1617 |
AS2( paddd xmm5, xmm0)\
|
|
1618 |
||
1619 |
#define Top_Column0(i) \
|
|
1620 |
AS2( psllq xmm5, 32) \
|
|
1621 |
AS2( add edi, 16) \
|
|
1622 |
AS2( add edx, 16) \
|
|
1623 |
SSE2_MulAdd45\
|
|
1624 |
Mul_Acc##i(i) \
|
|
1625 |
||
1626 |
#define Top_Column1(i) \
|
|
1627 |
SSE2_SaveShift(0) \
|
|
1628 |
AS2( add esi, 16) \
|
|
1629 |
SSE2_MulAdd45\
|
|
1630 |
Mul_Acc##i(i) \
|
|
1631 |
AS2( shr eax, 16) \
|
|
1632 |
AS2( movd xmm0, eax)\
|
|
1633 |
AS2( movd xmm1, [ecx+4])\
|
|
1634 |
AS2( psrld xmm1, 16)\
|
|
1635 |
AS2( pcmpgtd xmm1, xmm0)\
|
|
1636 |
AS2( psrld xmm1, 31)\
|
|
1637 |
AS2( paddd xmm4, xmm1)\
|
|
1638 |
||
1639 |
void SSE2_Square4(word *C, const word *A) |
|
1640 |
{
|
|
1641 |
Squ_Begin(2) |
|
1642 |
Squ_Column0(0, 1) |
|
1643 |
Squ_End(2) |
|
1644 |
}
|
|
1645 |
||
1646 |
void SSE2_Square8(word *C, const word *A) |
|
1647 |
{
|
|
1648 |
Squ_Begin(4) |
|
1649 |
#ifndef __GNUC__
|
|
1650 |
ASJ( jmp, 0, f) |
|
1651 |
Squ_Acc(2) |
|
1652 |
AS1( ret) ASL(0) |
|
1653 |
#endif
|
|
1654 |
Squ_Column0(0, 1) |
|
1655 |
Squ_Column1(1, 1) |
|
1656 |
Squ_Column0(2, 2) |
|
1657 |
Squ_Column1(3, 1) |
|
1658 |
Squ_Column0(4, 1) |
|
1659 |
Squ_End(4) |
|
1660 |
}
|
|
1661 |
||
1662 |
void SSE2_Square16(word *C, const word *A) |
|
1663 |
{
|
|
1664 |
Squ_Begin(8) |
|
1665 |
#ifndef __GNUC__
|
|
1666 |
ASJ( jmp, 0, f) |
|
1667 |
Squ_Acc(4) Squ_Acc(3) Squ_Acc(2) |
|
1668 |
AS1( ret) ASL(0) |
|
1669 |
#endif
|
|
1670 |
Squ_Column0(0, 1) |
|
1671 |
Squ_Column1(1, 1) |
|
1672 |
Squ_Column0(2, 2) |
|
1673 |
Squ_Column1(3, 2) |
|
1674 |
Squ_Column0(4, 3) |
|
1675 |
Squ_Column1(5, 3) |
|
1676 |
Squ_Column0(6, 4) |
|
1677 |
Squ_Column1(7, 3) |
|
1678 |
Squ_Column0(8, 3) |
|
1679 |
Squ_Column1(9, 2) |
|
1680 |
Squ_Column0(10, 2) |
|
1681 |
Squ_Column1(11, 1) |
|
1682 |
Squ_Column0(12, 1) |
|
1683 |
Squ_End(8) |
|
1684 |
}
|
|
1685 |
||
1686 |
void SSE2_Square32(word *C, const word *A) |
|
1687 |
{
|
|
1688 |
Squ_Begin(16) |
|
1689 |
ASJ( jmp, 0, f) |
|
1690 |
Squ_Acc(8) Squ_Acc(7) Squ_Acc(6) Squ_Acc(5) Squ_Acc(4) Squ_Acc(3) Squ_Acc(2) |
|
1691 |
AS1( ret) ASL(0) |
|
1692 |
Squ_Column0(0, 1) |
|
1693 |
Squ_Column1(1, 1) |
|
1694 |
Squ_Column0(2, 2) |
|
1695 |
Squ_Column1(3, 2) |
|
1696 |
Squ_Column0(4, 3) |
|
1697 |
Squ_Column1(5, 3) |
|
1698 |
Squ_Column0(6, 4) |
|
1699 |
Squ_Column1(7, 4) |
|
1700 |
Squ_Column0(8, 5) |
|
1701 |
Squ_Column1(9, 5) |
|
1702 |
Squ_Column0(10, 6) |
|
1703 |
Squ_Column1(11, 6) |
|
1704 |
Squ_Column0(12, 7) |
|
1705 |
Squ_Column1(13, 7) |
|
1706 |
Squ_Column0(14, 8) |
|
1707 |
Squ_Column1(15, 7) |
|
1708 |
Squ_Column0(16, 7) |
|
1709 |
Squ_Column1(17, 6) |
|
1710 |
Squ_Column0(18, 6) |
|
1711 |
Squ_Column1(19, 5) |
|
1712 |
Squ_Column0(20, 5) |
|
1713 |
Squ_Column1(21, 4) |
|
1714 |
Squ_Column0(22, 4) |
|
1715 |
Squ_Column1(23, 3) |
|
1716 |
Squ_Column0(24, 3) |
|
1717 |
Squ_Column1(25, 2) |
|
1718 |
Squ_Column0(26, 2) |
|
1719 |
Squ_Column1(27, 1) |
|
1720 |
Squ_Column0(28, 1) |
|
1721 |
Squ_End(16) |
|
1722 |
}
|
|
1723 |
||
1724 |
void SSE2_Multiply4(word *C, const word *A, const word *B) |
|
1725 |
{
|
|
1726 |
Mul_Begin(2) |
|
1727 |
#ifndef __GNUC__
|
|
1728 |
ASJ( jmp, 0, f) |
|
1729 |
Mul_Acc(2) |
|
1730 |
AS1( ret) ASL(0) |
|
1731 |
#endif
|
|
1732 |
Mul_Column0(0, 2) |
|
1733 |
Mul_End(2) |
|
1734 |
}
|
|
1735 |
||
1736 |
void SSE2_Multiply8(word *C, const word *A, const word *B) |
|
1737 |
{
|
|
1738 |
Mul_Begin(4) |
|
1739 |
#ifndef __GNUC__
|
|
1740 |
ASJ( jmp, 0, f) |
|
1741 |
Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) |
|
1742 |
AS1( ret) ASL(0) |
|
1743 |
#endif
|
|
1744 |
Mul_Column0(0, 2) |
|
1745 |
Mul_Column1(1, 3) |
|
1746 |
Mul_Column0(2, 4) |
|
1747 |
Mul_Column1(3, 3) |
|
1748 |
Mul_Column0(4, 2) |
|
1749 |
Mul_End(4) |
|
1750 |
}
|
|
1751 |
||
1752 |
void SSE2_Multiply16(word *C, const word *A, const word *B) |
|
1753 |
{
|
|
1754 |
Mul_Begin(8) |
|
1755 |
#ifndef __GNUC__
|
|
1756 |
ASJ( jmp, 0, f) |
|
1757 |
Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) |
|
1758 |
AS1( ret) ASL(0) |
|
1759 |
#endif
|
|
1760 |
Mul_Column0(0, 2) |
|
1761 |
Mul_Column1(1, 3) |
|
1762 |
Mul_Column0(2, 4) |
|
1763 |
Mul_Column1(3, 5) |
|
1764 |
Mul_Column0(4, 6) |
|
1765 |
Mul_Column1(5, 7) |
|
1766 |
Mul_Column0(6, 8) |
|
1767 |
Mul_Column1(7, 7) |
|
1768 |
Mul_Column0(8, 6) |
|
1769 |
Mul_Column1(9, 5) |
|
1770 |
Mul_Column0(10, 4) |
|
1771 |
Mul_Column1(11, 3) |
|
1772 |
Mul_Column0(12, 2) |
|
1773 |
Mul_End(8) |
|
1774 |
}
|
|
1775 |
||
1776 |
void SSE2_Multiply32(word *C, const word *A, const word *B) |
|
1777 |
{
|
|
1778 |
Mul_Begin(16) |
|
1779 |
ASJ( jmp, 0, f) |
|
1780 |
Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) |
|
1781 |
AS1( ret) ASL(0) |
|
1782 |
Mul_Column0(0, 2) |
|
1783 |
Mul_Column1(1, 3) |
|
1784 |
Mul_Column0(2, 4) |
|
1785 |
Mul_Column1(3, 5) |
|
1786 |
Mul_Column0(4, 6) |
|
1787 |
Mul_Column1(5, 7) |
|
1788 |
Mul_Column0(6, 8) |
|
1789 |
Mul_Column1(7, 9) |
|
1790 |
Mul_Column0(8, 10) |
|
1791 |
Mul_Column1(9, 11) |
|
1792 |
Mul_Column0(10, 12) |
|
1793 |
Mul_Column1(11, 13) |
|
1794 |
Mul_Column0(12, 14) |
|
1795 |
Mul_Column1(13, 15) |
|
1796 |
Mul_Column0(14, 16) |
|
1797 |
Mul_Column1(15, 15) |
|
1798 |
Mul_Column0(16, 14) |
|
1799 |
Mul_Column1(17, 13) |
|
1800 |
Mul_Column0(18, 12) |
|
1801 |
Mul_Column1(19, 11) |
|
1802 |
Mul_Column0(20, 10) |
|
1803 |
Mul_Column1(21, 9) |
|
1804 |
Mul_Column0(22, 8) |
|
1805 |
Mul_Column1(23, 7) |
|
1806 |
Mul_Column0(24, 6) |
|
1807 |
Mul_Column1(25, 5) |
|
1808 |
Mul_Column0(26, 4) |
|
1809 |
Mul_Column1(27, 3) |
|
1810 |
Mul_Column0(28, 2) |
|
1811 |
Mul_End(16) |
|
1812 |
}
|
|
1813 |
||
1814 |
void SSE2_MultiplyBottom4(word *C, const word *A, const word *B) |
|
1815 |
{
|
|
1816 |
Mul_Begin(2) |
|
1817 |
Bot_SaveAcc(0) Bot_Acc(2) |
|
1818 |
Bot_End(2) |
|
1819 |
}
|
|
1820 |
||
1821 |
void SSE2_MultiplyBottom8(word *C, const word *A, const word *B) |
|
1822 |
{
|
|
1823 |
Mul_Begin(4) |
|
1824 |
#ifndef __GNUC__
|
|
1825 |
ASJ( jmp, 0, f) |
|
1826 |
Mul_Acc(3) Mul_Acc(2) |
|
1827 |
AS1( ret) ASL(0) |
|
1828 |
#endif
|
|
1829 |
Mul_Column0(0, 2) |
|
1830 |
Mul_Column1(1, 3) |
|
1831 |
Bot_SaveAcc(2) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2) |
|
1832 |
Bot_End(4) |
|
1833 |
}
|
|
1834 |
||
1835 |
void SSE2_MultiplyBottom16(word *C, const word *A, const word *B) |
|
1836 |
{
|
|
1837 |
Mul_Begin(8) |
|
1838 |
#ifndef __GNUC__
|
|
1839 |
ASJ( jmp, 0, f) |
|
1840 |
Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) |
|
1841 |
AS1( ret) ASL(0) |
|
1842 |
#endif
|
|
1843 |
Mul_Column0(0, 2) |
|
1844 |
Mul_Column1(1, 3) |
|
1845 |
Mul_Column0(2, 4) |
|
1846 |
Mul_Column1(3, 5) |
|
1847 |
Mul_Column0(4, 6) |
|
1848 |
Mul_Column1(5, 7) |
|
1849 |
Bot_SaveAcc(6) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2) |
|
1850 |
Bot_End(8) |
|
1851 |
}
|
|
1852 |
||
1853 |
void SSE2_MultiplyBottom32(word *C, const word *A, const word *B) |
|
1854 |
{
|
|
1855 |
Mul_Begin(16) |
|
1856 |
#ifndef __GNUC__
|
|
1857 |
ASJ( jmp, 0, f) |
|
1858 |
Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) |
|
1859 |
AS1( ret) ASL(0) |
|
1860 |
#endif
|
|
1861 |
Mul_Column0(0, 2) |
|
1862 |
Mul_Column1(1, 3) |
|
1863 |
Mul_Column0(2, 4) |
|
1864 |
Mul_Column1(3, 5) |
|
1865 |
Mul_Column0(4, 6) |
|
1866 |
Mul_Column1(5, 7) |
|
1867 |
Mul_Column0(6, 8) |
|
1868 |
Mul_Column1(7, 9) |
|
1869 |
Mul_Column0(8, 10) |
|
1870 |
Mul_Column1(9, 11) |
|
1871 |
Mul_Column0(10, 12) |
|
1872 |
Mul_Column1(11, 13) |
|
1873 |
Mul_Column0(12, 14) |
|
1874 |
Mul_Column1(13, 15) |
|
1875 |
Bot_SaveAcc(14) Bot_Acc(16) Bot_Acc(15) Bot_Acc(14) Bot_Acc(13) Bot_Acc(12) Bot_Acc(11) Bot_Acc(10) Bot_Acc(9) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2) |
|
1876 |
Bot_End(16) |
|
1877 |
}
|
|
1878 |
||
1879 |
void SSE2_MultiplyTop8(word *C, const word *A, const word *B, word L) |
|
1880 |
{
|
|
1881 |
Top_Begin(4) |
|
1882 |
Top_Acc(3) Top_Acc(2) Top_Acc(1) |
|
1883 |
#ifndef __GNUC__
|
|
1884 |
ASJ( jmp, 0, f) |
|
1885 |
Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) |
|
1886 |
AS1( ret) ASL(0) |
|
1887 |
#endif
|
|
1888 |
Top_Column0(4) |
|
1889 |
Top_Column1(3) |
|
1890 |
Mul_Column0(0, 2) |
|
1891 |
Top_End(2) |
|
1892 |
}
|
|
1893 |
||
1894 |
void SSE2_MultiplyTop16(word *C, const word *A, const word *B, word L) |
|
1895 |
{
|
|
1896 |
Top_Begin(8) |
|
1897 |
Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1) |
|
1898 |
#ifndef __GNUC__
|
|
1899 |
ASJ( jmp, 0, f) |
|
1900 |
Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) |
|
1901 |
AS1( ret) ASL(0) |
|
1902 |
#endif
|
|
1903 |
Top_Column0(8) |
|
1904 |
Top_Column1(7) |
|
1905 |
Mul_Column0(0, 6) |
|
1906 |
Mul_Column1(1, 5) |
|
1907 |
Mul_Column0(2, 4) |
|
1908 |
Mul_Column1(3, 3) |
|
1909 |
Mul_Column0(4, 2) |
|
1910 |
Top_End(4) |
|
1911 |
}
|
|
1912 |
||
1913 |
void SSE2_MultiplyTop32(word *C, const word *A, const word *B, word L) |
|
1914 |
{
|
|
1915 |
Top_Begin(16) |
|
1916 |
Top_Acc(15) Top_Acc(14) Top_Acc(13) Top_Acc(12) Top_Acc(11) Top_Acc(10) Top_Acc(9) Top_Acc(8) Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1) |
|
1917 |
#ifndef __GNUC__
|
|
1918 |
ASJ( jmp, 0, f) |
|
1919 |
Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) |
|
1920 |
AS1( ret) ASL(0) |
|
1921 |
#endif
|
|
1922 |
Top_Column0(16) |
|
1923 |
Top_Column1(15) |
|
1924 |
Mul_Column0(0, 14) |
|
1925 |
Mul_Column1(1, 13) |
|
1926 |
Mul_Column0(2, 12) |
|
1927 |
Mul_Column1(3, 11) |
|
1928 |
Mul_Column0(4, 10) |
|
1929 |
Mul_Column1(5, 9) |
|
1930 |
Mul_Column0(6, 8) |
|
1931 |
Mul_Column1(7, 7) |
|
1932 |
Mul_Column0(8, 6) |
|
1933 |
Mul_Column1(9, 5) |
|
1934 |
Mul_Column0(10, 4) |
|
1935 |
Mul_Column1(11, 3) |
|
1936 |
Mul_Column0(12, 2) |
|
1937 |
Top_End(8) |
|
1938 |
}
|
|
1939 |
||
315
by weidai
fix compile for x64, DLL and VC 6 |
1940 |
#endif // #if CRYPTOPP_INTEGER_SSE2 |
270
by weidai
MMX/SSE2 optimizations |
1941 |
|
1942 |
// ********************************************************
|
|
1943 |
||
351
by weidai
revert to int return value for Add and Sub |
1944 |
typedef int (CRYPTOPP_FASTCALL * PAdd)(size_t N, word *C, const word *A, const word *B); |
270
by weidai
MMX/SSE2 optimizations |
1945 |
typedef void (* PMul)(word *C, const word *A, const word *B); |
1946 |
typedef void (* PSqu)(word *C, const word *A); |
|
1947 |
typedef void (* PMulTop)(word *C, const word *A, const word *B, word L); |
|
1948 |
||
315
by weidai
fix compile for x64, DLL and VC 6 |
1949 |
#if CRYPTOPP_INTEGER_SSE2
|
270
by weidai
MMX/SSE2 optimizations |
1950 |
static PAdd s_pAdd = &Baseline_Add, s_pSub = &Baseline_Sub; |
1951 |
static size_t s_recursionLimit = 8; |
|
1952 |
#else
|
|
315
by weidai
fix compile for x64, DLL and VC 6 |
1953 |
static const size_t s_recursionLimit = 16; |
270
by weidai
MMX/SSE2 optimizations |
1954 |
#endif
|
1955 |
||
1956 |
static PMul s_pMul[9], s_pBot[9]; |
|
1957 |
static PSqu s_pSqu[9]; |
|
315
by weidai
fix compile for x64, DLL and VC 6 |
1958 |
static PMulTop s_pTop[9]; |
270
by weidai
MMX/SSE2 optimizations |
1959 |
|
1960 |
static void SetFunctionPointers() |
|
1961 |
{
|
|
1962 |
s_pMul[0] = &Baseline_Multiply2; |
|
1963 |
s_pBot[0] = &Baseline_MultiplyBottom2; |
|
1964 |
s_pSqu[0] = &Baseline_Square2; |
|
315
by weidai
fix compile for x64, DLL and VC 6 |
1965 |
s_pTop[0] = &Baseline_MultiplyTop2; |
1966 |
s_pTop[1] = &Baseline_MultiplyTop4; |
|
270
by weidai
MMX/SSE2 optimizations |
1967 |
|
315
by weidai
fix compile for x64, DLL and VC 6 |
1968 |
#if CRYPTOPP_INTEGER_SSE2
|
270
by weidai
MMX/SSE2 optimizations |
1969 |
if (HasSSE2()) |
1970 |
{
|
|
359
by weidai
fix crash in SSE2_Add on P4 when compiled with MSVC 6.0 with Processor Pack |
1971 |
#if _MSC_VER != 1200 || defined(NDEBUG)
|
270
by weidai
MMX/SSE2 optimizations |
1972 |
if (IsP4()) |
1973 |
{
|
|
1974 |
s_pAdd = &SSE2_Add; |
|
1975 |
s_pSub = &SSE2_Sub; |
|
1976 |
}
|
|
359
by weidai
fix crash in SSE2_Add on P4 when compiled with MSVC 6.0 with Processor Pack |
1977 |
#endif
|
270
by weidai
MMX/SSE2 optimizations |
1978 |
|
1979 |
s_recursionLimit = 32; |
|
1980 |
||
1981 |
s_pMul[1] = &SSE2_Multiply4; |
|
1982 |
s_pMul[2] = &SSE2_Multiply8; |
|
1983 |
s_pMul[4] = &SSE2_Multiply16; |
|
1984 |
s_pMul[8] = &SSE2_Multiply32; |
|
1985 |
||
1986 |
s_pBot[1] = &SSE2_MultiplyBottom4; |
|
1987 |
s_pBot[2] = &SSE2_MultiplyBottom8; |
|
1988 |
s_pBot[4] = &SSE2_MultiplyBottom16; |
|
1989 |
s_pBot[8] = &SSE2_MultiplyBottom32; |
|
1990 |
||
1991 |
s_pSqu[1] = &SSE2_Square4; |
|
1992 |
s_pSqu[2] = &SSE2_Square8; |
|
1993 |
s_pSqu[4] = &SSE2_Square16; |
|
1994 |
s_pSqu[8] = &SSE2_Square32; |
|
1995 |
||
315
by weidai
fix compile for x64, DLL and VC 6 |
1996 |
s_pTop[2] = &SSE2_MultiplyTop8; |
1997 |
s_pTop[4] = &SSE2_MultiplyTop16; |
|
1998 |
s_pTop[8] = &SSE2_MultiplyTop32; |
|
270
by weidai
MMX/SSE2 optimizations |
1999 |
}
|
2000 |
else
|
|
2001 |
#endif
|
|
2002 |
{
|
|
2003 |
s_pMul[1] = &Baseline_Multiply4; |
|
2004 |
s_pMul[2] = &Baseline_Multiply8; |
|
2005 |
||
2006 |
s_pBot[1] = &Baseline_MultiplyBottom4; |
|
2007 |
s_pBot[2] = &Baseline_MultiplyBottom8; |
|
2008 |
||
2009 |
s_pSqu[1] = &Baseline_Square4; |
|
2010 |
s_pSqu[2] = &Baseline_Square8; |
|
315
by weidai
fix compile for x64, DLL and VC 6 |
2011 |
|
2012 |
s_pTop[2] = &Baseline_MultiplyTop8; |
|
2013 |
||
2014 |
#if !CRYPTOPP_INTEGER_SSE2
|
|
2015 |
s_pMul[4] = &Baseline_Multiply16; |
|
2016 |
s_pBot[4] = &Baseline_MultiplyBottom16; |
|
2017 |
s_pSqu[4] = &Baseline_Square16; |
|
2018 |
s_pTop[4] = &Baseline_MultiplyTop16; |
|
2019 |
#endif
|
|
270
by weidai
MMX/SSE2 optimizations |
2020 |
}
|
2021 |
}
|
|
2022 |
||
351
by weidai
revert to int return value for Add and Sub |
2023 |
inline int Add(word *C, const word *A, const word *B, size_t N) |
270
by weidai
MMX/SSE2 optimizations |
2024 |
{
|
315
by weidai
fix compile for x64, DLL and VC 6 |
2025 |
#if CRYPTOPP_INTEGER_SSE2
|
270
by weidai
MMX/SSE2 optimizations |
2026 |
return s_pAdd(N, C, A, B); |
2027 |
#else
|
|
2028 |
return Baseline_Add(N, C, A, B); |
|
2029 |
#endif
|
|
2030 |
}
|
|
2031 |
||
351
by weidai
revert to int return value for Add and Sub |
2032 |
inline int Subtract(word *C, const word *A, const word *B, size_t N) |
270
by weidai
MMX/SSE2 optimizations |
2033 |
{
|
315
by weidai
fix compile for x64, DLL and VC 6 |
2034 |
#if CRYPTOPP_INTEGER_SSE2
|
270
by weidai
MMX/SSE2 optimizations |
2035 |
return s_pSub(N, C, A, B); |
2036 |
#else
|
|
2037 |
return Baseline_Sub(N, C, A, B); |
|
2038 |
#endif
|
|
2039 |
}
|
|
2040 |
||
2041 |
// ********************************************************
|
|
2042 |
||
1
by weidai
Initial revision |
2043 |
|
2044 |
#define A0 A
|
|
2045 |
#define A1 (A+N2)
|
|
2046 |
#define B0 B
|
|
2047 |
#define B1 (B+N2)
|
|
2048 |
||
2049 |
#define T0 T
|
|
2050 |
#define T1 (T+N2)
|
|
2051 |
#define T2 (T+N)
|
|
2052 |
#define T3 (T+N+N2)
|
|
2053 |
||
2054 |
#define R0 R
|
|
2055 |
#define R1 (R+N2)
|
|
2056 |
#define R2 (R+N)
|
|
2057 |
#define R3 (R+N+N2)
|
|
2058 |
||
2059 |
// R[2*N] - result = A*B
|
|
2060 |
// T[2*N] - temporary work space
|
|
2061 |
// A[N] --- multiplier
|
|
2062 |
// B[N] --- multiplicant
|
|
2063 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
2064 |
void RecursiveMultiply(word *R, word *T, const word *A, const word *B, size_t N) |
1
by weidai
Initial revision |
2065 |
{
|
2066 |
assert(N>=2 && N%2==0); |
|
2067 |
||
270
by weidai
MMX/SSE2 optimizations |
2068 |
if (N <= s_recursionLimit) |
2069 |
s_pMul[N/4](R, A, B); |
|
1
by weidai
Initial revision |
2070 |
else
|
2071 |
{
|
|
184
by weidai
port to MSVC .NET 2005 beta 2 |
2072 |
const size_t N2 = N/2; |
270
by weidai
MMX/SSE2 optimizations |
2073 |
|
2074 |
size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2; |
|
2075 |
Subtract(R0, A + AN2, A + (N2 ^ AN2), N2); |
|
2076 |
||
2077 |
size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2; |
|
2078 |
Subtract(R1, B + BN2, B + (N2 ^ BN2), N2); |
|
2079 |
||
2080 |
RecursiveMultiply(R2, T2, A1, B1, N2); |
|
2081 |
RecursiveMultiply(T0, T2, R0, R1, N2); |
|
113
by weidai
unify GCC and MSVC multiplication code |
2082 |
RecursiveMultiply(R0, T2, A0, B0, N2); |
2083 |
||
2084 |
// now T[01] holds (A1-A0)*(B0-B1), R[01] holds A0*B0, R[23] holds A1*B1
|
|
2085 |
||
270
by weidai
MMX/SSE2 optimizations |
2086 |
int c2 = Add(R2, R2, R1, N2); |
2087 |
int c3 = c2; |
|
2088 |
c2 += Add(R1, R2, R0, N2); |
|
2089 |
c3 += Add(R2, R2, R3, N2); |
|
2090 |
||
2091 |
if (AN2 == BN2) |
|
2092 |
c3 -= Subtract(R1, R1, T0, N); |
|
2093 |
else
|
|
2094 |
c3 += Add(R1, R1, T0, N); |
|
2095 |
||
2096 |
c3 += Increment(R2, N2, c2); |
|
2097 |
assert (c3 >= 0 && c3 <= 2); |
|
2098 |
Increment(R3, N2, c3); |
|
1
by weidai
Initial revision |
2099 |
}
|
2100 |
}
|
|
2101 |
||
2102 |
// R[2*N] - result = A*A
|
|
2103 |
// T[2*N] - temporary work space
|
|
2104 |
// A[N] --- number to be squared
|
|
2105 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
2106 |
void RecursiveSquare(word *R, word *T, const word *A, size_t N) |
1
by weidai
Initial revision |
2107 |
{
|
2108 |
assert(N && N%2==0); |
|
270
by weidai
MMX/SSE2 optimizations |
2109 |
|
2110 |
if (N <= s_recursionLimit) |
|
2111 |
s_pSqu[N/4](R, A); |
|
1
by weidai
Initial revision |
2112 |
else
|
113
by weidai
unify GCC and MSVC multiplication code |
2113 |
{
|
184
by weidai
port to MSVC .NET 2005 beta 2 |
2114 |
const size_t N2 = N/2; |
113
by weidai
unify GCC and MSVC multiplication code |
2115 |
|
2116 |
RecursiveSquare(R0, T2, A0, N2); |
|
2117 |
RecursiveSquare(R2, T2, A1, N2); |
|
2118 |
RecursiveMultiply(T0, T2, A0, A1, N2); |
|
2119 |
||
270
by weidai
MMX/SSE2 optimizations |
2120 |
int carry = Add(R1, R1, T0, N); |
2121 |
carry += Add(R1, R1, T0, N); |
|
113
by weidai
unify GCC and MSVC multiplication code |
2122 |
Increment(R3, N2, carry); |
2123 |
}
|
|
1
by weidai
Initial revision |
2124 |
}
|
2125 |
||
2126 |
// R[N] - bottom half of A*B
|
|
270
by weidai
MMX/SSE2 optimizations |
2127 |
// T[3*N/2] - temporary work space
|
1
by weidai
Initial revision |
2128 |
// A[N] - multiplier
|
2129 |
// B[N] - multiplicant
|
|
2130 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
2131 |
void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N) |
1
by weidai
Initial revision |
2132 |
{
|
2133 |
assert(N>=2 && N%2==0); |
|
270
by weidai
MMX/SSE2 optimizations |
2134 |
|
2135 |
if (N <= s_recursionLimit) |
|
2136 |
s_pBot[N/4](R, A, B); |
|
1
by weidai
Initial revision |
2137 |
else
|
113
by weidai
unify GCC and MSVC multiplication code |
2138 |
{
|
184
by weidai
port to MSVC .NET 2005 beta 2 |
2139 |
const size_t N2 = N/2; |
113
by weidai
unify GCC and MSVC multiplication code |
2140 |
|
2141 |
RecursiveMultiply(R, T, A0, B0, N2); |
|
2142 |
RecursiveMultiplyBottom(T0, T1, A1, B0, N2); |
|
270
by weidai
MMX/SSE2 optimizations |
2143 |
Add(R1, R1, T0, N2); |
113
by weidai
unify GCC and MSVC multiplication code |
2144 |
RecursiveMultiplyBottom(T0, T1, A0, B1, N2); |
270
by weidai
MMX/SSE2 optimizations |
2145 |
Add(R1, R1, T0, N2); |
113
by weidai
unify GCC and MSVC multiplication code |
2146 |
}
|
1
by weidai
Initial revision |
2147 |
}
|
2148 |
||
2149 |
// R[N] --- upper half of A*B
|
|
2150 |
// T[2*N] - temporary work space
|
|
2151 |
// L[N] --- lower half of A*B
|
|
2152 |
// A[N] --- multiplier
|
|
2153 |
// B[N] --- multiplicant
|
|
2154 |
||
270
by weidai
MMX/SSE2 optimizations |
2155 |
void MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, size_t N) |
1
by weidai
Initial revision |
2156 |
{
|
2157 |
assert(N>=2 && N%2==0); |
|
2158 |
||
315
by weidai
fix compile for x64, DLL and VC 6 |
2159 |
if (N <= s_recursionLimit) |
2160 |
s_pTop[N/4](R, A, B, L[N-1]); |
|
1
by weidai
Initial revision |
2161 |
else
|
2162 |
{
|
|
184
by weidai
port to MSVC .NET 2005 beta 2 |
2163 |
const size_t N2 = N/2; |
270
by weidai
MMX/SSE2 optimizations |
2164 |
|
2165 |
size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2; |
|
2166 |
Subtract(R0, A + AN2, A + (N2 ^ AN2), N2); |
|
2167 |
||
2168 |
size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2; |
|
2169 |
Subtract(R1, B + BN2, B + (N2 ^ BN2), N2); |
|
2170 |
||
2171 |
RecursiveMultiply(T0, T2, R0, R1, N2); |
|
2172 |
RecursiveMultiply(R0, T2, A1, B1, N2); |
|
2173 |
||
2174 |
// now T[01] holds (A1-A0)*(B0-B1) = A1*B0+A0*B1-A1*B1-A0*B0, R[01] holds A1*B1
|
|
2175 |
||
2176 |
int t, c3; |
|
2177 |
int c2 = Subtract(T2, L+N2, L, N2); |
|
2178 |
||
2179 |
if (AN2 == BN2) |
|
2180 |
{
|
|
2181 |
c2 -= Add(T2, T2, T0, N2); |
|
2182 |
t = (Compare(T2, R0, N2) == -1); |
|
2183 |
c3 = t - Subtract(T2, T2, T1, N2); |
|
2184 |
}
|
|
2185 |
else
|
|
2186 |
{
|
|
2187 |
c2 += Subtract(T2, T2, T0, N2); |
|
2188 |
t = (Compare(T2, R0, N2) == -1); |
|
2189 |
c3 = t + Add(T2, T2, T1, N2); |
|
2190 |
}
|
|
2191 |
||
2192 |
c2 += t; |
|
2193 |
if (c2 >= 0) |
|
2194 |
c3 += Increment(T2, N2, c2); |
|
2195 |
else
|
|
2196 |
c3 -= Decrement(T2, N2, -c2); |
|
2197 |
c3 += Add(R0, T2, R1, N2); |
|
2198 |
||
2199 |
assert (c3 >= 0 && c3 <= 2); |
|
2200 |
Increment(R1, N2, c3); |
|
1
by weidai
Initial revision |
2201 |
}
|
2202 |
}
|
|
2203 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
2204 |
inline void Multiply(word *R, word *T, const word *A, const word *B, size_t N) |
1
by weidai
Initial revision |
2205 |
{
|
113
by weidai
unify GCC and MSVC multiplication code |
2206 |
RecursiveMultiply(R, T, A, B, N); |
1
by weidai
Initial revision |
2207 |
}
|
2208 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
2209 |
inline void Square(word *R, word *T, const word *A, size_t N) |
1
by weidai
Initial revision |
2210 |
{
|
113
by weidai
unify GCC and MSVC multiplication code |
2211 |
RecursiveSquare(R, T, A, N); |
1
by weidai
Initial revision |
2212 |
}
|
2213 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
2214 |
inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N) |
1
by weidai
Initial revision |
2215 |
{
|
113
by weidai
unify GCC and MSVC multiplication code |
2216 |
RecursiveMultiplyBottom(R, T, A, B, N); |
1
by weidai
Initial revision |
2217 |
}
|
2218 |
||
2219 |
// R[NA+NB] - result = A*B
|
|
2220 |
// T[NA+NB] - temporary work space
|
|
2221 |
// A[NA] ---- multiplier
|
|
2222 |
// B[NB] ---- multiplicant
|
|
2223 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
2224 |
void AsymmetricMultiply(word *R, word *T, const word *A, size_t NA, const word *B, size_t NB) |
1
by weidai
Initial revision |
2225 |
{
|
2226 |
if (NA == NB) |
|
2227 |
{
|
|
2228 |
if (A == B) |
|
2229 |
Square(R, T, A, NA); |
|
2230 |
else
|
|
2231 |
Multiply(R, T, A, B, NA); |
|
2232 |
||
2233 |
return; |
|
2234 |
}
|
|
2235 |
||
2236 |
if (NA > NB) |
|
2237 |
{
|
|
2238 |
std::swap(A, B); |
|
2239 |
std::swap(NA, NB); |
|
2240 |
}
|
|
2241 |
||
2242 |
assert(NB % NA == 0); |
|
2243 |
||
2244 |
if (NA==2 && !A[1]) |
|
2245 |
{
|
|
2246 |
switch (A[0]) |
|
2247 |
{
|
|
2248 |
case 0: |
|
2249 |
SetWords(R, 0, NB+2); |
|
2250 |
return; |
|
2251 |
case 1: |
|
2252 |
CopyWords(R, B, NB); |
|
2253 |
R[NB] = R[NB+1] = 0; |
|
2254 |
return; |
|
2255 |
default: |
|
2256 |
R[NB] = LinearMultiply(R, B, A[0], NB); |
|
2257 |
R[NB+1] = 0; |
|
2258 |
return; |
|
2259 |
}
|
|
2260 |
}
|
|
2261 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
2262 |
size_t i; |
270
by weidai
MMX/SSE2 optimizations |
2263 |
if ((NB/NA)%2 == 0) |
2264 |
{
|
|
2265 |
Multiply(R, T, A, B, NA); |
|
2266 |
CopyWords(T+2*NA, R+NA, NA); |
|
1
by weidai
Initial revision |
2267 |
|
270
by weidai
MMX/SSE2 optimizations |
2268 |
for (i=2*NA; i<NB; i+=2*NA) |
2269 |
Multiply(T+NA+i, T, A, B+i, NA); |
|
2270 |
for (i=NA; i<NB; i+=2*NA) |
|
2271 |
Multiply(R+i, T, A, B+i, NA); |
|
2272 |
}
|
|
2273 |
else
|
|
2274 |
{
|
|
2275 |
for (i=0; i<NB; i+=2*NA) |
|
2276 |
Multiply(R+i, T, A, B+i, NA); |
|
2277 |
for (i=NA; i<NB; i+=2*NA) |
|
2278 |
Multiply(T+NA+i, T, A, B+i, NA); |
|
2279 |
}
|
|
1
by weidai
Initial revision |
2280 |
|
2281 |
if (Add(R+NA, R+NA, T+2*NA, NB-NA)) |
|
2282 |
Increment(R+NB, NA); |
|
2283 |
}
|
|
2284 |
||
2285 |
// R[N] ----- result = A inverse mod 2**(WORD_BITS*N)
|
|
2286 |
// T[3*N/2] - temporary work space
|
|
2287 |
// A[N] ----- an odd number as input
|
|
2288 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
2289 |
void RecursiveInverseModPower2(word *R, word *T, const word *A, size_t N) |
1
by weidai
Initial revision |
2290 |
{
|
2291 |
if (N==2) |
|
100
by weidai
fix bugs in 64-bit CPU support |
2292 |
{
|
2293 |
T[0] = AtomicInverseModPower2(A[0]); |
|
2294 |
T[1] = 0; |
|
270
by weidai
MMX/SSE2 optimizations |
2295 |
s_pBot[0](T+2, T, A); |
100
by weidai
fix bugs in 64-bit CPU support |
2296 |
TwosComplement(T+2, 2); |
2297 |
Increment(T+2, 2, 2); |
|
270
by weidai
MMX/SSE2 optimizations |
2298 |
s_pBot[0](R, T, T+2); |
100
by weidai
fix bugs in 64-bit CPU support |
2299 |
}
|
1
by weidai
Initial revision |
2300 |
else
|
2301 |
{
|
|
184
by weidai
port to MSVC .NET 2005 beta 2 |
2302 |
const size_t N2 = N/2; |
1
by weidai
Initial revision |
2303 |
RecursiveInverseModPower2(R0, T0, A0, N2); |
2304 |
T0[0] = 1; |
|
2305 |
SetWords(T0+1, 0, N2-1); |
|
2306 |
MultiplyTop(R1, T1, T0, R0, A0, N2); |
|
2307 |
MultiplyBottom(T0, T1, R0, A1, N2); |
|
2308 |
Add(T0, R1, T0, N2); |
|
2309 |
TwosComplement(T0, N2); |
|
2310 |
MultiplyBottom(R1, T1, R0, T0, N2); |
|
2311 |
}
|
|
2312 |
}
|
|
2313 |
||
2314 |
// R[N] --- result = X/(2**(WORD_BITS*N)) mod M
|
|
2315 |
// T[3*N] - temporary work space
|
|
2316 |
// X[2*N] - number to be reduced
|
|
2317 |
// M[N] --- modulus
|
|
2318 |
// U[N] --- multiplicative inverse of M mod 2**(WORD_BITS*N)
|
|
2319 |
||
270
by weidai
MMX/SSE2 optimizations |
2320 |
void MontgomeryReduce(word *R, word *T, word *X, const word *M, const word *U, size_t N) |
1
by weidai
Initial revision |
2321 |
{
|
270
by weidai
MMX/SSE2 optimizations |
2322 |
#if 1
|
1
by weidai
Initial revision |
2323 |
MultiplyBottom(R, T, X, U, N); |
2324 |
MultiplyTop(T, T+N, X, R, M, N); |
|
25
by weidai
increase resistance against timing attacks |
2325 |
word borrow = Subtract(T, X+N, T, N); |
2326 |
// defend against timing attack by doing this Add even when not needed
|
|
2327 |
word carry = Add(T+N, T, M, N); |
|
377
by weidai
remove branch in assert |
2328 |
assert(carry | !borrow); |
363
by weidai
fix possible branch prediction analysis (BPA) vulnerability |
2329 |
CopyWords(R, T + ((0-borrow) & N), N); |
270
by weidai
MMX/SSE2 optimizations |
2330 |
#elif 0
|
2331 |
const word u = 0-U[0]; |
|
2332 |
Declare2Words(p) |
|
2333 |
for (size_t i=0; i<N; i++) |
|
2334 |
{
|
|
2335 |
const word t = u * X[i]; |
|
2336 |
word c = 0; |
|
2337 |
for (size_t j=0; j<N; j+=2) |
|
2338 |
{
|
|
2339 |
MultiplyWords(p, t, M[j]); |
|
2340 |
Acc2WordsBy1(p, X[i+j]); |
|
2341 |
Acc2WordsBy1(p, c); |
|
2342 |
X[i+j] = LowWord(p); |
|
2343 |
c = HighWord(p); |
|
2344 |
MultiplyWords(p, t, M[j+1]); |
|
2345 |
Acc2WordsBy1(p, X[i+j+1]); |
|
2346 |
Acc2WordsBy1(p, c); |
|
2347 |
X[i+j+1] = LowWord(p); |
|
2348 |
c = HighWord(p); |
|
2349 |
}
|
|
2350 |
||
2351 |
if (Increment(X+N+i, N-i, c)) |
|
2352 |
while (!Subtract(X+N, X+N, M, N)) {} |
|
2353 |
}
|
|
2354 |
||
2355 |
memcpy(R, X+N, N*WORD_SIZE); |
|
2356 |
#else
|
|
2357 |
__m64 u = _mm_cvtsi32_si64(0-U[0]), p; |
|
2358 |
for (size_t i=0; i<N; i++) |
|
2359 |
{
|
|
2360 |
__m64 t = _mm_cvtsi32_si64(X[i]); |
|
2361 |
t = _mm_mul_su32(t, u); |
|
2362 |
__m64 c = _mm_setzero_si64(); |
|
2363 |
for (size_t j=0; j<N; j+=2) |
|
2364 |
{
|
|
2365 |
p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j])); |
|
2366 |
p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j])); |
|
2367 |
c = _mm_add_si64(c, p); |
|
2368 |
X[i+j] = _mm_cvtsi64_si32(c); |
|
2369 |
c = _mm_srli_si64(c, 32); |
|
2370 |
p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j+1])); |
|
2371 |
p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j+1])); |
|
2372 |
c = _mm_add_si64(c, p); |
|
2373 |
X[i+j+1] = _mm_cvtsi64_si32(c); |
|
2374 |
c = _mm_srli_si64(c, 32); |
|
2375 |
}
|
|
2376 |
||
2377 |
if (Increment(X+N+i, N-i, _mm_cvtsi64_si32(c))) |
|
2378 |
while (!Subtract(X+N, X+N, M, N)) {} |
|
2379 |
}
|
|
2380 |
||
2381 |
memcpy(R, X+N, N*WORD_SIZE); |
|
2382 |
_mm_empty(); |
|
2383 |
#endif
|
|
1
by weidai
Initial revision |
2384 |
}
|
2385 |
||
2386 |
// R[N] --- result = X/(2**(WORD_BITS*N/2)) mod M
|
|
2387 |
// T[2*N] - temporary work space
|
|
2388 |
// X[2*N] - number to be reduced
|
|
2389 |
// M[N] --- modulus
|
|
2390 |
// U[N/2] - multiplicative inverse of M mod 2**(WORD_BITS*N/2)
|
|
2391 |
// V[N] --- 2**(WORD_BITS*3*N/2) mod M
|
|
2392 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
2393 |
void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, const word *V, size_t N) |
1
by weidai
Initial revision |
2394 |
{
|
2395 |
assert(N%2==0 && N>=4); |
|
2396 |
||
2397 |
#define M0 M
|
|
2398 |
#define M1 (M+N2)
|
|
2399 |
#define V0 V
|
|
2400 |
#define V1 (V+N2)
|
|
2401 |
||
2402 |
#define X0 X
|
|
2403 |
#define X1 (X+N2)
|
|
2404 |
#define X2 (X+N)
|
|
2405 |
#define X3 (X+N+N2)
|
|
2406 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
2407 |
const size_t N2 = N/2; |
1
by weidai
Initial revision |
2408 |
Multiply(T0, T2, V0, X3, N2); |
2409 |
int c2 = Add(T0, T0, X0, N); |
|
2410 |
MultiplyBottom(T3, T2, T0, U, N2); |
|
2411 |
MultiplyTop(T2, R, T0, T3, M0, N2); |
|
2412 |
c2 -= Subtract(T2, T1, T2, N2); |
|
2413 |
Multiply(T0, R, T3, M1, N2); |
|
2414 |
c2 -= Subtract(T0, T2, T0, N2); |
|
2415 |
int c3 = -(int)Subtract(T1, X2, T1, N2); |
|
2416 |
Multiply(R0, T2, V1, X3, N2); |
|
2417 |
c3 += Add(R, R, T, N); |
|
2418 |
||
2419 |
if (c2>0) |
|
2420 |
c3 += Increment(R1, N2); |
|
2421 |
else if (c2<0) |
|
2422 |
c3 -= Decrement(R1, N2, -c2); |
|
2423 |
||
2424 |
assert(c3>=-1 && c3<=1); |
|
2425 |
if (c3>0) |
|
2426 |
Subtract(R, R, M, N); |
|
2427 |
else if (c3<0) |
|
2428 |
Add(R, R, M, N); |
|
2429 |
||
2430 |
#undef M0
|
|
2431 |
#undef M1
|
|
2432 |
#undef V0
|
|
2433 |
#undef V1
|
|
2434 |
||
2435 |
#undef X0
|
|
2436 |
#undef X1
|
|
2437 |
#undef X2
|
|
2438 |
#undef X3
|
|
2439 |
}
|
|
2440 |
||
2441 |
#undef A0
|
|
2442 |
#undef A1
|
|
2443 |
#undef B0
|
|
2444 |
#undef B1
|
|
2445 |
||
2446 |
#undef T0
|
|
2447 |
#undef T1
|
|
2448 |
#undef T2
|
|
2449 |
#undef T3
|
|
2450 |
||
2451 |
#undef R0
|
|
2452 |
#undef R1
|
|
2453 |
#undef R2
|
|
2454 |
#undef R3
|
|
2455 |
||
100
by weidai
fix bugs in 64-bit CPU support |
2456 |
/*
|
1
by weidai
Initial revision |
2457 |
// do a 3 word by 2 word divide, returns quotient and leaves remainder in A
|
2458 |
static word SubatomicDivide(word *A, word B0, word B1)
|
|
2459 |
{
|
|
2460 |
// assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a word
|
|
2461 |
assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
|
|
2462 |
||
100
by weidai
fix bugs in 64-bit CPU support |
2463 |
// estimate the quotient: do a 2 word by 1 word divide
|
1
by weidai
Initial revision |
2464 |
word Q;
|
2465 |
if (B1+1 == 0)
|
|
2466 |
Q = A[2];
|
|
2467 |
else
|
|
100
by weidai
fix bugs in 64-bit CPU support |
2468 |
Q = DWord(A[1], A[2]).DividedBy(B1+1);
|
1
by weidai
Initial revision |
2469 |
|
2470 |
// now subtract Q*B from A
|
|
100
by weidai
fix bugs in 64-bit CPU support |
2471 |
DWord p = DWord::Multiply(B0, Q);
|
2472 |
DWord u = (DWord) A[0] - p.GetLowHalf();
|
|
2473 |
A[0] = u.GetLowHalf();
|
|
2474 |
u = (DWord) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - DWord::Multiply(B1, Q);
|
|
2475 |
A[1] = u.GetLowHalf();
|
|
2476 |
A[2] += u.GetHighHalf();
|
|
1
by weidai
Initial revision |
2477 |
|
2478 |
// Q <= actual quotient, so fix it
|
|
2479 |
while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
|
|
2480 |
{
|
|
100
by weidai
fix bugs in 64-bit CPU support |
2481 |
u = (DWord) A[0] - B0;
|
2482 |
A[0] = u.GetLowHalf();
|
|
2483 |
u = (DWord) A[1] - B1 - u.GetHighHalfAsBorrow();
|
|
2484 |
A[1] = u.GetLowHalf();
|
|
2485 |
A[2] += u.GetHighHalf();
|
|
1
by weidai
Initial revision |
2486 |
Q++;
|
2487 |
assert(Q); // shouldn't overflow
|
|
2488 |
}
|
|
2489 |
||
2490 |
return Q;
|
|
2491 |
}
|
|
2492 |
||
2493 |
// do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
|
|
2494 |
static inline void AtomicDivide(word *Q, const word *A, const word *B)
|
|
2495 |
{
|
|
2496 |
if (!B[0] && !B[1]) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
|
|
2497 |
{
|
|
2498 |
Q[0] = A[2];
|
|
2499 |
Q[1] = A[3];
|
|
2500 |
}
|
|
2501 |
else
|
|
2502 |
{
|
|
2503 |
word T[4];
|
|
2504 |
T[0] = A[0]; T[1] = A[1]; T[2] = A[2]; T[3] = A[3];
|
|
2505 |
Q[1] = SubatomicDivide(T+1, B[0], B[1]);
|
|
2506 |
Q[0] = SubatomicDivide(T, B[0], B[1]);
|
|
2507 |
||
2508 |
#ifndef NDEBUG
|
|
2509 |
// multiply quotient and divisor and add remainder, make sure it equals dividend
|
|
2510 |
assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
|
|
2511 |
word P[4];
|
|
2512 |
LowLevel::Multiply2(P, Q, B);
|
|
2513 |
Add(P, P, T, 4);
|
|
2514 |
assert(memcmp(P, A, 4*WORD_SIZE)==0);
|
|
2515 |
#endif
|
|
2516 |
}
|
|
2517 |
}
|
|
100
by weidai
fix bugs in 64-bit CPU support |
2518 |
*/
|
2519 |
||
2520 |
static inline void AtomicDivide(word *Q, const word *A, const word *B) |
|
2521 |
{
|
|
2522 |
word T[4]; |
|
2523 |
DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1])); |
|
2524 |
Q[0] = q.GetLowHalf(); |
|
2525 |
Q[1] = q.GetHighHalf(); |
|
2526 |
||
2527 |
#ifndef NDEBUG
|
|
2528 |
if (B[0] || B[1]) |
|
2529 |
{
|
|
2530 |
// multiply quotient and divisor and add remainder, make sure it equals dividend
|
|
2531 |
assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0]))); |
|
2532 |
word P[4]; |
|
270
by weidai
MMX/SSE2 optimizations |
2533 |
s_pMul[0](P, Q, B); |
100
by weidai
fix bugs in 64-bit CPU support |
2534 |
Add(P, P, T, 4); |
2535 |
assert(memcmp(P, A, 4*WORD_SIZE)==0); |
|
2536 |
}
|
|
2537 |
#endif
|
|
2538 |
}
|
|
1
by weidai
Initial revision |
2539 |
|
2540 |
// for use by Divide(), corrects the underestimated quotient {Q1,Q0}
|
|
184
by weidai
port to MSVC .NET 2005 beta 2 |
2541 |
static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, size_t N) |
1
by weidai
Initial revision |
2542 |
{
|
2543 |
assert(N && N%2==0); |
|
2544 |
||
270
by weidai
MMX/SSE2 optimizations |
2545 |
AsymmetricMultiply(T, T+N+2, Q, 2, B, N); |
1
by weidai
Initial revision |
2546 |
|
2547 |
word borrow = Subtract(R, R, T, N+2); |
|
2548 |
assert(!borrow && !R[N+1]); |
|
2549 |
||
2550 |
while (R[N] || Compare(R, B, N) >= 0) |
|
2551 |
{
|
|
2552 |
R[N] -= Subtract(R, R, B, N); |
|
2553 |
Q[1] += (++Q[0]==0); |
|
2554 |
assert(Q[0] || Q[1]); // no overflow |
|
2555 |
}
|
|
2556 |
}
|
|
2557 |
||
2558 |
// R[NB] -------- remainder = A%B
|
|
2559 |
// Q[NA-NB+2] --- quotient = A/B
|
|
270
by weidai
MMX/SSE2 optimizations |
2560 |
// T[NA+3*(NB+2)] - temp work space
|
1
by weidai
Initial revision |
2561 |
// A[NA] -------- dividend
|
2562 |
// B[NB] -------- divisor
|
|
2563 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
2564 |
void Divide(word *R, word *Q, word *T, const word *A, size_t NA, const word *B, size_t NB) |
1
by weidai
Initial revision |
2565 |
{
|
2566 |
assert(NA && NB && NA%2==0 && NB%2==0); |
|
2567 |
assert(B[NB-1] || B[NB-2]); |
|
2568 |
assert(NB <= NA); |
|
2569 |
||
2570 |
// set up temporary work space
|
|
2571 |
word *const TA=T; |
|
2572 |
word *const TB=T+NA+2; |
|
2573 |
word *const TP=T+NA+2+NB; |
|
2574 |
||
2575 |
// copy B into TB and normalize it so that TB has highest bit set to 1
|
|
2576 |
unsigned shiftWords = (B[NB-1]==0); |
|
2577 |
TB[0] = TB[NB-1] = 0; |
|
2578 |
CopyWords(TB+shiftWords, B, NB-shiftWords); |
|
2579 |
unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]); |
|
2580 |
assert(shiftBits < WORD_BITS); |
|
2581 |
ShiftWordsLeftByBits(TB, NB, shiftBits); |
|
2582 |
||
2583 |
// copy A into TA and normalize it
|
|
2584 |
TA[0] = TA[NA] = TA[NA+1] = 0; |
|
2585 |
CopyWords(TA+shiftWords, A, NA); |
|
2586 |
ShiftWordsLeftByBits(TA, NA+2, shiftBits); |
|
2587 |
||
2588 |
if (TA[NA+1]==0 && TA[NA] <= 1) |
|
2589 |
{
|
|
2590 |
Q[NA-NB+1] = Q[NA-NB] = 0; |
|
2591 |
while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0) |
|
2592 |
{
|
|
2593 |
TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB); |
|
2594 |
++Q[NA-NB]; |
|
2595 |
}
|
|
2596 |
}
|
|
2597 |
else
|
|
2598 |
{
|
|
2599 |
NA+=2; |
|
2600 |
assert(Compare(TA+NA-NB, TB, NB) < 0); |
|
2601 |
}
|
|
2602 |
||
2603 |
word BT[2]; |
|
2604 |
BT[0] = TB[NB-2] + 1; |
|
2605 |
BT[1] = TB[NB-1] + (BT[0]==0); |
|
2606 |
||
2607 |
// start reducing TA mod TB, 2 words at a time
|
|
184
by weidai
port to MSVC .NET 2005 beta 2 |
2608 |
for (size_t i=NA-2; i>=NB; i-=2) |
1
by weidai
Initial revision |
2609 |
{
|
2610 |
AtomicDivide(Q+i-NB, TA+i-2, BT); |
|
2611 |
CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB); |
|
2612 |
}
|
|
2613 |
||
2614 |
// copy TA into R, and denormalize it
|
|
2615 |
CopyWords(R, TA+shiftWords, NB); |
|
2616 |
ShiftWordsRightByBits(R, NB, shiftBits); |
|
2617 |
}
|
|
2618 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
2619 |
static inline size_t EvenWordCount(const word *X, size_t N) |
1
by weidai
Initial revision |
2620 |
{
|
2621 |
while (N && X[N-2]==0 && X[N-1]==0) |
|
2622 |
N-=2; |
|
2623 |
return N; |
|
2624 |
}
|
|
2625 |
||
2626 |
// return k
|
|
2627 |
// R[N] --- result = A^(-1) * 2^k mod M
|
|
2628 |
// T[4*N] - temporary work space
|
|
2629 |
// A[NA] -- number to take inverse of
|
|
2630 |
// M[N] --- modulus
|
|
2631 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
2632 |
unsigned int AlmostInverse(word *R, word *T, const word *A, size_t NA, const word *M, size_t N) |
1
by weidai
Initial revision |
2633 |
{
|
2634 |
assert(NA<=N && N && N%2==0); |
|
2635 |
||
2636 |
word *b = T; |
|
2637 |
word *c = T+N; |
|
2638 |
word *f = T+2*N; |
|
2639 |
word *g = T+3*N; |
|
184
by weidai
port to MSVC .NET 2005 beta 2 |
2640 |
size_t bcLen=2, fgLen=EvenWordCount(M, N); |
468
by weidai
switch to non-branching code in AlmostInverse() |
2641 |
unsigned int k=0; |
2642 |
bool s=false; |
|
1
by weidai
Initial revision |
2643 |
|
2644 |
SetWords(T, 0, 3*N); |
|
2645 |
b[0]=1; |
|
2646 |
CopyWords(f, A, NA); |
|
2647 |
CopyWords(g, M, N); |
|
2648 |
||
2649 |
while (1) |
|
2650 |
{
|
|
2651 |
word t=f[0]; |
|
2652 |
while (!t) |
|
2653 |
{
|
|
2654 |
if (EvenWordCount(f, fgLen)==0) |
|
2655 |
{
|
|
2656 |
SetWords(R, 0, N); |
|
2657 |
return 0; |
|
2658 |
}
|
|
2659 |
||
2660 |
ShiftWordsRightByWords(f, fgLen, 1); |
|
468
by weidai
switch to non-branching code in AlmostInverse() |
2661 |
bcLen += 2 * (c[bcLen-1] != 0); |
1
by weidai
Initial revision |
2662 |
assert(bcLen <= N); |
2663 |
ShiftWordsLeftByWords(c, bcLen, 1); |
|
2664 |
k+=WORD_BITS; |
|
2665 |
t=f[0]; |
|
2666 |
}
|
|
2667 |
||
468
by weidai
switch to non-branching code in AlmostInverse() |
2668 |
unsigned int i = TrailingZeros(t); |
2669 |
t >>= i; |
|
2670 |
k += i; |
|
1
by weidai
Initial revision |
2671 |
|
468
by weidai
switch to non-branching code in AlmostInverse() |
2672 |
if (t==1 && f[1]==0 && EvenWordCount(f+2, fgLen-2)==0) |
1
by weidai
Initial revision |
2673 |
{
|
468
by weidai
switch to non-branching code in AlmostInverse() |
2674 |
if (s) |
2675 |
Subtract(R, M, b, N); |
|
2676 |
else
|
|
1
by weidai
Initial revision |
2677 |
CopyWords(R, b, N); |
2678 |
return k; |
|
2679 |
}
|
|
2680 |
||
2681 |
ShiftWordsRightByBits(f, fgLen, i); |
|
468
by weidai
switch to non-branching code in AlmostInverse() |
2682 |
t = ShiftWordsLeftByBits(c, bcLen, i); |
2683 |
c[bcLen] += t; |
|
2684 |
bcLen += 2 * (t!=0); |
|
2685 |
assert(bcLen <= N); |
|
2686 |
||
2687 |
bool swap = Compare(f, g, fgLen)==-1; |
|
2688 |
ConditionalSwapPointers(swap, f, g); |
|
2689 |
ConditionalSwapPointers(swap, b, c); |
|
2690 |
s ^= swap; |
|
2691 |
||
2692 |
fgLen -= 2 * !(f[fgLen-2] | f[fgLen-1]); |
|
1
by weidai
Initial revision |
2693 |
|
2694 |
Subtract(f, f, g, fgLen); |
|
468
by weidai
switch to non-branching code in AlmostInverse() |
2695 |
t = Add(b, b, c, bcLen); |
2696 |
b[bcLen] += t; |
|
2697 |
bcLen += 2*t; |
|
2698 |
assert(bcLen <= N); |
|
1
by weidai
Initial revision |
2699 |
}
|
2700 |
}
|
|
2701 |
||
2702 |
// R[N] - result = A/(2^k) mod M
|
|
2703 |
// A[N] - input
|
|
2704 |
// M[N] - modulus
|
|
2705 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
2706 |
void DivideByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N) |
1
by weidai
Initial revision |
2707 |
{
|
2708 |
CopyWords(R, A, N); |
|
2709 |
||
2710 |
while (k--) |
|
2711 |
{
|
|
2712 |
if (R[0]%2==0) |
|
2713 |
ShiftWordsRightByBits(R, N, 1); |
|
2714 |
else
|
|
2715 |
{
|
|
2716 |
word carry = Add(R, R, M, N); |
|
2717 |
ShiftWordsRightByBits(R, N, 1); |
|
2718 |
R[N-1] += carry<<(WORD_BITS-1); |
|
2719 |
}
|
|
2720 |
}
|
|
2721 |
}
|
|
2722 |
||
2723 |
// R[N] - result = A*(2^k) mod M
|
|
2724 |
// A[N] - input
|
|
2725 |
// M[N] - modulus
|
|
2726 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
2727 |
void MultiplyByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N) |
1
by weidai
Initial revision |
2728 |
{
|
2729 |
CopyWords(R, A, N); |
|
2730 |
||
2731 |
while (k--) |
|
2732 |
if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0) |
|
2733 |
Subtract(R, R, M, N); |
|
2734 |
}
|
|
2735 |
||
2736 |
// ******************************************************************
|
|
2737 |
||
207
by weidai
improve Integer initialization |
2738 |
InitializeInteger::InitializeInteger() |
2739 |
{
|
|
2740 |
if (!g_pAssignIntToInteger) |
|
2741 |
{
|
|
270
by weidai
MMX/SSE2 optimizations |
2742 |
SetFunctionPointers(); |
207
by weidai
improve Integer initialization |
2743 |
g_pAssignIntToInteger = AssignIntToInteger; |
2744 |
}
|
|
2745 |
}
|
|
2746 |
||
1
by weidai
Initial revision |
2747 |
static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8}; |
2748 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
2749 |
static inline size_t RoundupSize(size_t n) |
1
by weidai
Initial revision |
2750 |
{
|
2751 |
if (n<=8) |
|
2752 |
return RoundupSizeTable[n]; |
|
2753 |
else if (n<=16) |
|
2754 |
return 16; |
|
2755 |
else if (n<=32) |
|
2756 |
return 32; |
|
2757 |
else if (n<=64) |
|
2758 |
return 64; |
|
202
by weidai
fix MSVC 2005 warnings |
2759 |
else return size_t(1) << BitPrecision(n-1); |
1
by weidai
Initial revision |
2760 |
}
|
2761 |
||
2762 |
Integer::Integer() |
|
2763 |
: reg(2), sign(POSITIVE) |
|
2764 |
{
|
|
2765 |
reg[0] = reg[1] = 0; |
|
2766 |
}
|
|
2767 |
||
2768 |
Integer::Integer(const Integer& t) |
|
2769 |
: reg(RoundupSize(t.WordCount())), sign(t.sign) |
|
2770 |
{
|
|
2771 |
CopyWords(reg, t.reg, reg.size()); |
|
2772 |
}
|
|
2773 |
||
100
by weidai
fix bugs in 64-bit CPU support |
2774 |
Integer::Integer(Sign s, lword value) |
2775 |
: reg(2), sign(s) |
|
2776 |
{
|
|
2777 |
reg[0] = word(value); |
|
2778 |
reg[1] = word(SafeRightShift<WORD_BITS>(value)); |
|
2779 |
}
|
|
2780 |
||
1
by weidai
Initial revision |
2781 |
Integer::Integer(signed long value) |
2782 |
: reg(2) |
|
2783 |
{
|
|
2784 |
if (value >= 0) |
|
2785 |
sign = POSITIVE; |
|
2786 |
else
|
|
2787 |
{
|
|
2788 |
sign = NEGATIVE; |
|
2789 |
value = -value; |
|
2790 |
}
|
|
2791 |
reg[0] = word(value); |
|
100
by weidai
fix bugs in 64-bit CPU support |
2792 |
reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value)); |
1
by weidai
Initial revision |
2793 |
}
|
2794 |
||
32
by weidai
fix warnings for VC7 and GCC |
2795 |
Integer::Integer(Sign s, word high, word low) |
2796 |
: reg(2), sign(s) |
|
2797 |
{
|
|
2798 |
reg[0] = low; |
|
2799 |
reg[1] = high; |
|
2800 |
}
|
|
2801 |
||
1
by weidai
Initial revision |
2802 |
bool Integer::IsConvertableToLong() const |
2803 |
{
|
|
2804 |
if (ByteCount() > sizeof(long)) |
|
2805 |
return false; |
|
2806 |
||
202
by weidai
fix MSVC 2005 warnings |
2807 |
unsigned long value = (unsigned long)reg[0]; |
2808 |
value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]); |
|
1
by weidai
Initial revision |
2809 |
|
2810 |
if (sign==POSITIVE) |
|
2811 |
return (signed long)value >= 0; |
|
2812 |
else
|
|
2813 |
return -(signed long)value < 0; |
|
2814 |
}
|
|
2815 |
||
2816 |
signed long Integer::ConvertToLong() const |
|
2817 |
{
|
|
2818 |
assert(IsConvertableToLong()); |
|
2819 |
||
202
by weidai
fix MSVC 2005 warnings |
2820 |
unsigned long value = (unsigned long)reg[0]; |
2821 |
value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]); |
|
1
by weidai
Initial revision |
2822 |
return sign==POSITIVE ? value : -(signed long)value; |
2823 |
}
|
|
2824 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
2825 |
Integer::Integer(BufferedTransformation &encodedInteger, size_t byteCount, Signedness s) |
1
by weidai
Initial revision |
2826 |
{
|
2827 |
Decode(encodedInteger, byteCount, s); |
|
2828 |
}
|
|
2829 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
2830 |
Integer::Integer(const byte *encodedInteger, size_t byteCount, Signedness s) |
1
by weidai
Initial revision |
2831 |
{
|
2832 |
Decode(encodedInteger, byteCount, s); |
|
2833 |
}
|
|
2834 |
||
2835 |
Integer::Integer(BufferedTransformation &bt) |
|
2836 |
{
|
|
2837 |
BERDecode(bt); |
|
2838 |
}
|
|
2839 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
2840 |
Integer::Integer(RandomNumberGenerator &rng, size_t bitcount) |
1
by weidai
Initial revision |
2841 |
{
|
2842 |
Randomize(rng, bitcount); |
|
2843 |
}
|
|
2844 |
||
2845 |
Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod) |
|
2846 |
{
|
|
2847 |
if (!Randomize(rng, min, max, rnType, equiv, mod)) |
|
2848 |
throw Integer::RandomNumberNotFound(); |
|
2849 |
}
|
|
2850 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
2851 |
Integer Integer::Power2(size_t e) |
1
by weidai
Initial revision |
2852 |
{
|
2853 |
Integer r((word)0, BitsToWords(e+1)); |
|
2854 |
r.SetBit(e); |
|
2855 |
return r; |
|
2856 |
}
|
|
2857 |
||
106
by weidai
fix potential threading problem with initialization of static objects |
2858 |
template <long i> |
2859 |
struct NewInteger |
|
2860 |
{
|
|
2861 |
Integer * operator()() const |
|
2862 |
{
|
|
2863 |
return new Integer(i); |
|
2864 |
}
|
|
2865 |
};
|
|
2866 |
||
1
by weidai
Initial revision |
2867 |
const Integer &Integer::Zero() |
2868 |
{
|
|
106
by weidai
fix potential threading problem with initialization of static objects |
2869 |
return Singleton<Integer>().Ref(); |
1
by weidai
Initial revision |
2870 |
}
|
2871 |
||
2872 |
const Integer &Integer::One() |
|
2873 |
{
|
|
106
by weidai
fix potential threading problem with initialization of static objects |
2874 |
return Singleton<Integer, NewInteger<1> >().Ref(); |
1
by weidai
Initial revision |
2875 |
}
|
2876 |
||
2877 |
const Integer &Integer::Two() |
|
2878 |
{
|
|
106
by weidai
fix potential threading problem with initialization of static objects |
2879 |
return Singleton<Integer, NewInteger<2> >().Ref(); |
1
by weidai
Initial revision |
2880 |
}
|
2881 |
||
2882 |
bool Integer::operator!() const |
|
2883 |
{
|
|
2884 |
return IsNegative() ? false : (reg[0]==0 && WordCount()==0); |
|
2885 |
}
|
|
2886 |
||
2887 |
Integer& Integer::operator=(const Integer& t) |
|
2888 |
{
|
|
2889 |
if (this != &t) |
|
2890 |
{
|
|
270
by weidai
MMX/SSE2 optimizations |
2891 |
if (reg.size() != t.reg.size() || t.reg[t.reg.size()/2] == 0) |
2892 |
reg.New(RoundupSize(t.WordCount())); |
|
1
by weidai
Initial revision |
2893 |
CopyWords(reg, t.reg, reg.size()); |
2894 |
sign = t.sign; |
|
2895 |
}
|
|
2896 |
return *this; |
|
2897 |
}
|
|
2898 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
2899 |
bool Integer::GetBit(size_t n) const |
1
by weidai
Initial revision |
2900 |
{
|
2901 |
if (n/WORD_BITS >= reg.size()) |
|
2902 |
return 0; |
|
2903 |
else
|
|
2904 |
return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1); |
|
2905 |
}
|
|
2906 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
2907 |
void Integer::SetBit(size_t n, bool value) |
1
by weidai
Initial revision |
2908 |
{
|
2909 |
if (value) |
|
2910 |
{
|
|
2911 |
reg.CleanGrow(RoundupSize(BitsToWords(n+1))); |
|
2912 |
reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS)); |
|
2913 |
}
|
|
2914 |
else
|
|
2915 |
{
|
|
2916 |
if (n/WORD_BITS < reg.size()) |
|
2917 |
reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS)); |
|
2918 |
}
|
|
2919 |
}
|
|
2920 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
2921 |
byte Integer::GetByte(size_t n) const |
1
by weidai
Initial revision |
2922 |
{
|
2923 |
if (n/WORD_SIZE >= reg.size()) |
|
2924 |
return 0; |
|
2925 |
else
|
|
2926 |
return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8)); |
|
2927 |
}
|
|
2928 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
2929 |
void Integer::SetByte(size_t n, byte value) |
1
by weidai
Initial revision |
2930 |
{
|
2931 |
reg.CleanGrow(RoundupSize(BytesToWords(n+1))); |
|
2932 |
reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE)); |
|
2933 |
reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE)); |
|
2934 |
}
|
|
2935 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
2936 |
lword Integer::GetBits(size_t i, size_t n) const |
1
by weidai
Initial revision |
2937 |
{
|
184
by weidai
port to MSVC .NET 2005 beta 2 |
2938 |
lword v = 0; |
2939 |
assert(n <= sizeof(v)*8); |
|
1
by weidai
Initial revision |
2940 |
for (unsigned int j=0; j<n; j++) |
184
by weidai
port to MSVC .NET 2005 beta 2 |
2941 |
v |= lword(GetBit(i+j)) << j; |
1
by weidai
Initial revision |
2942 |
return v; |
2943 |
}
|
|
2944 |
||
2945 |
Integer Integer::operator-() const |
|
2946 |
{
|
|
2947 |
Integer result(*this); |
|
2948 |
result.Negate(); |
|
2949 |
return result; |
|
2950 |
}
|
|
2951 |
||
2952 |
Integer Integer::AbsoluteValue() const |
|
2953 |
{
|
|
2954 |
Integer result(*this); |
|
2955 |
result.sign = POSITIVE; |
|
2956 |
return result; |
|
2957 |
}
|
|
2958 |
||
2959 |
void Integer::swap(Integer &a) |
|
2960 |
{
|
|
2961 |
reg.swap(a.reg); |
|
2962 |
std::swap(sign, a.sign); |
|
2963 |
}
|
|
2964 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
2965 |
Integer::Integer(word value, size_t length) |
1
by weidai
Initial revision |
2966 |
: reg(RoundupSize(length)), sign(POSITIVE) |
2967 |
{
|
|
2968 |
reg[0] = value; |
|
2969 |
SetWords(reg+1, 0, reg.size()-1); |
|
2970 |
}
|
|
2971 |
||
2972 |
template <class T> |
|
2973 |
static Integer StringToInteger(const T *str) |
|
2974 |
{
|
|
202
by weidai
fix MSVC 2005 warnings |
2975 |
int radix; |
38
by weidai
STLport workaround |
2976 |
// GCC workaround
|
2977 |
// std::char_traits<wchar_t>::length() not defined in GCC 3.2 and STLport 4.5.3
|
|
1
by weidai
Initial revision |
2978 |
unsigned int length; |
2979 |
for (length = 0; str[length] != 0; length++) {} |
|
2980 |
||
2981 |
Integer v; |
|
2982 |
||
2983 |
if (length == 0) |
|
2984 |
return v; |
|
2985 |
||
2986 |
switch (str[length-1]) |
|
2987 |
{
|
|
2988 |
case 'h': |
|
2989 |
case 'H': |
|
2990 |
radix=16; |
|
2991 |
break; |
|
2992 |
case 'o': |
|
2993 |
case 'O': |
|
2994 |
radix=8; |
|
2995 |
break; |
|
2996 |
case 'b': |
|
2997 |
case 'B': |
|
2998 |
radix=2; |
|
2999 |
break; |
|
3000 |
default: |
|
3001 |
radix=10; |
|
3002 |
}
|
|
3003 |
||
3004 |
if (length > 2 && str[0] == '0' && str[1] == 'x') |
|
3005 |
radix = 16; |
|
3006 |
||
3007 |
for (unsigned i=0; i<length; i++) |
|
3008 |
{
|
|
202
by weidai
fix MSVC 2005 warnings |
3009 |
int digit; |
1
by weidai
Initial revision |
3010 |
|
3011 |
if (str[i] >= '0' && str[i] <= '9') |
|
3012 |
digit = str[i] - '0'; |
|
3013 |
else if (str[i] >= 'A' && str[i] <= 'F') |
|
3014 |
digit = str[i] - 'A' + 10; |
|
3015 |
else if (str[i] >= 'a' && str[i] <= 'f') |
|
3016 |
digit = str[i] - 'a' + 10; |
|
3017 |
else
|
|
3018 |
digit = radix; |
|
3019 |
||
3020 |
if (digit < radix) |
|
3021 |
{
|
|
3022 |
v *= radix; |
|
3023 |
v += digit; |
|
3024 |
}
|
|
3025 |
}
|
|
3026 |
||
3027 |
if (str[0] == '-') |
|
3028 |
v.Negate(); |
|
3029 |
||
3030 |
return v; |
|
3031 |
}
|
|
3032 |
||
3033 |
Integer::Integer(const char *str) |
|
3034 |
: reg(2), sign(POSITIVE) |
|
3035 |
{
|
|
3036 |
*this = StringToInteger(str); |
|
3037 |
}
|
|
3038 |
||
3039 |
Integer::Integer(const wchar_t *str) |
|
3040 |
: reg(2), sign(POSITIVE) |
|
3041 |
{
|
|
3042 |
*this = StringToInteger(str); |
|
3043 |
}
|
|
3044 |
||
3045 |
unsigned int Integer::WordCount() const |
|
3046 |
{
|
|
184
by weidai
port to MSVC .NET 2005 beta 2 |
3047 |
return (unsigned int)CountWords(reg, reg.size()); |
1
by weidai
Initial revision |
3048 |
}
|
3049 |
||
3050 |
unsigned int Integer::ByteCount() const |
|
3051 |
{
|
|
3052 |
unsigned wordCount = WordCount(); |
|
3053 |
if (wordCount) |
|
3054 |
return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]); |
|
3055 |
else
|
|
3056 |
return 0; |
|
3057 |
}
|
|
3058 |
||
3059 |
unsigned int Integer::BitCount() const |
|
3060 |
{
|
|
3061 |
unsigned wordCount = WordCount(); |
|
3062 |
if (wordCount) |
|
3063 |
return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]); |
|
3064 |
else
|
|
3065 |
return 0; |
|
3066 |
}
|
|
3067 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
3068 |
void Integer::Decode(const byte *input, size_t inputLen, Signedness s) |
1
by weidai
Initial revision |
3069 |
{
|
3070 |
StringStore store(input, inputLen); |
|
3071 |
Decode(store, inputLen, s); |
|
3072 |
}
|
|
3073 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
3074 |
void Integer::Decode(BufferedTransformation &bt, size_t inputLen, Signedness s) |
1
by weidai
Initial revision |
3075 |
{
|
3076 |
assert(bt.MaxRetrievable() >= inputLen); |
|
3077 |
||
3078 |
byte b; |
|
3079 |
bt.Peek(b); |
|
3080 |
sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE; |
|
3081 |
||
3082 |
while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff)) |
|
3083 |
{
|
|
3084 |
bt.Skip(1); |
|
3085 |
inputLen--; |
|
3086 |
bt.Peek(b); |
|
3087 |
}
|
|
3088 |
||
3089 |
reg.CleanNew(RoundupSize(BytesToWords(inputLen))); |
|
3090 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
3091 |
for (size_t i=inputLen; i > 0; i--) |
1
by weidai
Initial revision |
3092 |
{
|
3093 |
bt.Get(b); |
|
100
by weidai
fix bugs in 64-bit CPU support |
3094 |
reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8; |
1
by weidai
Initial revision |
3095 |
}
|
3096 |
||
3097 |
if (sign == NEGATIVE) |
|
3098 |
{
|
|
184
by weidai
port to MSVC .NET 2005 beta 2 |
3099 |
for (size_t i=inputLen; i<reg.size()*WORD_SIZE; i++) |
100
by weidai
fix bugs in 64-bit CPU support |
3100 |
reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8; |
1
by weidai
Initial revision |
3101 |
TwosComplement(reg, reg.size()); |
3102 |
}
|
|
3103 |
}
|
|
3104 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
3105 |
size_t Integer::MinEncodedSize(Signedness signedness) const |
1
by weidai
Initial revision |
3106 |
{
|
3107 |
unsigned int outputLen = STDMAX(1U, ByteCount()); |
|
3108 |
if (signedness == UNSIGNED) |
|
3109 |
return outputLen; |
|
3110 |
if (NotNegative() && (GetByte(outputLen-1) & 0x80)) |
|
3111 |
outputLen++; |
|
3112 |
if (IsNegative() && *this < -Power2(outputLen*8-1)) |
|
3113 |
outputLen++; |
|
3114 |
return outputLen; |
|
3115 |
}
|
|
3116 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
3117 |
void Integer::Encode(byte *output, size_t outputLen, Signedness signedness) const |
1
by weidai
Initial revision |
3118 |
{
|
3119 |
ArraySink sink(output, outputLen); |
|
184
by weidai
port to MSVC .NET 2005 beta 2 |
3120 |
Encode(sink, outputLen, signedness); |
1
by weidai
Initial revision |
3121 |
}
|
3122 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
3123 |
void Integer::Encode(BufferedTransformation &bt, size_t outputLen, Signedness signedness) const |
1
by weidai
Initial revision |
3124 |
{
|
3125 |
if (signedness == UNSIGNED || NotNegative()) |
|
3126 |
{
|
|
184
by weidai
port to MSVC .NET 2005 beta 2 |
3127 |
for (size_t i=outputLen; i > 0; i--) |
1
by weidai
Initial revision |
3128 |
bt.Put(GetByte(i-1)); |
3129 |
}
|
|
3130 |
else
|
|
3131 |
{
|
|
3132 |
// take two's complement of *this
|
|
203
by weidai
fix Integer::Encode |
3133 |
Integer temp = Integer::Power2(8*STDMAX((size_t)ByteCount(), outputLen)) + *this; |
184
by weidai
port to MSVC .NET 2005 beta 2 |
3134 |
temp.Encode(bt, outputLen, UNSIGNED); |
1
by weidai
Initial revision |
3135 |
}
|
3136 |
}
|
|
3137 |
||
3138 |
void Integer::DEREncode(BufferedTransformation &bt) const |
|
3139 |
{
|
|
3140 |
DERGeneralEncoder enc(bt, INTEGER); |
|
3141 |
Encode(enc, MinEncodedSize(SIGNED), SIGNED); |
|
3142 |
enc.MessageEnd(); |
|
3143 |
}
|
|
3144 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
3145 |
void Integer::BERDecode(const byte *input, size_t len) |
1
by weidai
Initial revision |
3146 |
{
|
3147 |
StringStore store(input, len); |
|
3148 |
BERDecode(store); |
|
3149 |
}
|
|
3150 |
||
3151 |
void Integer::BERDecode(BufferedTransformation &bt) |
|
3152 |
{
|
|
3153 |
BERGeneralDecoder dec(bt, INTEGER); |
|
3154 |
if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength()) |
|
3155 |
BERDecodeError(); |
|
184
by weidai
port to MSVC .NET 2005 beta 2 |
3156 |
Decode(dec, (size_t)dec.RemainingLength(), SIGNED); |
1
by weidai
Initial revision |
3157 |
dec.MessageEnd(); |
3158 |
}
|
|
3159 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
3160 |
void Integer::DEREncodeAsOctetString(BufferedTransformation &bt, size_t length) const |
1
by weidai
Initial revision |
3161 |
{
|
3162 |
DERGeneralEncoder enc(bt, OCTET_STRING); |
|
3163 |
Encode(enc, length); |
|
3164 |
enc.MessageEnd(); |
|
3165 |
}
|
|
3166 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
3167 |
void Integer::BERDecodeAsOctetString(BufferedTransformation &bt, size_t length) |
1
by weidai
Initial revision |
3168 |
{
|
3169 |
BERGeneralDecoder dec(bt, OCTET_STRING); |
|
3170 |
if (!dec.IsDefiniteLength() || dec.RemainingLength() != length) |
|
3171 |
BERDecodeError(); |
|
3172 |
Decode(dec, length); |
|
3173 |
dec.MessageEnd(); |
|
3174 |
}
|
|
3175 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
3176 |
size_t Integer::OpenPGPEncode(byte *output, size_t len) const |
1
by weidai
Initial revision |
3177 |
{
|
3178 |
ArraySink sink(output, len); |
|
3179 |
return OpenPGPEncode(sink); |
|
3180 |
}
|
|
3181 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
3182 |
size_t Integer::OpenPGPEncode(BufferedTransformation &bt) const |
1
by weidai
Initial revision |
3183 |
{
|
3184 |
word16 bitCount = BitCount(); |
|
3185 |
bt.PutWord16(bitCount); |
|
184
by weidai
port to MSVC .NET 2005 beta 2 |
3186 |
size_t byteCount = BitsToBytes(bitCount); |
3187 |
Encode(bt, byteCount); |
|
3188 |
return 2 + byteCount; |
|
1
by weidai
Initial revision |
3189 |
}
|
3190 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
3191 |
void Integer::OpenPGPDecode(const byte *input, size_t len) |
1
by weidai
Initial revision |
3192 |
{
|
3193 |
StringStore store(input, len); |
|
3194 |
OpenPGPDecode(store); |
|
3195 |
}
|
|
3196 |
||
3197 |
void Integer::OpenPGPDecode(BufferedTransformation &bt) |
|
3198 |
{
|
|
3199 |
word16 bitCount; |
|
3200 |
if (bt.GetWord16(bitCount) != 2 || bt.MaxRetrievable() < BitsToBytes(bitCount)) |
|
3201 |
throw OpenPGPDecodeErr(); |
|
3202 |
Decode(bt, BitsToBytes(bitCount)); |
|
3203 |
}
|
|
3204 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
3205 |
void Integer::Randomize(RandomNumberGenerator &rng, size_t nbits) |
1
by weidai
Initial revision |
3206 |
{
|
184
by weidai
port to MSVC .NET 2005 beta 2 |
3207 |
const size_t nbytes = nbits/8 + 1; |
1
by weidai
Initial revision |
3208 |
SecByteBlock buf(nbytes); |
3209 |
rng.GenerateBlock(buf, nbytes); |
|
3210 |
if (nbytes) |
|
3211 |
buf[0] = (byte)Crop(buf[0], nbits % 8); |
|
3212 |
Decode(buf, nbytes, UNSIGNED); |
|
3213 |
}
|
|
3214 |
||
3215 |
void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max) |
|
3216 |
{
|
|
3217 |
if (min > max) |
|
3218 |
throw InvalidArgument("Integer: Min must be no greater than Max"); |
|
3219 |
||
3220 |
Integer range = max - min; |
|
3221 |
const unsigned int nbits = range.BitCount(); |
|
3222 |
||
3223 |
do
|
|
3224 |
{
|
|
3225 |
Randomize(rng, nbits); |
|
3226 |
}
|
|
3227 |
while (*this > range); |
|
3228 |
||
3229 |
*this += min; |
|
3230 |
}
|
|
3231 |
||
3232 |
bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod) |
|
3233 |
{
|
|
3234 |
return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max)("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod)); |
|
3235 |
}
|
|
3236 |
||
3237 |
class KDF2_RNG : public RandomNumberGenerator |
|
3238 |
{
|
|
3239 |
public: |
|
184
by weidai
port to MSVC .NET 2005 beta 2 |
3240 |
KDF2_RNG(const byte *seed, size_t seedSize) |
1
by weidai
Initial revision |
3241 |
: m_counter(0), m_counterAndSeed(seedSize + 4) |
3242 |
{
|
|
3243 |
memcpy(m_counterAndSeed + 4, seed, seedSize); |
|
3244 |
}
|
|
3245 |
||
250
by weidai
fix compile with Sun CC 64-bit |
3246 |
void GenerateBlock(byte *output, size_t size) |
1
by weidai
Initial revision |
3247 |
{
|
270
by weidai
MMX/SSE2 optimizations |
3248 |
PutWord(false, BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter); |
1
by weidai
Initial revision |
3249 |
++m_counter; |
86
by weidai
added support for using encoding parameters and key derivation parameters |
3250 |
P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULL, 0); |
1
by weidai
Initial revision |
3251 |
}
|
3252 |
||
3253 |
private: |
|
3254 |
word32 m_counter; |
|
3255 |
SecByteBlock m_counterAndSeed; |
|
3256 |
};
|
|
3257 |
||
3258 |
bool Integer::GenerateRandomNoThrow(RandomNumberGenerator &i_rng, const NameValuePairs ¶ms) |
|
3259 |
{
|
|
3260 |
Integer min = params.GetValueWithDefault("Min", Integer::Zero()); |
|
3261 |
Integer max; |
|
3262 |
if (!params.GetValue("Max", max)) |
|
3263 |
{
|
|
3264 |
int bitLength; |
|
3265 |
if (params.GetIntValue("BitLength", bitLength)) |
|
3266 |
max = Integer::Power2(bitLength); |
|
3267 |
else
|
|
3268 |
throw InvalidArgument("Integer: missing Max argument"); |
|
3269 |
}
|
|
3270 |
if (min > max) |
|
3271 |
throw InvalidArgument("Integer: Min must be no greater than Max"); |
|
3272 |
||
3273 |
Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero()); |
|
3274 |
Integer mod = params.GetValueWithDefault("Mod", Integer::One()); |
|
3275 |
||
3276 |
if (equiv.IsNegative() || equiv >= mod) |
|
3277 |
throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument"); |
|
3278 |
||
3279 |
Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY); |
|
3280 |
||
3281 |
member_ptr<KDF2_RNG> kdf2Rng; |
|
3282 |
ConstByteArrayParameter seed; |
|
412
by weidai
changes for 5.6: |
3283 |
if (params.GetValue(Name::Seed(), seed)) |
1
by weidai
Initial revision |
3284 |
{
|
3285 |
ByteQueue bq; |
|
3286 |
DERSequenceEncoder seq(bq); |
|
3287 |
min.DEREncode(seq); |
|
3288 |
max.DEREncode(seq); |
|
3289 |
equiv.DEREncode(seq); |
|
3290 |
mod.DEREncode(seq); |
|
3291 |
DEREncodeUnsigned(seq, rnType); |
|
3292 |
DEREncodeOctetString(seq, seed.begin(), seed.size()); |
|
3293 |
seq.MessageEnd(); |
|
3294 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
3295 |
SecByteBlock finalSeed((size_t)bq.MaxRetrievable()); |
1
by weidai
Initial revision |
3296 |
bq.Get(finalSeed, finalSeed.size()); |
3297 |
kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size())); |
|
3298 |
}
|
|
3299 |
RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng; |
|
3300 |
||
3301 |
switch (rnType) |
|
3302 |
{
|
|
3303 |
case ANY: |
|
3304 |
if (mod == One()) |
|
3305 |
Randomize(rng, min, max); |
|
3306 |
else
|
|
3307 |
{
|
|
3308 |
Integer min1 = min + (equiv-min)%mod; |
|
3309 |
if (max < min1) |
|
3310 |
return false; |
|
3311 |
Randomize(rng, Zero(), (max - min1) / mod); |
|
3312 |
*this *= mod; |
|
3313 |
*this += min1; |
|
3314 |
}
|
|
3315 |
return true; |
|
3316 |
||
3317 |
case PRIME: |
|
3318 |
{
|
|
86
by weidai
added support for using encoding parameters and key derivation parameters |
3319 |
const PrimeSelector *pSelector = params.GetValueWithDefault(Name::PointerToPrimeSelector(), (const PrimeSelector *)NULL); |
1
by weidai
Initial revision |
3320 |
|
3321 |
int i; |
|
3322 |
i = 0; |
|
3323 |
while (1) |
|
3324 |
{
|
|
3325 |
if (++i==16) |
|
3326 |
{
|
|
3327 |
// check if there are any suitable primes in [min, max]
|
|
3328 |
Integer first = min; |
|
3329 |
if (FirstPrime(first, max, equiv, mod, pSelector)) |
|
3330 |
{
|
|
3331 |
// if there is only one suitable prime, we're done
|
|
3332 |
*this = first; |
|
3333 |
if (!FirstPrime(first, max, equiv, mod, pSelector)) |
|
3334 |
return true; |
|
3335 |
}
|
|
3336 |
else
|
|
3337 |
return false; |
|
3338 |
}
|
|
3339 |
||
3340 |
Randomize(rng, min, max); |
|
3341 |
if (FirstPrime(*this, STDMIN(*this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector)) |
|
3342 |
return true; |
|
3343 |
}
|
|
3344 |
}
|
|
3345 |
||
3346 |
default: |
|
3347 |
throw InvalidArgument("Integer: invalid RandomNumberType argument"); |
|
3348 |
}
|
|
3349 |
}
|
|
3350 |
||
3351 |
std::istream& operator>>(std::istream& in, Integer &a) |
|
3352 |
{
|
|
3353 |
char c; |
|
3354 |
unsigned int length = 0; |
|
3355 |
SecBlock<char> str(length + 16); |
|
3356 |
||
3357 |
std::ws(in); |
|
3358 |
||
3359 |
do
|
|
3360 |
{
|
|
3361 |
in.read(&c, 1); |
|
3362 |
str[length++] = c; |
|
3363 |
if (length >= str.size()) |
|
3364 |
str.Grow(length + 16); |
|
3365 |
}
|
|
3366 |
while (in && (c=='-' || c=='x' || (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') || c=='h' || c=='H' || c=='o' || c=='O' || c==',' || c=='.')); |
|
3367 |
||
3368 |
if (in.gcount()) |
|
3369 |
in.putback(c); |
|
3370 |
str[length-1] = '\0'; |
|
3371 |
a = Integer(str); |
|
3372 |
||
3373 |
return in; |
|
3374 |
}
|
|
3375 |
||
3376 |
std::ostream& operator<<(std::ostream& out, const Integer &a) |
|
3377 |
{
|
|
3378 |
// Get relevant conversion specifications from ostream.
|
|
3379 |
long f = out.flags() & std::ios::basefield; // Get base digits. |
|
3380 |
int base, block; |
|
3381 |
char suffix; |
|
3382 |
switch(f) |
|
3383 |
{
|
|
3384 |
case std::ios::oct : |
|
3385 |
base = 8; |
|
3386 |
block = 8; |
|
3387 |
suffix = 'o'; |
|
3388 |
break; |
|
3389 |
case std::ios::hex : |
|
3390 |
base = 16; |
|
3391 |
block = 4; |
|
3392 |
suffix = 'h'; |
|
3393 |
break; |
|
3394 |
default : |
|
3395 |
base = 10; |
|
3396 |
block = 3; |
|
3397 |
suffix = '.'; |
|
3398 |
}
|
|
3399 |
||
3400 |
Integer temp1=a, temp2; |
|
421
by weidai
from Jeffery Walton: move *.dat files into TestData, make Integer operator<< respect ios::uppercase flag |
3401 |
|
1
by weidai
Initial revision |
3402 |
if (a.IsNegative()) |
3403 |
{
|
|
3404 |
out << '-'; |
|
3405 |
temp1.Negate(); |
|
3406 |
}
|
|
3407 |
||
3408 |
if (!a) |
|
3409 |
out << '0'; |
|
3410 |
||
421
by weidai
from Jeffery Walton: move *.dat files into TestData, make Integer operator<< respect ios::uppercase flag |
3411 |
static const char upper[]="0123456789ABCDEF"; |
3412 |
static const char lower[]="0123456789abcdef"; |
|
3413 |
||
3414 |
const char* vec = (out.flags() & std::ios::uppercase) ? upper : lower; |
|
3415 |
unsigned i=0; |
|
3416 |
SecBlock<char> s(a.BitCount() / (BitPrecision(base)-1) + 1); |
|
3417 |
||
1
by weidai
Initial revision |
3418 |
while (!!temp1) |
3419 |
{
|
|
3420 |
word digit; |
|
3421 |
Integer::Divide(digit, temp2, temp1, base); |
|
3422 |
s[i++]=vec[digit]; |
|
421
by weidai
from Jeffery Walton: move *.dat files into TestData, make Integer operator<< respect ios::uppercase flag |
3423 |
temp1.swap(temp2); |
1
by weidai
Initial revision |
3424 |
}
|
3425 |
||
3426 |
while (i--) |
|
3427 |
{
|
|
3428 |
out << s[i]; |
|
3429 |
// if (i && !(i%block))
|
|
3430 |
// out << ",";
|
|
3431 |
}
|
|
3432 |
return out << suffix; |
|
3433 |
}
|
|
3434 |
||
3435 |
Integer& Integer::operator++() |
|
3436 |
{
|
|
3437 |
if (NotNegative()) |
|
3438 |
{
|
|
3439 |
if (Increment(reg, reg.size())) |
|
3440 |
{
|
|
3441 |
reg.CleanGrow(2*reg.size()); |
|
3442 |
reg[reg.size()/2]=1; |
|
3443 |
}
|
|
3444 |
}
|
|
3445 |
else
|
|
3446 |
{
|
|
3447 |
word borrow = Decrement(reg, reg.size()); |
|
3448 |
assert(!borrow); |
|
3449 |
if (WordCount()==0) |
|
3450 |
*this = Zero(); |
|
3451 |
}
|
|
3452 |
return *this; |
|
3453 |
}
|
|
3454 |
||
3455 |
Integer& Integer::operator--() |
|
3456 |
{
|
|
3457 |
if (IsNegative()) |
|
3458 |
{
|
|
3459 |
if (Increment(reg, reg.size())) |
|
3460 |
{
|
|
3461 |
reg.CleanGrow(2*reg.size()); |
|
3462 |
reg[reg.size()/2]=1; |
|
3463 |
}
|
|
3464 |
}
|
|
3465 |
else
|
|
3466 |
{
|
|
3467 |
if (Decrement(reg, reg.size())) |
|
3468 |
*this = -One(); |
|
3469 |
}
|
|
3470 |
return *this; |
|
3471 |
}
|
|
3472 |
||
3473 |
void PositiveAdd(Integer &sum, const Integer &a, const Integer& b) |
|
3474 |
{
|
|
202
by weidai
fix MSVC 2005 warnings |
3475 |
int carry; |
1
by weidai
Initial revision |
3476 |
if (a.reg.size() == b.reg.size()) |
3477 |
carry = Add(sum.reg, a.reg, b.reg, a.reg.size()); |
|
3478 |
else if (a.reg.size() > b.reg.size()) |
|
3479 |
{
|
|
3480 |
carry = Add(sum.reg, a.reg, b.reg, b.reg.size()); |
|
3481 |
CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size()); |
|
3482 |
carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry); |
|
3483 |
}
|
|
3484 |
else
|
|
3485 |
{
|
|
3486 |
carry = Add(sum.reg, a.reg, b.reg, a.reg.size()); |
|
3487 |
CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size()); |
|
3488 |
carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry); |
|
3489 |
}
|
|
3490 |
||
3491 |
if (carry) |
|
3492 |
{
|
|
3493 |
sum.reg.CleanGrow(2*sum.reg.size()); |
|
3494 |
sum.reg[sum.reg.size()/2] = 1; |
|
3495 |
}
|
|
3496 |
sum.sign = Integer::POSITIVE; |
|
3497 |
}
|
|
3498 |
||
3499 |
void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b) |
|
3500 |
{
|
|
3501 |
unsigned aSize = a.WordCount(); |
|
3502 |
aSize += aSize%2; |
|
3503 |
unsigned bSize = b.WordCount(); |
|
3504 |
bSize += bSize%2; |
|
3505 |
||
3506 |
if (aSize == bSize) |
|
3507 |
{
|
|
3508 |
if (Compare(a.reg, b.reg, aSize) >= 0) |
|
3509 |
{
|
|
3510 |
Subtract(diff.reg, a.reg, b.reg, aSize); |
|
3511 |
diff.sign = Integer::POSITIVE; |
|
3512 |
}
|
|
3513 |
else
|
|
3514 |
{
|
|
3515 |
Subtract(diff.reg, b.reg, a.reg, aSize); |
|
3516 |
diff.sign = Integer::NEGATIVE; |
|
3517 |
}
|
|
3518 |
}
|
|
3519 |
else if (aSize > bSize) |
|
3520 |
{
|
|
3521 |
word borrow = Subtract(diff.reg, a.reg, b.reg, bSize); |
|
3522 |
CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize); |
|
3523 |
borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow); |
|
3524 |
assert(!borrow); |
|
3525 |
diff.sign = Integer::POSITIVE; |
|
3526 |
}
|
|
3527 |
else
|
|
3528 |
{
|
|
3529 |
word borrow = Subtract(diff.reg, b.reg, a.reg, aSize); |
|
3530 |
CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize); |
|
3531 |
borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow); |
|
3532 |
assert(!borrow); |
|
3533 |
diff.sign = Integer::NEGATIVE; |
|
3534 |
}
|
|
3535 |
}
|
|
3536 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
3537 |
// MSVC .NET 2003 workaround
|
3538 |
template <class T> inline const T& STDMAX2(const T& a, const T& b) |
|
3539 |
{
|
|
3540 |
return a < b ? b : a; |
|
3541 |
}
|
|
3542 |
||
1
by weidai
Initial revision |
3543 |
Integer Integer::Plus(const Integer& b) const |
3544 |
{
|
|
184
by weidai
port to MSVC .NET 2005 beta 2 |
3545 |
Integer sum((word)0, STDMAX2(reg.size(), b.reg.size())); |
1
by weidai
Initial revision |
3546 |
if (NotNegative()) |
3547 |
{
|
|
3548 |
if (b.NotNegative()) |
|
3549 |
PositiveAdd(sum, *this, b); |
|
3550 |
else
|
|
3551 |
PositiveSubtract(sum, *this, b); |
|
3552 |
}
|
|
3553 |
else
|
|
3554 |
{
|
|
3555 |
if (b.NotNegative()) |
|
3556 |
PositiveSubtract(sum, b, *this); |
|
3557 |
else
|
|
3558 |
{
|
|
3559 |
PositiveAdd(sum, *this, b); |
|
3560 |
sum.sign = Integer::NEGATIVE; |
|
3561 |
}
|
|
3562 |
}
|
|
3563 |
return sum; |
|
3564 |
}
|
|
3565 |
||
3566 |
Integer& Integer::operator+=(const Integer& t) |
|
3567 |
{
|
|
3568 |
reg.CleanGrow(t.reg.size()); |
|
3569 |
if (NotNegative()) |
|
3570 |
{
|
|
3571 |
if (t.NotNegative()) |
|
3572 |
PositiveAdd(*this, *this, t); |
|
3573 |
else
|
|
3574 |
PositiveSubtract(*this, *this, t); |
|
3575 |
}
|
|
3576 |
else
|
|
3577 |
{
|
|
3578 |
if (t.NotNegative()) |
|
3579 |
PositiveSubtract(*this, t, *this); |
|
3580 |
else
|
|
3581 |
{
|
|
3582 |
PositiveAdd(*this, *this, t); |
|
3583 |
sign = Integer::NEGATIVE; |
|
3584 |
}
|
|
3585 |
}
|
|
3586 |
return *this; |
|
3587 |
}
|
|
3588 |
||
3589 |
Integer Integer::Minus(const Integer& b) const |
|
3590 |
{
|
|
184
by weidai
port to MSVC .NET 2005 beta 2 |
3591 |
Integer diff((word)0, STDMAX2(reg.size(), b.reg.size())); |
1
by weidai
Initial revision |
3592 |
if (NotNegative()) |
3593 |
{
|
|
3594 |
if (b.NotNegative()) |
|
3595 |
PositiveSubtract(diff, *this, b); |
|
3596 |
else
|
|
3597 |
PositiveAdd(diff, *this, b); |
|
3598 |
}
|
|
3599 |
else
|
|
3600 |
{
|
|
3601 |
if (b.NotNegative()) |
|
3602 |
{
|
|
3603 |
PositiveAdd(diff, *this, b); |
|
3604 |
diff.sign = Integer::NEGATIVE; |
|
3605 |
}
|
|
3606 |
else
|
|
3607 |
PositiveSubtract(diff, b, *this); |
|
3608 |
}
|
|
3609 |
return diff; |
|
3610 |
}
|
|
3611 |
||
3612 |
Integer& Integer::operator-=(const Integer& t) |
|
3613 |
{
|
|
3614 |
reg.CleanGrow(t.reg.size()); |
|
3615 |
if (NotNegative()) |
|
3616 |
{
|
|
3617 |
if (t.NotNegative()) |
|
3618 |
PositiveSubtract(*this, *this, t); |
|
3619 |
else
|
|
3620 |
PositiveAdd(*this, *this, t); |
|
3621 |
}
|
|
3622 |
else
|
|
3623 |
{
|
|
3624 |
if (t.NotNegative()) |
|
3625 |
{
|
|
3626 |
PositiveAdd(*this, *this, t); |
|
3627 |
sign = Integer::NEGATIVE; |
|
3628 |
}
|
|
3629 |
else
|
|
3630 |
PositiveSubtract(*this, t, *this); |
|
3631 |
}
|
|
3632 |
return *this; |
|
3633 |
}
|
|
3634 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
3635 |
Integer& Integer::operator<<=(size_t n) |
1
by weidai
Initial revision |
3636 |
{
|
184
by weidai
port to MSVC .NET 2005 beta 2 |
3637 |
const size_t wordCount = WordCount(); |
3638 |
const size_t shiftWords = n / WORD_BITS; |
|
3639 |
const unsigned int shiftBits = (unsigned int)(n % WORD_BITS); |
|
1
by weidai
Initial revision |
3640 |
|
3641 |
reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n))); |
|
3642 |
ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords); |
|
3643 |
ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits); |
|
3644 |
return *this; |
|
3645 |
}
|
|
3646 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
3647 |
Integer& Integer::operator>>=(size_t n) |
1
by weidai
Initial revision |
3648 |
{
|
184
by weidai
port to MSVC .NET 2005 beta 2 |
3649 |
const size_t wordCount = WordCount(); |
3650 |
const size_t shiftWords = n / WORD_BITS; |
|
3651 |
const unsigned int shiftBits = (unsigned int)(n % WORD_BITS); |
|
1
by weidai
Initial revision |
3652 |
|
3653 |
ShiftWordsRightByWords(reg, wordCount, shiftWords); |
|
3654 |
if (wordCount > shiftWords) |
|
3655 |
ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits); |
|
3656 |
if (IsNegative() && WordCount()==0) // avoid -0 |
|
3657 |
*this = Zero(); |
|
3658 |
return *this; |
|
3659 |
}
|
|
3660 |
||
3661 |
void PositiveMultiply(Integer &product, const Integer &a, const Integer &b) |
|
3662 |
{
|
|
184
by weidai
port to MSVC .NET 2005 beta 2 |
3663 |
size_t aSize = RoundupSize(a.WordCount()); |
3664 |
size_t bSize = RoundupSize(b.WordCount()); |
|
1
by weidai
Initial revision |
3665 |
|
3666 |
product.reg.CleanNew(RoundupSize(aSize+bSize)); |
|
3667 |
product.sign = Integer::POSITIVE; |
|
3668 |
||
270
by weidai
MMX/SSE2 optimizations |
3669 |
IntegerSecBlock workspace(aSize + bSize); |
1
by weidai
Initial revision |
3670 |
AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize); |
3671 |
}
|
|
3672 |
||
3673 |
void Multiply(Integer &product, const Integer &a, const Integer &b) |
|
3674 |
{
|
|
3675 |
PositiveMultiply(product, a, b); |
|
3676 |
||
3677 |
if (a.NotNegative() != b.NotNegative()) |
|
3678 |
product.Negate(); |
|
3679 |
}
|
|
3680 |
||
3681 |
Integer Integer::Times(const Integer &b) const |
|
3682 |
{
|
|
3683 |
Integer product; |
|
3684 |
Multiply(product, *this, b); |
|
3685 |
return product; |
|
3686 |
}
|
|
3687 |
||
3688 |
/*
|
|
3689 |
void PositiveDivide(Integer &remainder, Integer "ient,
|
|
3690 |
const Integer ÷nd, const Integer &divisor)
|
|
3691 |
{
|
|
3692 |
remainder.reg.CleanNew(divisor.reg.size());
|
|
3693 |
remainder.sign = Integer::POSITIVE;
|
|
3694 |
quotient.reg.New(0);
|
|
3695 |
quotient.sign = Integer::POSITIVE;
|
|
3696 |
unsigned i=dividend.BitCount();
|
|
3697 |
while (i--)
|
|
3698 |
{
|
|
3699 |
word overflow = ShiftWordsLeftByBits(remainder.reg, remainder.reg.size(), 1);
|
|
3700 |
remainder.reg[0] |= dividend[i];
|
|
3701 |
if (overflow || remainder >= divisor)
|
|
3702 |
{
|
|
3703 |
Subtract(remainder.reg, remainder.reg, divisor.reg, remainder.reg.size());
|
|
3704 |
quotient.SetBit(i);
|
|
3705 |
}
|
|
3706 |
}
|
|
3707 |
}
|
|
3708 |
*/
|
|
3709 |
||
3710 |
void PositiveDivide(Integer &remainder, Integer "ient, |
|
3711 |
const Integer &a, const Integer &b) |
|
3712 |
{
|
|
3713 |
unsigned aSize = a.WordCount(); |
|
3714 |
unsigned bSize = b.WordCount(); |
|
3715 |
||
3716 |
if (!bSize) |
|
3717 |
throw Integer::DivideByZero(); |
|
3718 |
||
412
by weidai
changes for 5.6: |
3719 |
if (aSize < bSize) |
1
by weidai
Initial revision |
3720 |
{
|
3721 |
remainder = a; |
|
3722 |
remainder.sign = Integer::POSITIVE; |
|
3723 |
quotient = Integer::Zero(); |
|
3724 |
return; |
|
3725 |
}
|
|
3726 |
||
3727 |
aSize += aSize%2; // round up to next even number |
|
3728 |
bSize += bSize%2; |
|
3729 |
||
3730 |
remainder.reg.CleanNew(RoundupSize(bSize)); |
|
3731 |
remainder.sign = Integer::POSITIVE; |
|
3732 |
quotient.reg.CleanNew(RoundupSize(aSize-bSize+2)); |
|
3733 |
quotient.sign = Integer::POSITIVE; |
|
3734 |
||
270
by weidai
MMX/SSE2 optimizations |
3735 |
IntegerSecBlock T(aSize+3*(bSize+2)); |
1
by weidai
Initial revision |
3736 |
Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize); |
3737 |
}
|
|
3738 |
||
3739 |
void Integer::Divide(Integer &remainder, Integer "ient, const Integer ÷nd, const Integer &divisor) |
|
3740 |
{
|
|
3741 |
PositiveDivide(remainder, quotient, dividend, divisor); |
|
3742 |
||
3743 |
if (dividend.IsNegative()) |
|
3744 |
{
|
|
3745 |
quotient.Negate(); |
|
3746 |
if (remainder.NotZero()) |
|
3747 |
{
|
|
3748 |
--quotient; |
|
3749 |
remainder = divisor.AbsoluteValue() - remainder; |
|
3750 |
}
|
|
3751 |
}
|
|
3752 |
||
3753 |
if (divisor.IsNegative()) |
|
3754 |
quotient.Negate(); |
|
3755 |
}
|
|
3756 |
||
3757 |
void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n) |
|
3758 |
{
|
|
3759 |
q = a; |
|
3760 |
q >>= n; |
|
3761 |
||
184
by weidai
port to MSVC .NET 2005 beta 2 |
3762 |
const size_t wordCount = BitsToWords(n); |
1
by weidai
Initial revision |
3763 |
if (wordCount <= a.WordCount()) |
3764 |
{
|
|
3765 |
r.reg.resize(RoundupSize(wordCount)); |
|
3766 |
CopyWords(r.reg, a.reg, wordCount); |
|
3767 |
SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount); |
|
3768 |
if (n % WORD_BITS != 0) |
|
202
by weidai
fix MSVC 2005 warnings |
3769 |
r.reg[wordCount-1] %= (word(1) << (n % WORD_BITS)); |
1
by weidai
Initial revision |
3770 |
}
|
3771 |
else
|
|
3772 |
{
|
|
3773 |
r.reg.resize(RoundupSize(a.WordCount())); |
|
3774 |
CopyWords(r.reg, a.reg, r.reg.size()); |
|
3775 |
}
|
|
3776 |
r.sign = POSITIVE; |
|
3777 |
||
3778 |
if (a.IsNegative() && r.NotZero()) |
|
3779 |
{
|
|
3780 |
--q; |
|
3781 |
r = Power2(n) - r; |
|
3782 |
}
|
|
3783 |
}
|
|
3784 |
||
3785 |
Integer Integer::DividedBy(const Integer &b) const |
|
3786 |
{
|
|
3787 |
Integer remainder, quotient; |
|
3788 |
Integer::Divide(remainder, quotient, *this, b); |
|
3789 |
return quotient; |
|
3790 |
}
|
|
3791 |
||
3792 |
Integer Integer::Modulo(const Integer &b) const |
|
3793 |
{
|
|
3794 |
Integer remainder, quotient; |
|
3795 |
Integer::Divide(remainder, quotient, *this, b); |
|
3796 |
return remainder; |
|
3797 |
}
|
|
3798 |
||
3799 |
void Integer::Divide(word &remainder, Integer "ient, const Integer ÷nd, word divisor) |
|
3800 |
{
|
|
3801 |
if (!divisor) |
|
3802 |
throw Integer::DivideByZero(); |
|
3803 |
||
3804 |
assert(divisor); |
|
3805 |
||
3806 |
if ((divisor & (divisor-1)) == 0) // divisor is a power of 2 |
|
3807 |
{
|
|
3808 |
quotient = dividend >> (BitPrecision(divisor)-1); |
|
3809 |
remainder = dividend.reg[0] & (divisor-1); |
|
3810 |
return; |
|
3811 |
}
|
|
3812 |
||
3813 |
unsigned int i = dividend.WordCount(); |
|
3814 |
quotient.reg.CleanNew(RoundupSize(i)); |
|
3815 |
remainder = 0; |
|
3816 |
while (i--) |
|
3817 |
{
|
|
100
by weidai
fix bugs in 64-bit CPU support |
3818 |
quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor; |
3819 |
remainder = DWord(dividend.reg[i], remainder) % divisor; |
|
1
by weidai
Initial revision |
3820 |
}
|
3821 |
||
3822 |
if (dividend.NotNegative()) |
|
3823 |
quotient.sign = POSITIVE; |
|
3824 |
else
|
|
3825 |
{
|
|
3826 |
quotient.sign = NEGATIVE; |
|
3827 |
if (remainder) |
|
3828 |
{
|
|
3829 |
--quotient; |
|
3830 |
remainder = divisor - remainder; |
|
3831 |
}
|
|
3832 |
}
|
|
3833 |
}
|
|
3834 |
||
3835 |
Integer Integer::DividedBy(word b) const |
|
3836 |
{
|
|
3837 |
word remainder; |
|
3838 |
Integer quotient; |
|
3839 |
Integer::Divide(remainder, quotient, *this, b); |
|
3840 |
return quotient; |
|
3841 |
}
|
|
3842 |
||
3843 |
word Integer::Modulo(word divisor) const |
|
3844 |
{
|
|
3845 |
if (!divisor) |
|
3846 |
throw Integer::DivideByZero(); |
|
3847 |
||
3848 |
assert(divisor); |
|
3849 |
||
3850 |
word remainder; |
|
3851 |
||
3852 |
if ((divisor & (divisor-1)) == 0) // divisor is a power of 2 |
|
3853 |
remainder = reg[0] & (divisor-1); |
|
3854 |
else
|
|
3855 |
{
|
|
3856 |
unsigned int i = WordCount(); |
|
3857 |
||
3858 |
if (divisor <= 5) |
|
3859 |
{
|
|
100
by weidai
fix bugs in 64-bit CPU support |
3860 |
DWord sum(0, 0); |
1
by weidai
Initial revision |
3861 |
while (i--) |
3862 |
sum += reg[i]; |
|
100
by weidai
fix bugs in 64-bit CPU support |
3863 |
remainder = sum % divisor; |
1
by weidai
Initial revision |
3864 |
}
|
3865 |
else
|
|
3866 |
{
|
|
3867 |
remainder = 0; |
|
3868 |
while (i--) |
|
100
by weidai
fix bugs in 64-bit CPU support |
3869 |
remainder = DWord(reg[i], remainder) % divisor; |
1
by weidai
Initial revision |
3870 |
}
|
3871 |
}
|
|
3872 |
||
3873 |
if (IsNegative() && remainder) |
|
3874 |
remainder = divisor - remainder; |
|
3875 |
||
3876 |
return remainder; |
|
3877 |
}
|
|
3878 |
||
3879 |
void Integer::Negate() |
|
3880 |
{
|
|
3881 |
if (!!(*this)) // don't flip sign if *this==0 |
|
3882 |
sign = Sign(1-sign); |
|
3883 |
}
|
|
3884 |
||
3885 |
int Integer::PositiveCompare(const Integer& t) const |
|
3886 |
{
|
|
3887 |
unsigned size = WordCount(), tSize = t.WordCount(); |
|
3888 |
||
3889 |
if (size == tSize) |
|
3890 |
return CryptoPP::Compare(reg, t.reg, size); |
|
3891 |
else
|
|
3892 |
return size > tSize ? 1 : -1; |
|
3893 |
}
|
|
3894 |
||
3895 |
int Integer::Compare(const Integer& t) const |
|
3896 |
{
|
|
3897 |
if (NotNegative()) |
|
3898 |
{
|
|
3899 |
if (t.NotNegative()) |
|
3900 |
return PositiveCompare(t); |
|
3901 |
else
|
|
3902 |
return 1; |
|
3903 |
}
|
|
3904 |
else
|
|
3905 |
{
|
|
3906 |
if (t.NotNegative()) |
|
3907 |
return -1; |
|
3908 |
else
|
|
3909 |
return -PositiveCompare(t); |
|
3910 |
}
|
|
3911 |
}
|
|
3912 |
||
3913 |
Integer Integer::SquareRoot() const |
|
3914 |
{
|
|
3915 |
if (!IsPositive()) |
|
3916 |
return Zero(); |
|
3917 |
||
3918 |
// overestimate square root
|
|
3919 |
Integer x, y = Power2((BitCount()+1)/2); |
|
3920 |
assert(y*y >= *this); |
|
3921 |
||
3922 |
do
|
|
3923 |
{
|
|
3924 |
x = y; |
|
3925 |
y = (x + *this/x) >> 1; |
|
3926 |
} while (y<x); |
|
3927 |
||
3928 |
return x; |
|
3929 |
}
|
|
3930 |
||
3931 |
bool Integer::IsSquare() const |
|
3932 |
{
|
|
3933 |
Integer r = SquareRoot(); |
|
3934 |
return *this == r.Squared(); |
|
3935 |
}
|
|
3936 |
||
3937 |
bool Integer::IsUnit() const |
|
3938 |
{
|
|
3939 |
return (WordCount() == 1) && (reg[0] == 1); |
|
3940 |
}
|
|
3941 |
||
3942 |
Integer Integer::MultiplicativeInverse() const |
|
3943 |
{
|
|
3944 |
return IsUnit() ? *this : Zero(); |
|
3945 |
}
|
|
3946 |
||
3947 |
Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m) |
|
3948 |
{
|
|
3949 |
return x*y%m; |
|
3950 |
}
|
|
3951 |
||
3952 |
Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m) |
|
3953 |
{
|
|
3954 |
ModularArithmetic mr(m); |
|
3955 |
return mr.Exponentiate(x, e); |
|
3956 |
}
|
|
3957 |
||
3958 |
Integer Integer::Gcd(const Integer &a, const Integer &b) |
|
3959 |
{
|
|
3960 |
return EuclideanDomainOf<Integer>().Gcd(a, b); |
|
3961 |
}
|
|
3962 |
||
3963 |
Integer Integer::InverseMod(const Integer &m) const |
|
3964 |
{
|
|
3965 |
assert(m.NotNegative()); |
|
3966 |
||
412
by weidai
changes for 5.6: |
3967 |
if (IsNegative()) |
3968 |
return Modulo(m).InverseMod(m); |
|
1
by weidai
Initial revision |
3969 |
|
3970 |
if (m.IsEven()) |
|
3971 |
{
|
|
3972 |
if (!m || IsEven()) |
|
3973 |
return Zero(); // no inverse |
|
3974 |
if (*this == One()) |
|
3975 |
return One(); |
|
3976 |
||
412
by weidai
changes for 5.6: |
3977 |
Integer u = m.Modulo(*this).InverseMod(*this); |
1
by weidai
Initial revision |
3978 |
return !u ? Zero() : (m*(*this-u)+1)/(*this); |
3979 |
}
|
|
3980 |
||
3981 |
SecBlock<word> T(m.reg.size() * 4); |
|
3982 |
Integer r((word)0, m.reg.size()); |
|
3983 |
unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size()); |
|
3984 |
DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size()); |
|
3985 |
return r; |
|
3986 |
}
|
|
3987 |
||
249
by weidai
update version number, port to Sun C++ 5.8 |
3988 |
word Integer::InverseMod(word mod) const |
1
by weidai
Initial revision |
3989 |
{
|
3990 |
word g0 = mod, g1 = *this % mod; |
|
3991 |
word v0 = 0, v1 = 1; |
|
3992 |
word y; |
|
3993 |
||
3994 |
while (g1) |
|
3995 |
{
|
|
3996 |
if (g1 == 1) |
|
3997 |
return v1; |
|
3998 |
y = g0 / g1; |
|
3999 |
g0 = g0 % g1; |
|
4000 |
v0 += y * v1; |
|
4001 |
||
4002 |
if (!g0) |
|
4003 |
break; |
|
4004 |
if (g0 == 1) |
|
4005 |
return mod-v0; |
|
4006 |
y = g1 / g0; |
|
4007 |
g1 = g1 % g0; |
|
4008 |
v1 += y * v0; |
|
4009 |
}
|
|
4010 |
return 0; |
|
4011 |
}
|
|
4012 |
||
4013 |
// ********************************************************
|
|
4014 |
||
4015 |
ModularArithmetic::ModularArithmetic(BufferedTransformation &bt) |
|
4016 |
{
|
|
4017 |
BERSequenceDecoder seq(bt); |
|
4018 |
OID oid(seq); |
|
4019 |
if (oid != ASN1::prime_field()) |
|
4020 |
BERDecodeError(); |
|
181
by weidai
changes done for FIPS-140 lab code drop |
4021 |
m_modulus.BERDecode(seq); |
1
by weidai
Initial revision |
4022 |
seq.MessageEnd(); |
181
by weidai
changes done for FIPS-140 lab code drop |
4023 |
m_result.reg.resize(m_modulus.reg.size()); |
1
by weidai
Initial revision |
4024 |
}
|
4025 |
||
4026 |
void ModularArithmetic::DEREncode(BufferedTransformation &bt) const |
|
4027 |
{
|
|
4028 |
DERSequenceEncoder seq(bt); |
|
4029 |
ASN1::prime_field().DEREncode(seq); |
|
181
by weidai
changes done for FIPS-140 lab code drop |
4030 |
m_modulus.DEREncode(seq); |
1
by weidai
Initial revision |
4031 |
seq.MessageEnd(); |
4032 |
}
|
|
4033 |
||
4034 |
void ModularArithmetic::DEREncodeElement(BufferedTransformation &out, const Element &a) const |
|
4035 |
{
|
|
4036 |
a.DEREncodeAsOctetString(out, MaxElementByteLength()); |
|
4037 |
}
|
|
4038 |
||
4039 |
void ModularArithmetic::BERDecodeElement(BufferedTransformation &in, Element &a) const |
|
4040 |
{
|
|
4041 |
a.BERDecodeAsOctetString(in, MaxElementByteLength()); |
|
4042 |
}
|
|
4043 |
||
4044 |
const Integer& ModularArithmetic::Half(const Integer &a) const |
|
4045 |
{
|
|
181
by weidai
changes done for FIPS-140 lab code drop |
4046 |
if (a.reg.size()==m_modulus.reg.size()) |
1
by weidai
Initial revision |
4047 |
{
|
181
by weidai
changes done for FIPS-140 lab code drop |
4048 |
CryptoPP::DivideByPower2Mod(m_result.reg.begin(), a.reg, 1, m_modulus.reg, a.reg.size()); |
4049 |
return m_result; |
|
1
by weidai
Initial revision |
4050 |
}
|
4051 |
else
|
|
181
by weidai
changes done for FIPS-140 lab code drop |
4052 |
return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1)); |
1
by weidai
Initial revision |
4053 |
}
|
4054 |
||
4055 |
const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const |
|
4056 |
{
|
|
181
by weidai
changes done for FIPS-140 lab code drop |
4057 |
if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size()) |
1
by weidai
Initial revision |
4058 |
{
|
181
by weidai
changes done for FIPS-140 lab code drop |
4059 |
if (CryptoPP::Add(m_result.reg.begin(), a.reg, b.reg, a.reg.size()) |
4060 |
|| Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0) |
|
1
by weidai
Initial revision |
4061 |
{
|
181
by weidai
changes done for FIPS-140 lab code drop |
4062 |
CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size()); |
1
by weidai
Initial revision |
4063 |
}
|
181
by weidai
changes done for FIPS-140 lab code drop |
4064 |
return m_result; |
1
by weidai
Initial revision |
4065 |
}
|
4066 |
else
|
|
4067 |
{
|
|
181
by weidai
changes done for FIPS-140 lab code drop |
4068 |
m_result1 = a+b; |
4069 |
if (m_result1 >= m_modulus) |
|
4070 |
m_result1 -= m_modulus; |
|
4071 |
return m_result1; |
|
1
by weidai
Initial revision |
4072 |
}
|
4073 |
}
|
|
4074 |
||
4075 |
Integer& ModularArithmetic::Accumulate(Integer &a, const Integer &b) const |
|
4076 |
{
|
|
181
by weidai
changes done for FIPS-140 lab code drop |
4077 |
if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size()) |
1
by weidai
Initial revision |
4078 |
{
|
4079 |
if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size()) |
|
181
by weidai
changes done for FIPS-140 lab code drop |
4080 |
|| Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0) |
1
by weidai
Initial revision |
4081 |
{
|
181
by weidai
changes done for FIPS-140 lab code drop |
4082 |
CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size()); |
1
by weidai
Initial revision |
4083 |
}
|
4084 |
}
|
|
4085 |
else
|
|
4086 |
{
|
|
4087 |
a+=b; |
|
181
by weidai
changes done for FIPS-140 lab code drop |
4088 |
if (a>=m_modulus) |
4089 |
a-=m_modulus; |
|
1
by weidai
Initial revision |
4090 |
}
|
4091 |
||
4092 |
return a; |
|
4093 |
}
|
|
4094 |
||
4095 |
const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const |
|
4096 |
{
|
|
181
by weidai
changes done for FIPS-140 lab code drop |
4097 |
if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size()) |
1
by weidai
Initial revision |
4098 |
{
|
181
by weidai
changes done for FIPS-140 lab code drop |
4099 |
if (CryptoPP::Subtract(m_result.reg.begin(), a.reg, b.reg, a.reg.size())) |
4100 |
CryptoPP::Add(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size()); |
|
4101 |
return m_result; |
|
1
by weidai
Initial revision |
4102 |
}
|
4103 |
else
|
|
4104 |
{
|
|
181
by weidai
changes done for FIPS-140 lab code drop |
4105 |
m_result1 = a-b; |
4106 |
if (m_result1.IsNegative()) |
|
4107 |
m_result1 += m_modulus; |
|
4108 |
return m_result1; |
|
1
by weidai
Initial revision |
4109 |
}
|
4110 |
}
|
|
4111 |
||
4112 |
Integer& ModularArithmetic::Reduce(Integer &a, const Integer &b) const |
|
4113 |
{
|
|
181
by weidai
changes done for FIPS-140 lab code drop |
4114 |
if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size()) |
1
by weidai
Initial revision |
4115 |
{
|
4116 |
if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size())) |
|
181
by weidai
changes done for FIPS-140 lab code drop |
4117 |
CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size()); |
1
by weidai
Initial revision |
4118 |
}
|
4119 |
else
|
|
4120 |
{
|
|
4121 |
a-=b; |
|
4122 |
if (a.IsNegative()) |
|
181
by weidai
changes done for FIPS-140 lab code drop |
4123 |
a+=m_modulus; |
1
by weidai
Initial revision |
4124 |
}
|
4125 |
||
4126 |
return a; |
|
4127 |
}
|
|
4128 |
||
4129 |
const Integer& ModularArithmetic::Inverse(const Integer &a) const |
|
4130 |
{
|
|
4131 |
if (!a) |
|
4132 |
return a; |
|
4133 |
||
181
by weidai
changes done for FIPS-140 lab code drop |
4134 |
CopyWords(m_result.reg.begin(), m_modulus.reg, m_modulus.reg.size()); |
4135 |
if (CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, a.reg, a.reg.size())) |
|
184
by weidai
port to MSVC .NET 2005 beta 2 |
4136 |
Decrement(m_result.reg.begin()+a.reg.size(), m_modulus.reg.size()-a.reg.size()); |
1
by weidai
Initial revision |
4137 |
|
181
by weidai
changes done for FIPS-140 lab code drop |
4138 |
return m_result; |
1
by weidai
Initial revision |
4139 |
}
|
4140 |
||
4141 |
Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const |
|
4142 |
{
|
|
181
by weidai
changes done for FIPS-140 lab code drop |
4143 |
if (m_modulus.IsOdd()) |
1
by weidai
Initial revision |
4144 |
{
|
181
by weidai
changes done for FIPS-140 lab code drop |
4145 |
MontgomeryRepresentation dr(m_modulus); |
1
by weidai
Initial revision |
4146 |
return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2)); |
4147 |
}
|
|
4148 |
else
|
|
4149 |
return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2); |
|
4150 |
}
|
|
4151 |
||
4152 |
void ModularArithmetic::SimultaneousExponentiate(Integer *results, const Integer &base, const Integer *exponents, unsigned int exponentsCount) const |
|
4153 |
{
|
|
181
by weidai
changes done for FIPS-140 lab code drop |
4154 |
if (m_modulus.IsOdd()) |
1
by weidai
Initial revision |
4155 |
{
|
181
by weidai
changes done for FIPS-140 lab code drop |
4156 |
MontgomeryRepresentation dr(m_modulus); |
1
by weidai
Initial revision |
4157 |
dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount); |
4158 |
for (unsigned int i=0; i<exponentsCount; i++) |
|
4159 |
results[i] = dr.ConvertOut(results[i]); |
|
4160 |
}
|
|
4161 |
else
|
|
4162 |
AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount); |
|
4163 |
}
|
|
4164 |
||
4165 |
MontgomeryRepresentation::MontgomeryRepresentation(const Integer &m) // modulus must be odd |
|
4166 |
: ModularArithmetic(m), |
|
181
by weidai
changes done for FIPS-140 lab code drop |
4167 |
m_u((word)0, m_modulus.reg.size()), |
4168 |
m_workspace(5*m_modulus.reg.size()) |
|
1
by weidai
Initial revision |
4169 |
{
|
181
by weidai
changes done for FIPS-140 lab code drop |
4170 |
if (!m_modulus.IsOdd()) |
1
by weidai
Initial revision |
4171 |
throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus"); |
4172 |
||
181
by weidai
changes done for FIPS-140 lab code drop |
4173 |
RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.size()); |
1
by weidai
Initial revision |
4174 |
}
|
4175 |
||
4176 |
const Integer& MontgomeryRepresentation::Multiply(const Integer &a, const Integer &b) const |
|
4177 |
{
|
|
181
by weidai
changes done for FIPS-140 lab code drop |
4178 |
word *const T = m_workspace.begin(); |
4179 |
word *const R = m_result.reg.begin(); |
|
184
by weidai
port to MSVC .NET 2005 beta 2 |
4180 |
const size_t N = m_modulus.reg.size(); |
1
by weidai
Initial revision |
4181 |
assert(a.reg.size()<=N && b.reg.size()<=N); |
4182 |
||
4183 |
AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size()); |
|
4184 |
SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size()); |
|
181
by weidai
changes done for FIPS-140 lab code drop |
4185 |
MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N); |
4186 |
return m_result; |
|
1
by weidai
Initial revision |
4187 |
}
|
4188 |
||
4189 |
const Integer& MontgomeryRepresentation::Square(const Integer &a) const |
|
4190 |
{
|
|
181
by weidai
changes done for FIPS-140 lab code drop |
4191 |
word *const T = m_workspace.begin(); |
4192 |
word *const R = m_result.reg.begin(); |
|
184
by weidai
port to MSVC .NET 2005 beta 2 |
4193 |
const size_t N = m_modulus.reg.size(); |
1
by weidai
Initial revision |
4194 |
assert(a.reg.size()<=N); |
4195 |
||
4196 |
CryptoPP::Square(T, T+2*N, a.reg, a.reg.size()); |
|
4197 |
SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size()); |
|
181
by weidai
changes done for FIPS-140 lab code drop |
4198 |
MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N); |
4199 |
return m_result; |
|
1
by weidai
Initial revision |
4200 |
}
|
4201 |
||
4202 |
Integer MontgomeryRepresentation::ConvertOut(const Integer &a) const |
|
4203 |
{
|
|
181
by weidai
changes done for FIPS-140 lab code drop |
4204 |
word *const T = m_workspace.begin(); |
4205 |
word *const R = m_result.reg.begin(); |
|
184
by weidai
port to MSVC .NET 2005 beta 2 |
4206 |
const size_t N = m_modulus.reg.size(); |
1
by weidai
Initial revision |
4207 |
assert(a.reg.size()<=N); |
4208 |
||
4209 |
CopyWords(T, a.reg, a.reg.size()); |
|
4210 |
SetWords(T+a.reg.size(), 0, 2*N-a.reg.size()); |
|
181
by weidai
changes done for FIPS-140 lab code drop |
4211 |
MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N); |
4212 |
return m_result; |
|
1
by weidai
Initial revision |
4213 |
}
|
4214 |
||
4215 |
const Integer& MontgomeryRepresentation::MultiplicativeInverse(const Integer &a) const |
|
4216 |
{
|
|
4217 |
// return (EuclideanMultiplicativeInverse(a, modulus)<<(2*WORD_BITS*modulus.reg.size()))%modulus;
|
|
181
by weidai
changes done for FIPS-140 lab code drop |
4218 |
word *const T = m_workspace.begin(); |
4219 |
word *const R = m_result.reg.begin(); |
|
184
by weidai
port to MSVC .NET 2005 beta 2 |
4220 |
const size_t N = m_modulus.reg.size(); |
1
by weidai
Initial revision |
4221 |
assert(a.reg.size()<=N); |
4222 |
||
4223 |
CopyWords(T, a.reg, a.reg.size()); |
|
4224 |
SetWords(T+a.reg.size(), 0, 2*N-a.reg.size()); |
|
181
by weidai
changes done for FIPS-140 lab code drop |
4225 |
MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N); |
4226 |
unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N); |
|
1
by weidai
Initial revision |
4227 |
|
4228 |
// cout << "k=" << k << " N*32=" << 32*N << endl;
|
|
4229 |
||
4230 |
if (k>N*WORD_BITS) |
|
181
by weidai
changes done for FIPS-140 lab code drop |
4231 |
DivideByPower2Mod(R, R, k-N*WORD_BITS, m_modulus.reg, N); |
1
by weidai
Initial revision |
4232 |
else
|
181
by weidai
changes done for FIPS-140 lab code drop |
4233 |
MultiplyByPower2Mod(R, R, N*WORD_BITS-k, m_modulus.reg, N); |
1
by weidai
Initial revision |
4234 |
|
181
by weidai
changes done for FIPS-140 lab code drop |
4235 |
return m_result; |
1
by weidai
Initial revision |
4236 |
}
|
4237 |
||
4238 |
NAMESPACE_END
|
|
75
by weidai
create DLL version, fix GetNextIV() bug in CTR and OFB modes |
4239 |
|
4240 |
#endif
|