1
by weidai
Initial revision |
1 |
// panama.cpp - written and placed in the public domain by Wei Dai
|
2 |
||
376
by weidai
- port x64 assembly code to MASM |
3 |
// use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM panama.cpp" to generate MASM code
|
4 |
||
1
by weidai
Initial revision |
5 |
#include "pch.h" |
376
by weidai
- port x64 assembly code to MASM |
6 |
|
7 |
#ifndef CRYPTOPP_GENERATE_X64_MASM
|
|
8 |
||
1
by weidai
Initial revision |
9 |
#include "panama.h" |
10 |
#include "misc.h" |
|
265
by weidai
SSE2 optimizations |
11 |
#include "cpu.h" |
1
by weidai
Initial revision |
12 |
|
13 |
NAMESPACE_BEGIN(CryptoPP) |
|
14 |
||
15 |
template <class B> |
|
16 |
void Panama<B>::Reset() |
|
17 |
{
|
|
265
by weidai
SSE2 optimizations |
18 |
memset(m_state, 0, m_state.SizeInBytes()); |
19 |
#if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
|
|
20 |
m_state[17] = HasSSSE3(); |
|
21 |
#endif
|
|
22 |
}
|
|
23 |
||
376
by weidai
- port x64 assembly code to MASM |
24 |
#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM |
25 |
||
26 |
#ifdef CRYPTOPP_X64_MASM_AVAILABLE
|
|
27 |
extern "C" { |
|
28 |
void Panama_SSE2_Pull(size_t count, word32 *state, word32 *z, const word32 *y); |
|
29 |
}
|
|
30 |
#elif CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
|
|
31 |
||
32 |
#ifdef CRYPTOPP_GENERATE_X64_MASM
|
|
33 |
Panama_SSE2_Pull PROC FRAME |
|
440
by weidai
fixed Panama x64 MASM code not saving RDI |
34 |
rex_push_reg rdi |
35 |
alloc_stack(2*16) |
|
376
by weidai
- port x64 assembly code to MASM |
36 |
save_xmm128 xmm6, 0h |
37 |
save_xmm128 xmm7, 10h |
|
38 |
.endprolog |
|
39 |
#else
|
|
265
by weidai
SSE2 optimizations |
40 |
#pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code |
385
by weidai
fix compile for ICC 10 |
41 |
void CRYPTOPP_NOINLINE Panama_SSE2_Pull(size_t count, word32 *state, word32 *z, const word32 *y) |
265
by weidai
SSE2 optimizations |
42 |
{
|
497
by weidai
fix compile with GCC 4.4 and -march=i386 |
43 |
#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
|
44 |
asm __volatile__ |
|
265
by weidai
SSE2 optimizations |
45 |
(
|
46 |
".intel_syntax noprefix;"
|
|
385
by weidai
fix compile for ICC 10 |
47 |
AS_PUSH_IF86( bx) |
376
by weidai
- port x64 assembly code to MASM |
48 |
#else
|
49 |
AS2( mov AS_REG_1, count) |
|
50 |
AS2( mov AS_REG_2, state) |
|
51 |
AS2( mov AS_REG_3, z) |
|
52 |
AS2( mov AS_REG_4, y) |
|
53 |
#endif
|
|
54 |
#endif // #ifdef CRYPTOPP_GENERATE_X64_MASM |
|
55 |
||
56 |
#if CRYPTOPP_BOOL_X86
|
|
57 |
#define REG_loopEnd [esp]
|
|
58 |
#elif defined(CRYPTOPP_GENERATE_X64_MASM)
|
|
59 |
#define REG_loopEnd rdi
|
|
60 |
#else
|
|
61 |
#define REG_loopEnd r8
|
|
62 |
#endif
|
|
63 |
||
64 |
AS2( shl AS_REG_1, 5) |
|
265
by weidai
SSE2 optimizations |
65 |
ASJ( jz, 5, f) |
376
by weidai
- port x64 assembly code to MASM |
66 |
AS2( mov AS_REG_6d, [AS_REG_2+4*17]) |
67 |
AS2( add AS_REG_1, AS_REG_6) |
|
68 |
||
69 |
#if CRYPTOPP_BOOL_X64
|
|
70 |
AS2( mov REG_loopEnd, AS_REG_1) |
|
71 |
#else
|
|
72 |
AS1( push ebp) |
|
73 |
AS1( push AS_REG_1) |
|
74 |
#endif
|
|
75 |
||
380
by weidai
fix compile with GAS 2.15 |
76 |
AS2( movdqa xmm0, XMMWORD_PTR [AS_REG_2+0*16]) |
77 |
AS2( movdqa xmm1, XMMWORD_PTR [AS_REG_2+1*16]) |
|
78 |
AS2( movdqa xmm2, XMMWORD_PTR [AS_REG_2+2*16]) |
|
79 |
AS2( movdqa xmm3, XMMWORD_PTR [AS_REG_2+3*16]) |
|
376
by weidai
- port x64 assembly code to MASM |
80 |
AS2( mov eax, dword ptr [AS_REG_2+4*16]) |
265
by weidai
SSE2 optimizations |
81 |
|
82 |
ASL(4) |
|
83 |
// gamma and pi
|
|
84 |
#if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
|
|
376
by weidai
- port x64 assembly code to MASM |
85 |
AS2( test AS_REG_6, 1) |
265
by weidai
SSE2 optimizations |
86 |
ASJ( jnz, 6, f) |
87 |
#endif
|
|
88 |
AS2( movdqa xmm6, xmm2) |
|
89 |
AS2( movss xmm6, xmm3) |
|
90 |
ASS( pshufd xmm5, xmm6, 0, 3, 2, 1) |
|
91 |
AS2( movd xmm6, eax) |
|
92 |
AS2( movdqa xmm7, xmm3) |
|
93 |
AS2( movss xmm7, xmm6) |
|
94 |
ASS( pshufd xmm6, xmm7, 0, 3, 2, 1) |
|
95 |
#if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
|
|
96 |
ASJ( jmp, 7, f) |
|
97 |
ASL(6) |
|
98 |
AS2( movdqa xmm5, xmm3) |
|
99 |
AS3( palignr xmm5, xmm2, 4) |
|
100 |
AS2( movd xmm6, eax) |
|
101 |
AS3( palignr xmm6, xmm3, 4) |
|
102 |
ASL(7) |
|
103 |
#endif
|
|
104 |
||
376
by weidai
- port x64 assembly code to MASM |
105 |
AS2( movd AS_REG_1d, xmm2) |
106 |
AS1( not AS_REG_1d) |
|
107 |
AS2( movd AS_REG_7d, xmm3) |
|
108 |
AS2( or AS_REG_1d, AS_REG_7d) |
|
109 |
AS2( xor eax, AS_REG_1d) |
|
265
by weidai
SSE2 optimizations |
110 |
|
111 |
#define SSE2_Index(i) ASM_MOD(((i)*13+16), 17)
|
|
112 |
||
113 |
#define pi(i) \
|
|
376
by weidai
- port x64 assembly code to MASM |
114 |
AS2( movd AS_REG_1d, xmm7)\
|
115 |
AS2( rol AS_REG_1d, ASM_MOD((ASM_MOD(5*i,17)*(ASM_MOD(5*i,17)+1)/2), 32))\
|
|
116 |
AS2( mov [AS_REG_2+SSE2_Index(ASM_MOD(5*(i), 17))*4], AS_REG_1d)
|
|
265
by weidai
SSE2 optimizations |
117 |
|
118 |
#define pi4(x, y, z, a, b, c, d) \
|
|
119 |
AS2( pcmpeqb xmm7, xmm7)\
|
|
120 |
AS2( pxor xmm7, x)\
|
|
121 |
AS2( por xmm7, y)\
|
|
122 |
AS2( pxor xmm7, z)\
|
|
123 |
pi(a)\
|
|
124 |
ASS( pshuflw xmm7, xmm7, 1, 0, 3, 2)\
|
|
125 |
pi(b)\
|
|
126 |
AS2( punpckhqdq xmm7, xmm7)\
|
|
127 |
pi(c)\
|
|
128 |
ASS( pshuflw xmm7, xmm7, 1, 0, 3, 2)\
|
|
129 |
pi(d)
|
|
130 |
||
131 |
pi4(xmm1, xmm2, xmm3, 1, 5, 9, 13) |
|
132 |
pi4(xmm0, xmm1, xmm2, 2, 6, 10, 14) |
|
133 |
pi4(xmm6, xmm0, xmm1, 3, 7, 11, 15) |
|
134 |
pi4(xmm5, xmm6, xmm0, 4, 8, 12, 16) |
|
135 |
||
136 |
// output keystream and update buffer here to hide partial memory stalls between pi and theta
|
|
137 |
AS2( movdqa xmm4, xmm3) |
|
138 |
AS2( punpcklqdq xmm3, xmm2) // 1 5 2 6 |
|
139 |
AS2( punpckhdq xmm4, xmm2) // 9 10 13 14 |
|
140 |
AS2( movdqa xmm2, xmm1) |
|
141 |
AS2( punpcklqdq xmm1, xmm0) // 3 7 4 8 |
|
142 |
AS2( punpckhdq xmm2, xmm0) // 11 12 15 16 |
|
143 |
||
144 |
// keystream
|
|
376
by weidai
- port x64 assembly code to MASM |
145 |
AS2( test AS_REG_3, AS_REG_3) |
265
by weidai
SSE2 optimizations |
146 |
ASJ( jz, 0, f) |
147 |
AS2( movdqa xmm6, xmm4) |
|
148 |
AS2( punpcklqdq xmm4, xmm2) |
|
149 |
AS2( punpckhqdq xmm6, xmm2) |
|
376
by weidai
- port x64 assembly code to MASM |
150 |
AS2( test AS_REG_4, 15) |
265
by weidai
SSE2 optimizations |
151 |
ASJ( jnz, 2, f) |
376
by weidai
- port x64 assembly code to MASM |
152 |
AS2( test AS_REG_4, AS_REG_4) |
265
by weidai
SSE2 optimizations |
153 |
ASJ( jz, 1, f) |
376
by weidai
- port x64 assembly code to MASM |
154 |
AS2( pxor xmm4, [AS_REG_4]) |
155 |
AS2( pxor xmm6, [AS_REG_4+16]) |
|
156 |
AS2( add AS_REG_4, 32) |
|
265
by weidai
SSE2 optimizations |
157 |
ASJ( jmp, 1, f) |
158 |
ASL(2) |
|
376
by weidai
- port x64 assembly code to MASM |
159 |
AS2( movdqu xmm0, [AS_REG_4]) |
160 |
AS2( movdqu xmm2, [AS_REG_4+16]) |
|
265
by weidai
SSE2 optimizations |
161 |
AS2( pxor xmm4, xmm0) |
162 |
AS2( pxor xmm6, xmm2) |
|
376
by weidai
- port x64 assembly code to MASM |
163 |
AS2( add AS_REG_4, 32) |
265
by weidai
SSE2 optimizations |
164 |
ASL(1) |
376
by weidai
- port x64 assembly code to MASM |
165 |
AS2( test AS_REG_3, 15) |
265
by weidai
SSE2 optimizations |
166 |
ASJ( jnz, 3, f) |
380
by weidai
fix compile with GAS 2.15 |
167 |
AS2( movdqa XMMWORD_PTR [AS_REG_3], xmm4) |
168 |
AS2( movdqa XMMWORD_PTR [AS_REG_3+16], xmm6) |
|
376
by weidai
- port x64 assembly code to MASM |
169 |
AS2( add AS_REG_3, 32) |
265
by weidai
SSE2 optimizations |
170 |
ASJ( jmp, 0, f) |
171 |
ASL(3) |
|
380
by weidai
fix compile with GAS 2.15 |
172 |
AS2( movdqu XMMWORD_PTR [AS_REG_3], xmm4) |
173 |
AS2( movdqu XMMWORD_PTR [AS_REG_3+16], xmm6) |
|
376
by weidai
- port x64 assembly code to MASM |
174 |
AS2( add AS_REG_3, 32) |
265
by weidai
SSE2 optimizations |
175 |
ASL(0) |
176 |
||
177 |
// buffer update
|
|
376
by weidai
- port x64 assembly code to MASM |
178 |
AS2( lea AS_REG_1, [AS_REG_6 + 32]) |
179 |
AS2( and AS_REG_1, 31*32) |
|
180 |
AS2( lea AS_REG_7, [AS_REG_6 + (32-24)*32]) |
|
181 |
AS2( and AS_REG_7, 31*32) |
|
265
by weidai
SSE2 optimizations |
182 |
|
380
by weidai
fix compile with GAS 2.15 |
183 |
AS2( movdqa xmm0, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+0*8]) |
265
by weidai
SSE2 optimizations |
184 |
AS2( pxor xmm3, xmm0) |
185 |
ASS( pshufd xmm0, xmm0, 2, 3, 0, 1) |
|
380
by weidai
fix compile with GAS 2.15 |
186 |
AS2( movdqa XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+0*8], xmm3) |
187 |
AS2( pxor xmm0, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+2*8]) |
|
188 |
AS2( movdqa XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+2*8], xmm0) |
|
265
by weidai
SSE2 optimizations |
189 |
|
380
by weidai
fix compile with GAS 2.15 |
190 |
AS2( movdqa xmm4, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+2*8]) |
265
by weidai
SSE2 optimizations |
191 |
AS2( pxor xmm1, xmm4) |
380
by weidai
fix compile with GAS 2.15 |
192 |
AS2( movdqa XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+2*8], xmm1) |
193 |
AS2( pxor xmm4, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+0*8]) |
|
194 |
AS2( movdqa XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+0*8], xmm4) |
|
265
by weidai
SSE2 optimizations |
195 |
|
196 |
// theta
|
|
380
by weidai
fix compile with GAS 2.15 |
197 |
AS2( movdqa xmm3, XMMWORD_PTR [AS_REG_2+3*16]) |
198 |
AS2( movdqa xmm2, XMMWORD_PTR [AS_REG_2+2*16]) |
|
199 |
AS2( movdqa xmm1, XMMWORD_PTR [AS_REG_2+1*16]) |
|
200 |
AS2( movdqa xmm0, XMMWORD_PTR [AS_REG_2+0*16]) |
|
265
by weidai
SSE2 optimizations |
201 |
|
202 |
#if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
|
|
376
by weidai
- port x64 assembly code to MASM |
203 |
AS2( test AS_REG_6, 1) |
265
by weidai
SSE2 optimizations |
204 |
ASJ( jnz, 8, f) |
205 |
#endif
|
|
206 |
AS2( movd xmm6, eax) |
|
207 |
AS2( movdqa xmm7, xmm3) |
|
208 |
AS2( movss xmm7, xmm6) |
|
209 |
AS2( movdqa xmm6, xmm2) |
|
210 |
AS2( movss xmm6, xmm3) |
|
211 |
AS2( movdqa xmm5, xmm1) |
|
212 |
AS2( movss xmm5, xmm2) |
|
213 |
AS2( movdqa xmm4, xmm0) |
|
214 |
AS2( movss xmm4, xmm1) |
|
215 |
ASS( pshufd xmm7, xmm7, 0, 3, 2, 1) |
|
216 |
ASS( pshufd xmm6, xmm6, 0, 3, 2, 1) |
|
217 |
ASS( pshufd xmm5, xmm5, 0, 3, 2, 1) |
|
218 |
ASS( pshufd xmm4, xmm4, 0, 3, 2, 1) |
|
219 |
#if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
|
|
220 |
ASJ( jmp, 9, f) |
|
221 |
ASL(8) |
|
222 |
AS2( movd xmm7, eax) |
|
223 |
AS3( palignr xmm7, xmm3, 4) |
|
224 |
AS2( movq xmm6, xmm3) |
|
225 |
AS3( palignr xmm6, xmm2, 4) |
|
226 |
AS2( movq xmm5, xmm2) |
|
227 |
AS3( palignr xmm5, xmm1, 4) |
|
228 |
AS2( movq xmm4, xmm1) |
|
229 |
AS3( palignr xmm4, xmm0, 4) |
|
230 |
ASL(9) |
|
231 |
#endif
|
|
232 |
||
233 |
AS2( xor eax, 1) |
|
376
by weidai
- port x64 assembly code to MASM |
234 |
AS2( movd AS_REG_1d, xmm0) |
235 |
AS2( xor eax, AS_REG_1d) |
|
236 |
AS2( movd AS_REG_1d, xmm3) |
|
237 |
AS2( xor eax, AS_REG_1d) |
|
265
by weidai
SSE2 optimizations |
238 |
|
239 |
AS2( pxor xmm3, xmm2) |
|
240 |
AS2( pxor xmm2, xmm1) |
|
241 |
AS2( pxor xmm1, xmm0) |
|
242 |
AS2( pxor xmm0, xmm7) |
|
243 |
AS2( pxor xmm3, xmm7) |
|
244 |
AS2( pxor xmm2, xmm6) |
|
245 |
AS2( pxor xmm1, xmm5) |
|
246 |
AS2( pxor xmm0, xmm4) |
|
247 |
||
248 |
// sigma
|
|
376
by weidai
- port x64 assembly code to MASM |
249 |
AS2( lea AS_REG_1, [AS_REG_6 + (32-4)*32]) |
250 |
AS2( and AS_REG_1, 31*32) |
|
251 |
AS2( lea AS_REG_7, [AS_REG_6 + 16*32]) |
|
252 |
AS2( and AS_REG_7, 31*32) |
|
265
by weidai
SSE2 optimizations |
253 |
|
380
by weidai
fix compile with GAS 2.15 |
254 |
AS2( movdqa xmm4, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+0*16]) |
255 |
AS2( movdqa xmm5, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+0*16]) |
|
265
by weidai
SSE2 optimizations |
256 |
AS2( movdqa xmm6, xmm4) |
257 |
AS2( punpcklqdq xmm4, xmm5) |
|
258 |
AS2( punpckhqdq xmm6, xmm5) |
|
259 |
AS2( pxor xmm3, xmm4) |
|
260 |
AS2( pxor xmm2, xmm6) |
|
261 |
||
380
by weidai
fix compile with GAS 2.15 |
262 |
AS2( movdqa xmm4, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+1*16]) |
263 |
AS2( movdqa xmm5, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+1*16]) |
|
265
by weidai
SSE2 optimizations |
264 |
AS2( movdqa xmm6, xmm4) |
265 |
AS2( punpcklqdq xmm4, xmm5) |
|
266 |
AS2( punpckhqdq xmm6, xmm5) |
|
267 |
AS2( pxor xmm1, xmm4) |
|
268 |
AS2( pxor xmm0, xmm6) |
|
269 |
||
270 |
// loop
|
|
376
by weidai
- port x64 assembly code to MASM |
271 |
AS2( add AS_REG_6, 32) |
272 |
AS2( cmp AS_REG_6, REG_loopEnd) |
|
265
by weidai
SSE2 optimizations |
273 |
ASJ( jne, 4, b) |
274 |
||
275 |
// save state
|
|
376
by weidai
- port x64 assembly code to MASM |
276 |
AS2( mov [AS_REG_2+4*16], eax) |
380
by weidai
fix compile with GAS 2.15 |
277 |
AS2( movdqa XMMWORD_PTR [AS_REG_2+3*16], xmm3) |
278 |
AS2( movdqa XMMWORD_PTR [AS_REG_2+2*16], xmm2) |
|
279 |
AS2( movdqa XMMWORD_PTR [AS_REG_2+1*16], xmm1) |
|
280 |
AS2( movdqa XMMWORD_PTR [AS_REG_2+0*16], xmm0) |
|
376
by weidai
- port x64 assembly code to MASM |
281 |
|
282 |
#if CRYPTOPP_BOOL_X86
|
|
283 |
AS2( add esp, 4) |
|
284 |
AS1( pop ebp) |
|
285 |
#endif
|
|
265
by weidai
SSE2 optimizations |
286 |
ASL(5) |
287 |
||
497
by weidai
fix compile with GCC 4.4 and -march=i386 |
288 |
#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
|
376
by weidai
- port x64 assembly code to MASM |
289 |
AS_POP_IF86( bx) |
290 |
".att_syntax prefix;"
|
|
291 |
:
|
|
292 |
#if CRYPTOPP_BOOL_X64
|
|
293 |
: "D" (count), "S" (state), "d" (z), "c" (y) |
|
294 |
: "%r8", "%r9", "r10", "%eax", "memory", "cc", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7" |
|
295 |
#else
|
|
296 |
: "c" (count), "d" (state), "S" (z), "D" (y) |
|
383
by weidai
remove -msse2 since we don't use SSE2 intrinsics anymore |
297 |
: "%eax", "memory", "cc" |
376
by weidai
- port x64 assembly code to MASM |
298 |
#endif
|
265
by weidai
SSE2 optimizations |
299 |
);
|
300 |
#endif
|
|
376
by weidai
- port x64 assembly code to MASM |
301 |
#ifdef CRYPTOPP_GENERATE_X64_MASM
|
302 |
movdqa xmm6, [rsp + 0h] |
|
303 |
movdqa xmm7, [rsp + 10h] |
|
440
by weidai
fixed Panama x64 MASM code not saving RDI |
304 |
add rsp, 2*16 |
305 |
pop rdi |
|
376
by weidai
- port x64 assembly code to MASM |
306 |
ret
|
307 |
Panama_SSE2_Pull ENDP |
|
308 |
#else
|
|
265
by weidai
SSE2 optimizations |
309 |
}
|
310 |
#endif
|
|
376
by weidai
- port x64 assembly code to MASM |
311 |
#endif // #ifdef CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE |
312 |
||
313 |
#ifndef CRYPTOPP_GENERATE_X64_MASM
|
|
1
by weidai
Initial revision |
314 |
|
315 |
template <class B> |
|
503
by weidai
fix for makefile and Panama cipher validation failure on armel http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=619856 |
316 |
void Panama<B>::Iterate(size_t count, const word32 *p, byte *output, const byte *input, KeystreamOperation operation) |
1
by weidai
Initial revision |
317 |
{
|
265
by weidai
SSE2 optimizations |
318 |
word32 bstart = m_state[17]; |
319 |
word32 *const aPtr = m_state; |
|
320 |
word32 cPtr[17]; |
|
321 |
||
322 |
#define bPtr ((byte *)(aPtr+20))
|
|
323 |
||
324 |
// reorder the state for SSE2
|
|
325 |
// a and c: 4 8 12 16 | 3 7 11 15 | 2 6 10 14 | 1 5 9 13 | 0
|
|
326 |
// xmm0 xmm1 xmm2 xmm3 eax
|
|
327 |
#define a(i) aPtr[((i)*13+16) % 17] // 13 is inverse of 4 mod 17 |
|
328 |
#define c(i) cPtr[((i)*13+16) % 17]
|
|
329 |
// b: 0 4 | 1 5 | 2 6 | 3 7
|
|
330 |
#define b(i, j) b##i[(j)*2%8 + (j)/4]
|
|
1
by weidai
Initial revision |
331 |
|
332 |
// buffer update
|
|
265
by weidai
SSE2 optimizations |
333 |
#define US(i) {word32 t=b(0,i); b(0,i)=ConditionalByteReverse(B::ToEnum(), p[i])^t; b(25,(i+6)%8)^=t;}
|
334 |
#define UL(i) {word32 t=b(0,i); b(0,i)=a(i+1)^t; b(25,(i+6)%8)^=t;}
|
|
1
by weidai
Initial revision |
335 |
// gamma and pi
|
265
by weidai
SSE2 optimizations |
336 |
#define GP(i) c(5*i%17) = rotlFixed(a(i) ^ (a((i+1)%17) | ~a((i+2)%17)), ((5*i%17)*((5*i%17)+1)/2)%32)
|
1
by weidai
Initial revision |
337 |
// theta and sigma
|
265
by weidai
SSE2 optimizations |
338 |
#define T(i,x) a(i) = c(i) ^ c((i+1)%17) ^ c((i+4)%17) ^ x
|
1
by weidai
Initial revision |
339 |
#define TS1S(i) T(i+1, ConditionalByteReverse(B::ToEnum(), p[i]))
|
265
by weidai
SSE2 optimizations |
340 |
#define TS1L(i) T(i+1, b(4,i))
|
341 |
#define TS2(i) T(i+9, b(16,i))
|
|
1
by weidai
Initial revision |
342 |
|
343 |
while (count--) |
|
344 |
{
|
|
503
by weidai
fix for makefile and Panama cipher validation failure on armel http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=619856 |
345 |
if (output) |
1
by weidai
Initial revision |
346 |
{
|
503
by weidai
fix for makefile and Panama cipher validation failure on armel http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=619856 |
347 |
#define PANAMA_OUTPUT(x) \
|
348 |
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 0, a(0+9));\
|
|
349 |
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 1, a(1+9));\
|
|
350 |
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 2, a(2+9));\
|
|
351 |
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 3, a(3+9));\
|
|
352 |
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 4, a(4+9));\
|
|
353 |
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 5, a(5+9));\
|
|
354 |
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 6, a(6+9));\
|
|
355 |
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 7, a(7+9));
|
|
356 |
||
357 |
typedef word32 WordType; |
|
358 |
CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(PANAMA_OUTPUT, 4*8); |
|
1
by weidai
Initial revision |
359 |
}
|
360 |
||
265
by weidai
SSE2 optimizations |
361 |
word32 *const b16 = (word32 *)(bPtr+((bstart+16*32) & 31*32)); |
362 |
word32 *const b4 = (word32 *)(bPtr+((bstart+(32-4)*32) & 31*32)); |
|
363 |
bstart += 32; |
|
364 |
word32 *const b0 = (word32 *)(bPtr+((bstart) & 31*32)); |
|
365 |
word32 *const b25 = (word32 *)(bPtr+((bstart+(32-25)*32) & 31*32)); |
|
1
by weidai
Initial revision |
366 |
|
367 |
if (p) |
|
368 |
{
|
|
369 |
US(0); US(1); US(2); US(3); US(4); US(5); US(6); US(7); |
|
370 |
}
|
|
371 |
else
|
|
372 |
{
|
|
373 |
UL(0); UL(1); UL(2); UL(3); UL(4); UL(5); UL(6); UL(7); |
|
374 |
}
|
|
375 |
||
265
by weidai
SSE2 optimizations |
376 |
GP(0); |
377 |
GP(1); |
|
378 |
GP(2); |
|
379 |
GP(3); |
|
380 |
GP(4); |
|
381 |
GP(5); |
|
382 |
GP(6); |
|
383 |
GP(7); |
|
384 |
GP(8); |
|
385 |
GP(9); |
|
386 |
GP(10); |
|
387 |
GP(11); |
|
388 |
GP(12); |
|
389 |
GP(13); |
|
390 |
GP(14); |
|
391 |
GP(15); |
|
392 |
GP(16); |
|
1
by weidai
Initial revision |
393 |
|
394 |
T(0,1); |
|
395 |
||
396 |
if (p) |
|
397 |
{
|
|
398 |
TS1S(0); TS1S(1); TS1S(2); TS1S(3); TS1S(4); TS1S(5); TS1S(6); TS1S(7); |
|
399 |
p += 8; |
|
400 |
}
|
|
401 |
else
|
|
402 |
{
|
|
403 |
TS1L(0); TS1L(1); TS1L(2); TS1L(3); TS1L(4); TS1L(5); TS1L(6); TS1L(7); |
|
404 |
}
|
|
405 |
||
406 |
TS2(0); TS2(1); TS2(2); TS2(3); TS2(4); TS2(5); TS2(6); TS2(7); |
|
407 |
}
|
|
265
by weidai
SSE2 optimizations |
408 |
m_state[17] = bstart; |
1
by weidai
Initial revision |
409 |
}
|
410 |
||
348
by weidai
fix compile on Turbo C++ 2006 |
411 |
namespace Weak { |
1
by weidai
Initial revision |
412 |
template <class B> |
348
by weidai
fix compile on Turbo C++ 2006 |
413 |
size_t PanamaHash<B>::HashMultipleBlocks(const word32 *input, size_t length) |
1
by weidai
Initial revision |
414 |
{
|
156
by weidai
port to GCC 3.4 |
415 |
this->Iterate(length / this->BLOCKSIZE, input); |
416 |
return length % this->BLOCKSIZE; |
|
1
by weidai
Initial revision |
417 |
}
|
418 |
||
419 |
template <class B> |
|
348
by weidai
fix compile on Turbo C++ 2006 |
420 |
void PanamaHash<B>::TruncatedFinal(byte *hash, size_t size) |
1
by weidai
Initial revision |
421 |
{
|
156
by weidai
port to GCC 3.4 |
422 |
this->ThrowIfInvalidTruncatedSize(size); |
1
by weidai
Initial revision |
423 |
|
499
by weidai
fix for compiling with Clang from Marshall Clow |
424 |
this->PadLastBlock(this->BLOCKSIZE, 0x01); |
1
by weidai
Initial revision |
425 |
|
156
by weidai
port to GCC 3.4 |
426 |
HashEndianCorrectedBlock(this->m_data); |
427 |
||
428 |
this->Iterate(32); // pull |
|
429 |
||
265
by weidai
SSE2 optimizations |
430 |
FixedSizeSecBlock<word32, 8> buf; |
503
by weidai
fix for makefile and Panama cipher validation failure on armel http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=619856 |
431 |
this->Iterate(1, NULL, buf.BytePtr(), NULL); |
265
by weidai
SSE2 optimizations |
432 |
|
433 |
memcpy(hash, buf, size); |
|
156
by weidai
port to GCC 3.4 |
434 |
|
435 |
this->Restart(); // reinit for next use |
|
1
by weidai
Initial revision |
436 |
}
|
348
by weidai
fix compile on Turbo C++ 2006 |
437 |
}
|
1
by weidai
Initial revision |
438 |
|
439 |
template <class B> |
|
184
by weidai
port to MSVC .NET 2005 beta 2 |
440 |
void PanamaCipherPolicy<B>::CipherSetKey(const NameValuePairs ¶ms, const byte *key, size_t length) |
1
by weidai
Initial revision |
441 |
{
|
265
by weidai
SSE2 optimizations |
442 |
assert(length==32); |
443 |
memcpy(m_key, key, 32); |
|
444 |
}
|
|
1
by weidai
Initial revision |
445 |
|
265
by weidai
SSE2 optimizations |
446 |
template <class B> |
412
by weidai
changes for 5.6: |
447 |
void PanamaCipherPolicy<B>::CipherResynchronize(byte *keystreamBuffer, const byte *iv, size_t length) |
265
by weidai
SSE2 optimizations |
448 |
{
|
412
by weidai
changes for 5.6: |
449 |
assert(length==32); |
156
by weidai
port to GCC 3.4 |
450 |
this->Reset(); |
265
by weidai
SSE2 optimizations |
451 |
this->Iterate(1, m_key); |
452 |
if (iv && IsAligned<word32>(iv)) |
|
453 |
this->Iterate(1, (const word32 *)iv); |
|
454 |
else
|
|
455 |
{
|
|
456 |
FixedSizeSecBlock<word32, 8> buf; |
|
457 |
if (iv) |
|
458 |
memcpy(buf, iv, 32); |
|
459 |
else
|
|
460 |
memset(buf, 0, 32); |
|
461 |
this->Iterate(1, buf); |
|
462 |
}
|
|
463 |
||
376
by weidai
- port x64 assembly code to MASM |
464 |
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
|
442
by weidai
disable Panama SSE2 code for P4 |
465 |
if (B::ToEnum() == LITTLE_ENDIAN_ORDER && HasSSE2() && !IsP4()) // SSE2 code is slower on P4 Prescott |
265
by weidai
SSE2 optimizations |
466 |
Panama_SSE2_Pull(32, this->m_state, NULL, NULL); |
467 |
else
|
|
468 |
#endif
|
|
469 |
this->Iterate(32); |
|
470 |
}
|
|
471 |
||
472 |
#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X64
|
|
473 |
template <class B> |
|
474 |
unsigned int PanamaCipherPolicy<B>::GetAlignment() const |
|
475 |
{
|
|
376
by weidai
- port x64 assembly code to MASM |
476 |
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
|
265
by weidai
SSE2 optimizations |
477 |
if (B::ToEnum() == LITTLE_ENDIAN_ORDER && HasSSE2()) |
478 |
return 16; |
|
479 |
else
|
|
480 |
#endif
|
|
481 |
return 1; |
|
482 |
}
|
|
483 |
#endif
|
|
1
by weidai
Initial revision |
484 |
|
485 |
template <class B> |
|
184
by weidai
port to MSVC .NET 2005 beta 2 |
486 |
void PanamaCipherPolicy<B>::OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount) |
1
by weidai
Initial revision |
487 |
{
|
376
by weidai
- port x64 assembly code to MASM |
488 |
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
|
265
by weidai
SSE2 optimizations |
489 |
if (B::ToEnum() == LITTLE_ENDIAN_ORDER && HasSSE2()) |
490 |
Panama_SSE2_Pull(iterationCount, this->m_state, (word32 *)output, (const word32 *)input); |
|
491 |
else
|
|
492 |
#endif
|
|
503
by weidai
fix for makefile and Panama cipher validation failure on armel http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=619856 |
493 |
this->Iterate(iterationCount, NULL, output, input, operation); |
1
by weidai
Initial revision |
494 |
}
|
495 |
||
496 |
template class Panama<BigEndian>; |
|
497 |
template class Panama<LittleEndian>; |
|
498 |
||
265
by weidai
SSE2 optimizations |
499 |
template class Weak::PanamaHash<BigEndian>; |
500 |
template class Weak::PanamaHash<LittleEndian>; |
|
1
by weidai
Initial revision |
501 |
|
502 |
template class PanamaCipherPolicy<BigEndian>; |
|
503 |
template class PanamaCipherPolicy<LittleEndian>; |
|
504 |
||
505 |
NAMESPACE_END
|
|
376
by weidai
- port x64 assembly code to MASM |
506 |
|
507 |
#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM |