~zooko/cryptopp/trunk

1 by weidai
Initial revision
1
// panama.cpp - written and placed in the public domain by Wei Dai
2
376 by weidai
- port x64 assembly code to MASM
3
// use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM panama.cpp" to generate MASM code
4
1 by weidai
Initial revision
5
#include "pch.h"
376 by weidai
- port x64 assembly code to MASM
6
7
#ifndef CRYPTOPP_GENERATE_X64_MASM
8
1 by weidai
Initial revision
9
#include "panama.h"
10
#include "misc.h"
265 by weidai
SSE2 optimizations
11
#include "cpu.h"
1 by weidai
Initial revision
12
13
NAMESPACE_BEGIN(CryptoPP)
14
15
template <class B>
16
void Panama<B>::Reset()
17
{
265 by weidai
SSE2 optimizations
18
	memset(m_state, 0, m_state.SizeInBytes());
19
#if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
20
	m_state[17] = HasSSSE3();
21
#endif
22
}
23
376 by weidai
- port x64 assembly code to MASM
24
#endif	// #ifndef CRYPTOPP_GENERATE_X64_MASM
25
26
// Panama_SSE2_Pull - SSE2 (optionally SSSE3) assembly version of the Panama
// iteration that takes no external input block.
//
// The instruction stream below is shared by three build flavours:
//   - CRYPTOPP_X64_MASM_AVAILABLE: only an extern "C" declaration is emitted
//     here (presumably the definition comes from the generated MASM object).
//   - CRYPTOPP_GENERATE_X64_MASM: the file is run through the preprocessor only
//     and the result is MASM source (PROC/ENDP wrapper, rdi/xmm6/xmm7 saved).
//   - otherwise: a C++ function wrapping GNU-style or MSVC-style inline asm.
//
// Parameters:
//   count - number of iterations; zero jumps straight to the exit label.
//   state - state block. Words 0..15 are loaded as four 16-byte rows into
//           xmm0..xmm3 and word 16 into eax; word 17 is read as the ring-buffer
//           byte offset (its bit 0 selects the palignr path when SSSE3 support
//           is compiled in); the ring buffer is addressed from byte offset 20*4.
//           All of these are accessed with movdqa, so the block must be
//           16-byte aligned.
//   z     - keystream destination, or null to skip output. 32 bytes are written
//           per iteration and the pointer advances by 32; aligned and unaligned
//           destinations use movdqa and movdqu respectively.
//   y     - optional data XORed into the keystream before it is stored, or null.
//           Advances by 32 bytes per iteration; aligned and unaligned sources
//           are both handled.
#ifdef CRYPTOPP_X64_MASM_AVAILABLE
extern "C" {
void Panama_SSE2_Pull(size_t count, word32 *state, word32 *z, const word32 *y);
}
#elif CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE

#ifdef CRYPTOPP_GENERATE_X64_MASM
	Panama_SSE2_Pull	PROC FRAME
	rex_push_reg rdi
	alloc_stack(2*16)
	save_xmm128 xmm6, 0h
	save_xmm128 xmm7, 10h
	.endprolog
#else
#pragma warning(disable: 4731)	// frame pointer register 'ebp' modified by inline assembly code
void CRYPTOPP_NOINLINE Panama_SSE2_Pull(size_t count, word32 *state, word32 *z, const word32 *y)
{
#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
	asm __volatile__
	(
		".intel_syntax noprefix;"
		AS_PUSH_IF86(	bx)
#else
	AS2(	mov		AS_REG_1, count)
	AS2(	mov		AS_REG_2, state)
	AS2(	mov		AS_REG_3, z)
	AS2(	mov		AS_REG_4, y)
#endif
#endif	// #ifdef CRYPTOPP_GENERATE_X64_MASM

// REG_loopEnd holds the ring offset at which the loop stops: a stack slot on
// x86, a register on x64.
#if CRYPTOPP_BOOL_X86
	#define REG_loopEnd			[esp]
#elif defined(CRYPTOPP_GENERATE_X64_MASM)
	#define REG_loopEnd			rdi
#else
	#define REG_loopEnd			r8
#endif

	// loop end = count*32 + current ring offset (state word 17); exit if count is 0
	AS2(	shl		AS_REG_1, 5)
	ASJ(	jz,		5, f)
	AS2(	mov		AS_REG_6d, [AS_REG_2+4*17])
	AS2(	add		AS_REG_1, AS_REG_6)

	#if CRYPTOPP_BOOL_X64
		AS2(	mov		REG_loopEnd, AS_REG_1)
	#else
		AS1(	push	ebp)
		AS1(	push	AS_REG_1)
	#endif

	// load the 17-word state: four rows in xmm0..xmm3, the last word in eax
	AS2(	movdqa	xmm0, XMMWORD_PTR [AS_REG_2+0*16])
	AS2(	movdqa	xmm1, XMMWORD_PTR [AS_REG_2+1*16])
	AS2(	movdqa	xmm2, XMMWORD_PTR [AS_REG_2+2*16])
	AS2(	movdqa	xmm3, XMMWORD_PTR [AS_REG_2+3*16])
	AS2(	mov		eax, dword ptr [AS_REG_2+4*16])

	ASL(4)
	// gamma and pi
#if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
	AS2(	test	AS_REG_6, 1)
	ASJ(	jnz,	6, f)
#endif
	AS2(	movdqa	xmm6, xmm2)
	AS2(	movss	xmm6, xmm3)
	ASS(	pshufd	xmm5, xmm6, 0, 3, 2, 1)
	AS2(	movd	xmm6, eax)
	AS2(	movdqa	xmm7, xmm3)
	AS2(	movss	xmm7, xmm6)
	ASS(	pshufd	xmm6, xmm7, 0, 3, 2, 1)
#if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
	ASJ(	jmp,	7, f)
	ASL(6)
	AS2(	movdqa	xmm5, xmm3)
	AS3(	palignr	xmm5, xmm2, 4)
	AS2(	movd	xmm6, eax)
	AS3(	palignr	xmm6, xmm3, 4)
	ASL(7)
#endif

	AS2(	movd	AS_REG_1d, xmm2)
	AS1(	not		AS_REG_1d)
	AS2(	movd	AS_REG_7d, xmm3)
	AS2(	or		AS_REG_1d, AS_REG_7d)
	AS2(	xor		eax, AS_REG_1d)

// SSE2_Index maps a logical state word number to its slot in the reordered
// state layout (same mapping as the a(i)/c(i) macros used by Iterate).
#define SSE2_Index(i) ASM_MOD(((i)*13+16), 17)

// pi(i): rotate the low dword of xmm7 left by the triangular number of
// (5*i mod 17), reduced mod 32, and store it to state slot SSE2_Index(5*i mod 17).
#define pi(i)	\
	AS2(	movd	AS_REG_1d, xmm7)\
	AS2(	rol		AS_REG_1d, ASM_MOD((ASM_MOD(5*i,17)*(ASM_MOD(5*i,17)+1)/2), 32))\
	AS2(	mov		[AS_REG_2+SSE2_Index(ASM_MOD(5*(i), 17))*4], AS_REG_1d)

// pi4: compute ((NOT x) OR y) XOR z on four words at once, then apply pi to
// each of the four lanes in turn (lanes are rotated into the low dword).
#define pi4(x, y, z, a, b, c, d)	\
	AS2(	pcmpeqb	xmm7, xmm7)\
	AS2(	pxor	xmm7, x)\
	AS2(	por		xmm7, y)\
	AS2(	pxor	xmm7, z)\
	pi(a)\
	ASS(	pshuflw	xmm7, xmm7, 1, 0, 3, 2)\
	pi(b)\
	AS2(	punpckhqdq	xmm7, xmm7)\
	pi(c)\
	ASS(	pshuflw	xmm7, xmm7, 1, 0, 3, 2)\
	pi(d)

	pi4(xmm1, xmm2, xmm3, 1, 5, 9, 13)
	pi4(xmm0, xmm1, xmm2, 2, 6, 10, 14)
	pi4(xmm6, xmm0, xmm1, 3, 7, 11, 15)
	pi4(xmm5, xmm6, xmm0, 4, 8, 12, 16)

	// output keystream and update buffer here to hide partial memory stalls between pi and theta
	AS2(	movdqa	xmm4, xmm3)
	AS2(	punpcklqdq	xmm3, xmm2)		// 1 5 2 6
	AS2(	punpckhdq	xmm4, xmm2)		// 9 10 13 14
	AS2(	movdqa	xmm2, xmm1)
	AS2(	punpcklqdq	xmm1, xmm0)		// 3 7 4 8
	AS2(	punpckhdq	xmm2, xmm0)		// 11 12 15 16

	// keystream
	// label 0: no output wanted; label 2: unaligned y; label 1: store; label 3: unaligned z
	AS2(	test	AS_REG_3, AS_REG_3)
	ASJ(	jz,		0, f)
	AS2(	movdqa	xmm6, xmm4)
	AS2(	punpcklqdq	xmm4, xmm2)
	AS2(	punpckhqdq	xmm6, xmm2)
	AS2(	test	AS_REG_4, 15)
	ASJ(	jnz,	2, f)
	AS2(	test	AS_REG_4, AS_REG_4)
	ASJ(	jz,		1, f)
	AS2(	pxor	xmm4, [AS_REG_4])
	AS2(	pxor	xmm6, [AS_REG_4+16])
	AS2(	add		AS_REG_4, 32)
	ASJ(	jmp,	1, f)
	ASL(2)
	AS2(	movdqu	xmm0, [AS_REG_4])
	AS2(	movdqu	xmm2, [AS_REG_4+16])
	AS2(	pxor	xmm4, xmm0)
	AS2(	pxor	xmm6, xmm2)
	AS2(	add		AS_REG_4, 32)
	ASL(1)
	AS2(	test	AS_REG_3, 15)
	ASJ(	jnz,	3, f)
	AS2(	movdqa	XMMWORD_PTR [AS_REG_3], xmm4)
	AS2(	movdqa	XMMWORD_PTR [AS_REG_3+16], xmm6)
	AS2(	add		AS_REG_3, 32)
	ASJ(	jmp,	0, f)
	ASL(3)
	AS2(	movdqu	XMMWORD_PTR [AS_REG_3], xmm4)
	AS2(	movdqu	XMMWORD_PTR [AS_REG_3+16], xmm6)
	AS2(	add		AS_REG_3, 32)
	ASL(0)

	// buffer update
	AS2(	lea		AS_REG_1, [AS_REG_6 + 32])
	AS2(	and		AS_REG_1, 31*32)
	AS2(	lea		AS_REG_7, [AS_REG_6 + (32-24)*32])
	AS2(	and		AS_REG_7, 31*32)

	AS2(	movdqa	xmm0, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+0*8])
	AS2(	pxor	xmm3, xmm0)
	ASS(	pshufd	xmm0, xmm0, 2, 3, 0, 1)
	AS2(	movdqa	XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+0*8], xmm3)
	AS2(	pxor	xmm0, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+2*8])
	AS2(	movdqa	XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+2*8], xmm0)

	AS2(	movdqa	xmm4, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+2*8])
	AS2(	pxor	xmm1, xmm4)
	AS2(	movdqa	XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+2*8], xmm1)
	AS2(	pxor	xmm4, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+0*8])
	AS2(	movdqa	XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+0*8], xmm4)

	// theta
	// reload the rows that the pi stores above wrote back to the state block
	AS2(	movdqa	xmm3, XMMWORD_PTR [AS_REG_2+3*16])
	AS2(	movdqa	xmm2, XMMWORD_PTR [AS_REG_2+2*16])
	AS2(	movdqa	xmm1, XMMWORD_PTR [AS_REG_2+1*16])
	AS2(	movdqa	xmm0, XMMWORD_PTR [AS_REG_2+0*16])

#if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
	AS2(	test	AS_REG_6, 1)
	ASJ(	jnz,	8, f)
#endif
	AS2(	movd	xmm6, eax)
	AS2(	movdqa	xmm7, xmm3)
	AS2(	movss	xmm7, xmm6)
	AS2(	movdqa	xmm6, xmm2)
	AS2(	movss	xmm6, xmm3)
	AS2(	movdqa	xmm5, xmm1)
	AS2(	movss	xmm5, xmm2)
	AS2(	movdqa	xmm4, xmm0)
	AS2(	movss	xmm4, xmm1)
	ASS(	pshufd	xmm7, xmm7, 0, 3, 2, 1)
	ASS(	pshufd	xmm6, xmm6, 0, 3, 2, 1)
	ASS(	pshufd	xmm5, xmm5, 0, 3, 2, 1)
	ASS(	pshufd	xmm4, xmm4, 0, 3, 2, 1)
#if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
	ASJ(	jmp,	9, f)
	ASL(8)
	AS2(	movd	xmm7, eax)
	AS3(	palignr	xmm7, xmm3, 4)
	AS2(	movq	xmm6, xmm3)
	AS3(	palignr	xmm6, xmm2, 4)
	AS2(	movq	xmm5, xmm2)
	AS3(	palignr	xmm5, xmm1, 4)
	AS2(	movq	xmm4, xmm1)
	AS3(	palignr	xmm4, xmm0, 4)
	ASL(9)
#endif

	AS2(	xor		eax, 1)
	AS2(	movd	AS_REG_1d, xmm0)
	AS2(	xor		eax, AS_REG_1d)
	AS2(	movd	AS_REG_1d, xmm3)
	AS2(	xor		eax, AS_REG_1d)

	AS2(	pxor	xmm3, xmm2)
	AS2(	pxor	xmm2, xmm1)
	AS2(	pxor	xmm1, xmm0)
	AS2(	pxor	xmm0, xmm7)
	AS2(	pxor	xmm3, xmm7)
	AS2(	pxor	xmm2, xmm6)
	AS2(	pxor	xmm1, xmm5)
	AS2(	pxor	xmm0, xmm4)

	// sigma
	AS2(	lea		AS_REG_1, [AS_REG_6 + (32-4)*32])
	AS2(	and		AS_REG_1, 31*32)
	AS2(	lea		AS_REG_7, [AS_REG_6 + 16*32])
	AS2(	and		AS_REG_7, 31*32)

	AS2(	movdqa	xmm4, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+0*16])
	AS2(	movdqa	xmm5, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+0*16])
	AS2(	movdqa	xmm6, xmm4)
	AS2(	punpcklqdq	xmm4, xmm5)
	AS2(	punpckhqdq	xmm6, xmm5)
	AS2(	pxor	xmm3, xmm4)
	AS2(	pxor	xmm2, xmm6)

	AS2(	movdqa	xmm4, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+1*16])
	AS2(	movdqa	xmm5, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+1*16])
	AS2(	movdqa	xmm6, xmm4)
	AS2(	punpcklqdq	xmm4, xmm5)
	AS2(	punpckhqdq	xmm6, xmm5)
	AS2(	pxor	xmm1, xmm4)
	AS2(	pxor	xmm0, xmm6)

	// loop
	AS2(	add		AS_REG_6, 32)
	AS2(	cmp		AS_REG_6, REG_loopEnd)
	ASJ(	jne,	4, b)

	// save state
	// NOTE(review): only word 16 and the four xmm rows are stored below; no store
	// of AS_REG_6 back to state word 17 is visible in this routine. Confirm that
	// is intended, since Iterate() does write its advanced offset to m_state[17].
	AS2(	mov		[AS_REG_2+4*16], eax)
	AS2(	movdqa	XMMWORD_PTR [AS_REG_2+3*16], xmm3)
	AS2(	movdqa	XMMWORD_PTR [AS_REG_2+2*16], xmm2)
	AS2(	movdqa	XMMWORD_PTR [AS_REG_2+1*16], xmm1)
	AS2(	movdqa	XMMWORD_PTR [AS_REG_2+0*16], xmm0)

	#if CRYPTOPP_BOOL_X86
		AS2(	add		esp, 4)
		AS1(	pop		ebp)
	#endif
	ASL(5)

#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
		AS_POP_IF86(	bx)
		".att_syntax prefix;"
			:
	#if CRYPTOPP_BOOL_X64
			: "D" (count), "S" (state), "d" (z), "c" (y)
			: "%r8", "%r9", "r10", "%eax", "memory", "cc", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
	#else
			: "c" (count), "d" (state), "S" (z), "D" (y)
			: "%eax", "memory", "cc"
	#endif
	);
#endif
#ifdef CRYPTOPP_GENERATE_X64_MASM
	movdqa	xmm6, [rsp + 0h]
	movdqa	xmm7, [rsp + 10h]
	add rsp, 2*16
	pop	rdi
	ret
	Panama_SSE2_Pull ENDP
#else
}
#endif
#endif	// #ifdef CRYPTOPP_X64_MASM_AVAILABLE / #elif CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
312
313
#ifndef CRYPTOPP_GENERATE_X64_MASM
1 by weidai
Initial revision
314
315
template <class B>
503 by weidai
fix for makefile and Panama cipher validation failure on armel http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=619856
316
void Panama<B>::Iterate(size_t count, const word32 *p, byte *output, const byte *input, KeystreamOperation operation)
1 by weidai
Initial revision
317
{
265 by weidai
SSE2 optimizations
318
	word32 bstart = m_state[17];
319
	word32 *const aPtr = m_state;
320
	word32 cPtr[17];
321
322
#define bPtr ((byte *)(aPtr+20))
323
324
// reorder the state for SSE2
325
// a and c: 4 8 12 16 | 3 7 11 15 | 2 6 10 14 | 1 5 9 13 | 0
326
//			xmm0		xmm1		xmm2		xmm3		eax
327
#define a(i) aPtr[((i)*13+16) % 17]		// 13 is inverse of 4 mod 17
328
#define c(i) cPtr[((i)*13+16) % 17]
329
// b: 0 4 | 1 5 | 2 6 | 3 7
330
#define b(i, j) b##i[(j)*2%8 + (j)/4]
1 by weidai
Initial revision
331
332
// buffer update
265 by weidai
SSE2 optimizations
333
#define US(i) {word32 t=b(0,i); b(0,i)=ConditionalByteReverse(B::ToEnum(), p[i])^t; b(25,(i+6)%8)^=t;}
334
#define UL(i) {word32 t=b(0,i); b(0,i)=a(i+1)^t; b(25,(i+6)%8)^=t;}
1 by weidai
Initial revision
335
// gamma and pi
265 by weidai
SSE2 optimizations
336
#define GP(i) c(5*i%17) = rotlFixed(a(i) ^ (a((i+1)%17) | ~a((i+2)%17)), ((5*i%17)*((5*i%17)+1)/2)%32)
1 by weidai
Initial revision
337
// theta and sigma
265 by weidai
SSE2 optimizations
338
#define T(i,x) a(i) = c(i) ^ c((i+1)%17) ^ c((i+4)%17) ^ x
1 by weidai
Initial revision
339
#define TS1S(i) T(i+1, ConditionalByteReverse(B::ToEnum(), p[i]))
265 by weidai
SSE2 optimizations
340
#define TS1L(i) T(i+1, b(4,i))
341
#define TS2(i) T(i+9, b(16,i))
1 by weidai
Initial revision
342
343
	while (count--)
344
	{
503 by weidai
fix for makefile and Panama cipher validation failure on armel http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=619856
345
		if (output)
1 by weidai
Initial revision
346
		{
503 by weidai
fix for makefile and Panama cipher validation failure on armel http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=619856
347
#define PANAMA_OUTPUT(x)	\
348
	CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 0, a(0+9));\
349
	CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 1, a(1+9));\
350
	CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 2, a(2+9));\
351
	CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 3, a(3+9));\
352
	CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 4, a(4+9));\
353
	CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 5, a(5+9));\
354
	CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 6, a(6+9));\
355
	CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 7, a(7+9));
356
357
			typedef word32 WordType;
358
			CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(PANAMA_OUTPUT, 4*8);
1 by weidai
Initial revision
359
		}
360
265 by weidai
SSE2 optimizations
361
		word32 *const b16 = (word32 *)(bPtr+((bstart+16*32) & 31*32));
362
		word32 *const b4 = (word32 *)(bPtr+((bstart+(32-4)*32) & 31*32));
363
       	bstart += 32;
364
		word32 *const b0 = (word32 *)(bPtr+((bstart) & 31*32));
365
		word32 *const b25 = (word32 *)(bPtr+((bstart+(32-25)*32) & 31*32));
1 by weidai
Initial revision
366
367
		if (p)
368
		{
369
			US(0); US(1); US(2); US(3); US(4); US(5); US(6); US(7);
370
		}
371
		else
372
		{
373
			UL(0); UL(1); UL(2); UL(3); UL(4); UL(5); UL(6); UL(7);
374
		}
375
265 by weidai
SSE2 optimizations
376
		GP(0); 
377
		GP(1); 
378
		GP(2); 
379
		GP(3); 
380
		GP(4); 
381
		GP(5); 
382
		GP(6); 
383
		GP(7);
384
		GP(8); 
385
		GP(9); 
386
		GP(10); 
387
		GP(11); 
388
		GP(12); 
389
		GP(13); 
390
		GP(14); 
391
		GP(15); 
392
		GP(16);
1 by weidai
Initial revision
393
394
		T(0,1);
395
396
		if (p)
397
		{
398
			TS1S(0); TS1S(1); TS1S(2); TS1S(3); TS1S(4); TS1S(5); TS1S(6); TS1S(7);
399
			p += 8;
400
		}
401
		else
402
		{
403
			TS1L(0); TS1L(1); TS1L(2); TS1L(3); TS1L(4); TS1L(5); TS1L(6); TS1L(7);
404
		}
405
406
		TS2(0); TS2(1); TS2(2); TS2(3); TS2(4); TS2(5); TS2(6); TS2(7);
407
	}
265 by weidai
SSE2 optimizations
408
	m_state[17] = bstart;
1 by weidai
Initial revision
409
}
410
348 by weidai
fix compile on Turbo C++ 2006
411
namespace Weak {
1 by weidai
Initial revision
412
template <class B>
348 by weidai
fix compile on Turbo C++ 2006
413
size_t PanamaHash<B>::HashMultipleBlocks(const word32 *input, size_t length)
1 by weidai
Initial revision
414
{
156 by weidai
port to GCC 3.4
415
	this->Iterate(length / this->BLOCKSIZE, input);
416
	return length % this->BLOCKSIZE;
1 by weidai
Initial revision
417
}
418
419
template <class B>
348 by weidai
fix compile on Turbo C++ 2006
420
void PanamaHash<B>::TruncatedFinal(byte *hash, size_t size)
1 by weidai
Initial revision
421
{
156 by weidai
port to GCC 3.4
422
	this->ThrowIfInvalidTruncatedSize(size);
1 by weidai
Initial revision
423
499 by weidai
fix for compiling with Clang from Marshall Clow
424
	this->PadLastBlock(this->BLOCKSIZE, 0x01);
1 by weidai
Initial revision
425
	
156 by weidai
port to GCC 3.4
426
	HashEndianCorrectedBlock(this->m_data);
427
428
	this->Iterate(32);	// pull
429
265 by weidai
SSE2 optimizations
430
	FixedSizeSecBlock<word32, 8> buf;
503 by weidai
fix for makefile and Panama cipher validation failure on armel http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=619856
431
	this->Iterate(1, NULL, buf.BytePtr(), NULL);
265 by weidai
SSE2 optimizations
432
433
	memcpy(hash, buf, size);
156 by weidai
port to GCC 3.4
434
435
	this->Restart();		// reinit for next use
1 by weidai
Initial revision
436
}
348 by weidai
fix compile on Turbo C++ 2006
437
}
1 by weidai
Initial revision
438
439
template <class B>
184 by weidai
port to MSVC .NET 2005 beta 2
440
void PanamaCipherPolicy<B>::CipherSetKey(const NameValuePairs &params, const byte *key, size_t length)
1 by weidai
Initial revision
441
{
265 by weidai
SSE2 optimizations
442
	assert(length==32);
443
	memcpy(m_key, key, 32);
444
}
1 by weidai
Initial revision
445
265 by weidai
SSE2 optimizations
446
template <class B>
412 by weidai
changes for 5.6:
447
void PanamaCipherPolicy<B>::CipherResynchronize(byte *keystreamBuffer, const byte *iv, size_t length)
265 by weidai
SSE2 optimizations
448
{
412 by weidai
changes for 5.6:
449
	assert(length==32);
156 by weidai
port to GCC 3.4
450
	this->Reset();
265 by weidai
SSE2 optimizations
451
	this->Iterate(1, m_key);
452
	if (iv && IsAligned<word32>(iv))
453
		this->Iterate(1, (const word32 *)iv);
454
	else
455
	{
456
		FixedSizeSecBlock<word32, 8> buf;
457
		if (iv)
458
			memcpy(buf, iv, 32);
459
		else
460
			memset(buf, 0, 32);
461
		this->Iterate(1, buf);
462
	}
463
376 by weidai
- port x64 assembly code to MASM
464
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
442 by weidai
disable Panama SSE2 code for P4
465
	if (B::ToEnum() == LITTLE_ENDIAN_ORDER && HasSSE2() && !IsP4())		// SSE2 code is slower on P4 Prescott
265 by weidai
SSE2 optimizations
466
		Panama_SSE2_Pull(32, this->m_state, NULL, NULL);
467
	else
468
#endif
469
		this->Iterate(32);
470
}
471
472
#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X64
// GetAlignment - report the data alignment this policy asks for.
//
// Returns 16 when the SSE2 assembly routine is compiled in, the byte order is
// little-endian and the CPU reports SSE2; returns 1 in every other case. This
// override only exists on x86 and x64 builds.
template <class B>
unsigned int PanamaCipherPolicy<B>::GetAlignment() const
{
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
	if (B::ToEnum() == LITTLE_ENDIAN_ORDER && HasSSE2())
		return 16;
	else
#endif
		return 1;
}
#endif
1 by weidai
Initial revision
484
485
template <class B>
184 by weidai
port to MSVC .NET 2005 beta 2
486
void PanamaCipherPolicy<B>::OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount)
1 by weidai
Initial revision
487
{
376 by weidai
- port x64 assembly code to MASM
488
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
265 by weidai
SSE2 optimizations
489
	if (B::ToEnum() == LITTLE_ENDIAN_ORDER && HasSSE2())
490
		Panama_SSE2_Pull(iterationCount, this->m_state, (word32 *)output, (const word32 *)input);
491
	else
492
#endif
503 by weidai
fix for makefile and Panama cipher validation failure on armel http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=619856
493
		this->Iterate(iterationCount, NULL, output, input, operation);
1 by weidai
Initial revision
494
}
495
496
template class Panama<BigEndian>;
497
template class Panama<LittleEndian>;
498
265 by weidai
SSE2 optimizations
499
template class Weak::PanamaHash<BigEndian>;
500
template class Weak::PanamaHash<LittleEndian>;
1 by weidai
Initial revision
501
502
template class PanamaCipherPolicy<BigEndian>;
503
template class PanamaCipherPolicy<LittleEndian>;
504
505
NAMESPACE_END
376 by weidai
- port x64 assembly code to MASM
506
507
#endif	// #ifndef CRYPTOPP_GENERATE_X64_MASM