8
7
NAMESPACE_BEGIN(CryptoPP)
11
10
void Panama<B>::Reset()
13
memset(m_state, 0, m_state.SizeInBytes());
14
#if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
15
m_state[17] = HasSSSE3();
19
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
21
#pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
23
void Panama_SSE2_Pull(size_t count, word32 *state, word32 *z, const word32 *y)
28
".intel_syntax noprefix;"
31
AS2( mov WORD_REG(cx), count)
32
AS2( mov WORD_REG(si), state)
33
AS2( mov WORD_REG(di), z)
34
AS2( mov WORD_REG(dx), y)
36
AS2( shl WORD_REG(cx), 5)
38
AS2( mov ebx, [WORD_REG(si)+4*17])
39
AS2( add WORD_REG(cx), WORD_REG(bx))
44
AS2( movdqa xmm0, [WORD_REG(si)+0*16])
45
AS2( movdqa xmm1, [WORD_REG(si)+1*16])
46
AS2( movdqa xmm2, [WORD_REG(si)+2*16])
47
AS2( movdqa xmm3, [WORD_REG(si)+3*16])
48
AS2( mov eax, [WORD_REG(si)+4*16])
52
#if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
53
AS2( test WORD_REG(bx), 1)
56
AS2( movdqa xmm6, xmm2)
57
AS2( movss xmm6, xmm3)
58
ASS( pshufd xmm5, xmm6, 0, 3, 2, 1)
60
AS2( movdqa xmm7, xmm3)
61
AS2( movss xmm7, xmm6)
62
ASS( pshufd xmm6, xmm7, 0, 3, 2, 1)
63
#if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
66
AS2( movdqa xmm5, xmm3)
67
AS3( palignr xmm5, xmm2, 4)
69
AS3( palignr xmm6, xmm3, 4)
79
#define SSE2_Index(i) ASM_MOD(((i)*13+16), 17)
83
AS2( rol ecx, ASM_MOD((ASM_MOD(5*i,17)*(ASM_MOD(5*i,17)+1)/2), 32))\
84
AS2( mov [WORD_REG(si)+SSE2_Index(ASM_MOD(5*(i), 17))*4], ecx)
86
#define pi4(x, y, z, a, b, c, d) \
87
AS2( pcmpeqb xmm7, xmm7)\
92
ASS( pshuflw xmm7, xmm7, 1, 0, 3, 2)\
94
AS2( punpckhqdq xmm7, xmm7)\
96
ASS( pshuflw xmm7, xmm7, 1, 0, 3, 2)\
99
pi4(xmm1, xmm2, xmm3, 1, 5, 9, 13)
100
pi4(xmm0, xmm1, xmm2, 2, 6, 10, 14)
101
pi4(xmm6, xmm0, xmm1, 3, 7, 11, 15)
102
pi4(xmm5, xmm6, xmm0, 4, 8, 12, 16)
104
// output keystream and update buffer here to hide partial memory stalls between pi and theta
105
AS2( movdqa xmm4, xmm3)
106
AS2( punpcklqdq xmm3, xmm2) // 1 5 2 6
107
AS2( punpckhdq xmm4, xmm2) // 9 10 13 14
108
AS2( movdqa xmm2, xmm1)
109
AS2( punpcklqdq xmm1, xmm0) // 3 7 4 8
110
AS2( punpckhdq xmm2, xmm0) // 11 12 15 16
113
AS2( test WORD_REG(di), WORD_REG(di))
115
AS2( movdqa xmm6, xmm4)
116
AS2( punpcklqdq xmm4, xmm2)
117
AS2( punpckhqdq xmm6, xmm2)
118
AS2( test WORD_REG(dx), 0xf)
120
AS2( test WORD_REG(dx), WORD_REG(dx))
122
AS2( pxor xmm4, [WORD_REG(dx)])
123
AS2( pxor xmm6, [WORD_REG(dx)+16])
124
AS2( add WORD_REG(dx), 32)
127
AS2( movdqu xmm0, [WORD_REG(dx)])
128
AS2( movdqu xmm2, [WORD_REG(dx)+16])
129
AS2( pxor xmm4, xmm0)
130
AS2( pxor xmm6, xmm2)
131
AS2( add WORD_REG(dx), 32)
133
AS2( test WORD_REG(di), 0xf)
135
AS2( movdqa [WORD_REG(di)], xmm4)
136
AS2( movdqa [WORD_REG(di)+16], xmm6)
137
AS2( add WORD_REG(di), 32)
140
AS2( movdqu [WORD_REG(di)], xmm4)
141
AS2( movdqu [WORD_REG(di)+16], xmm6)
142
AS2( add WORD_REG(di), 32)
146
AS2( lea WORD_REG(cx), [WORD_REG(bx) + 32])
147
AS2( and WORD_REG(cx), 31*32)
148
AS2( lea WORD_REG(bp), [WORD_REG(bx) + (32-24)*32])
149
AS2( and WORD_REG(bp), 31*32)
151
AS2( movdqa xmm0, [WORD_REG(si)+20*4+WORD_REG(cx)+0*8])
152
AS2( pxor xmm3, xmm0)
153
ASS( pshufd xmm0, xmm0, 2, 3, 0, 1)
154
AS2( movdqa [WORD_REG(si)+20*4+WORD_REG(cx)+0*8], xmm3)
155
AS2( pxor xmm0, [WORD_REG(si)+20*4+WORD_REG(bp)+2*8])
156
AS2( movdqa [WORD_REG(si)+20*4+WORD_REG(bp)+2*8], xmm0)
158
AS2( movdqa xmm4, [WORD_REG(si)+20*4+WORD_REG(cx)+2*8])
159
AS2( pxor xmm1, xmm4)
160
AS2( movdqa [WORD_REG(si)+20*4+WORD_REG(cx)+2*8], xmm1)
161
AS2( pxor xmm4, [WORD_REG(si)+20*4+WORD_REG(bp)+0*8])
162
AS2( movdqa [WORD_REG(si)+20*4+WORD_REG(bp)+0*8], xmm4)
165
AS2( movdqa xmm3, [WORD_REG(si)+3*16])
166
AS2( movdqa xmm2, [WORD_REG(si)+2*16])
167
AS2( movdqa xmm1, [WORD_REG(si)+1*16])
168
AS2( movdqa xmm0, [WORD_REG(si)+0*16])
170
#if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
171
AS2( test WORD_REG(bx), 1)
175
AS2( movdqa xmm7, xmm3)
176
AS2( movss xmm7, xmm6)
177
AS2( movdqa xmm6, xmm2)
178
AS2( movss xmm6, xmm3)
179
AS2( movdqa xmm5, xmm1)
180
AS2( movss xmm5, xmm2)
181
AS2( movdqa xmm4, xmm0)
182
AS2( movss xmm4, xmm1)
183
ASS( pshufd xmm7, xmm7, 0, 3, 2, 1)
184
ASS( pshufd xmm6, xmm6, 0, 3, 2, 1)
185
ASS( pshufd xmm5, xmm5, 0, 3, 2, 1)
186
ASS( pshufd xmm4, xmm4, 0, 3, 2, 1)
187
#if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
191
AS3( palignr xmm7, xmm3, 4)
192
AS2( movq xmm6, xmm3)
193
AS3( palignr xmm6, xmm2, 4)
194
AS2( movq xmm5, xmm2)
195
AS3( palignr xmm5, xmm1, 4)
196
AS2( movq xmm4, xmm1)
197
AS3( palignr xmm4, xmm0, 4)
207
AS2( pxor xmm3, xmm2)
208
AS2( pxor xmm2, xmm1)
209
AS2( pxor xmm1, xmm0)
210
AS2( pxor xmm0, xmm7)
211
AS2( pxor xmm3, xmm7)
212
AS2( pxor xmm2, xmm6)
213
AS2( pxor xmm1, xmm5)
214
AS2( pxor xmm0, xmm4)
217
AS2( lea WORD_REG(cx), [WORD_REG(bx) + (32-4)*32])
218
AS2( and WORD_REG(cx), 31*32)
219
AS2( lea WORD_REG(bp), [WORD_REG(bx) + 16*32])
220
AS2( and WORD_REG(bp), 31*32)
222
AS2( movdqa xmm4, [WORD_REG(si)+20*4+WORD_REG(cx)+0*16])
223
AS2( movdqa xmm5, [WORD_REG(si)+20*4+WORD_REG(bp)+0*16])
224
AS2( movdqa xmm6, xmm4)
225
AS2( punpcklqdq xmm4, xmm5)
226
AS2( punpckhqdq xmm6, xmm5)
227
AS2( pxor xmm3, xmm4)
228
AS2( pxor xmm2, xmm6)
230
AS2( movdqa xmm4, [WORD_REG(si)+20*4+WORD_REG(cx)+1*16])
231
AS2( movdqa xmm5, [WORD_REG(si)+20*4+WORD_REG(bp)+1*16])
232
AS2( movdqa xmm6, xmm4)
233
AS2( punpcklqdq xmm4, xmm5)
234
AS2( punpckhqdq xmm6, xmm5)
235
AS2( pxor xmm1, xmm4)
236
AS2( pxor xmm0, xmm6)
239
AS2( add WORD_REG(bx), 32)
240
AS2( cmp WORD_REG(bx), [WORD_REG(sp)])
244
AS2( add WORD_REG(sp), WORD_SZ)
246
AS2( mov [WORD_REG(si)+4*16], eax)
247
AS2( movdqa [WORD_REG(si)+3*16], xmm3)
248
AS2( movdqa [WORD_REG(si)+2*16], xmm2)
249
AS2( movdqa [WORD_REG(si)+1*16], xmm1)
250
AS2( movdqa [WORD_REG(si)+0*16], xmm0)
255
".att_syntax prefix;"
257
: "c" (count), "S" (state), "D" (z), "d" (y)
258
: "%eax", "memory", "cc"
13
memset(m_state, 0, m_state.size()*4);
265
16
template <class B>
266
17
void Panama<B>::Iterate(size_t count, const word32 *p, word32 *z, const word32 *y)
268
word32 bstart = m_state[17];
269
word32 *const aPtr = m_state;
272
#define bPtr ((byte *)(aPtr+20))
274
// reorder the state for SSE2
275
// a and c: 4 8 12 16 | 3 7 11 15 | 2 6 10 14 | 1 5 9 13 | 0
276
// xmm0 xmm1 xmm2 xmm3 eax
277
#define a(i) aPtr[((i)*13+16) % 17] // 13 is inverse of 4 mod 17
278
#define c(i) cPtr[((i)*13+16) % 17]
279
// b: 0 4 | 1 5 | 2 6 | 3 7
280
#define b(i, j) b##i[(j)*2%8 + (j)/4]
19
unsigned int bstart = m_bstart;
20
word32 *const a = m_state;
22
#define b ((Stage *)(a+34))
283
#define OA(i) z[i] = ConditionalByteReverse(B::ToEnum(), a(i+9))
284
#define OX(i) z[i] = y[i] ^ ConditionalByteReverse(B::ToEnum(), a(i+9))
25
#define OA(i) z[i] = ConditionalByteReverse(B::ToEnum(), a[i+9])
26
#define OX(i) z[i] = y[i] ^ ConditionalByteReverse(B::ToEnum(), a[i+9])
286
#define US(i) {word32 t=b(0,i); b(0,i)=ConditionalByteReverse(B::ToEnum(), p[i])^t; b(25,(i+6)%8)^=t;}
287
#define UL(i) {word32 t=b(0,i); b(0,i)=a(i+1)^t; b(25,(i+6)%8)^=t;}
28
#define US(i) {word32 t=b0[i]; b0[i]=ConditionalByteReverse(B::ToEnum(), p[i])^t; b25[(i+6)%8]^=t;}
29
#define UL(i) {word32 t=b0[i]; b0[i]=a[i+1]^t; b25[(i+6)%8]^=t;}
289
#define GP(i) c(5*i%17) = rotlFixed(a(i) ^ (a((i+1)%17) | ~a((i+2)%17)), ((5*i%17)*((5*i%17)+1)/2)%32)
31
#define GP(i) c[5*i%17] = rotlFixed(a[i] ^ (a[(i+1)%17] | ~a[(i+2)%17]), ((5*i%17)*((5*i%17)+1)/2)%32)
290
32
// theta and sigma
291
#define T(i,x) a(i) = c(i) ^ c((i+1)%17) ^ c((i+4)%17) ^ x
33
#define T(i,x) a[i] = c[i] ^ c[(i+1)%17] ^ c[(i+4)%17] ^ x
292
34
#define TS1S(i) T(i+1, ConditionalByteReverse(B::ToEnum(), p[i]))
293
#define TS1L(i) T(i+1, b(4,i))
294
#define TS2(i) T(i+9, b(16,i))
35
#define TS1L(i) T(i+1, b4[i])
36
#define TS2(i) T(i+9, b16[i])
379
106
this->Iterate(32); // pull
381
FixedSizeSecBlock<word32, 8> buf;
382
this->Iterate(1, NULL, buf, NULL);
384
memcpy(hash, buf, size);
108
ConditionalByteReverse(B::ToEnum(), this->m_state+9, this->m_state+9, DIGESTSIZE);
109
memcpy(hash, this->m_state+9, size);
386
111
this->Restart(); // reinit for next use
390
114
template <class B>
391
115
void PanamaCipherPolicy<B>::CipherSetKey(const NameValuePairs ¶ms, const byte *key, size_t length)
394
memcpy(m_key, key, 32);
117
FixedSizeSecBlock<word32, 8> buf;
398
void PanamaCipherPolicy<B>::CipherResynchronize(byte *keystreamBuffer, const byte *iv)
401
this->Iterate(1, m_key);
402
if (iv && IsAligned<word32>(iv))
403
this->Iterate(1, (const word32 *)iv);
406
FixedSizeSecBlock<word32, 8> buf;
411
this->Iterate(1, buf);
414
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
415
if (B::ToEnum() == LITTLE_ENDIAN_ORDER && HasSSE2())
416
Panama_SSE2_Pull(32, this->m_state, NULL, NULL);
422
#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X64
424
unsigned int PanamaCipherPolicy<B>::GetAlignment() const
426
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
427
if (B::ToEnum() == LITTLE_ENDIAN_ORDER && HasSSE2())
120
memcpy(buf, key, 32);
121
this->Iterate(1, buf);
123
memcpy(buf, key+32, 32);
126
this->Iterate(1, buf);
435
131
template <class B>
436
132
void PanamaCipherPolicy<B>::OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount)
438
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
439
if (B::ToEnum() == LITTLE_ENDIAN_ORDER && HasSSE2())
440
Panama_SSE2_Pull(iterationCount, this->m_state, (word32 *)output, (const word32 *)input);
443
this->Iterate(iterationCount, NULL, (word32 *)output, (const word32 *)input);
134
this->Iterate(iterationCount, NULL, (word32 *)output, (const word32 *)input);
446
137
template class Panama<BigEndian>;
447
138
template class Panama<LittleEndian>;
449
template class Weak::PanamaHash<BigEndian>;
450
template class Weak::PanamaHash<LittleEndian>;
140
template class PanamaHash<BigEndian>;
141
template class PanamaHash<LittleEndian>;
452
143
template class PanamaCipherPolicy<BigEndian>;
453
144
template class PanamaCipherPolicy<LittleEndian>;