~zooko/cryptopp/trunk

« back to all changes in this revision

Viewing changes to salsa.cpp

  • Committer: weidai
  • Date: 2007-05-04 15:04:58 UTC
  • Revision ID: svn-v4:57ff6487-cd31-0410-9ec3-f628ee90f5f0:trunk/c5:328
reduce risk of random number reuse after VM rollback

Show diffs side-by-side

added

removed

Lines of Context:
4
4
#include "salsa.h"
5
5
#include "misc.h"
6
6
#include "argnames.h"
 
7
#include "cpu.h"
 
8
 
 
9
#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
 
10
#include <emmintrin.h>
 
11
#endif
7
12
 
8
13
NAMESPACE_BEGIN(CryptoPP)
9
14
 
12
17
        Salsa20::Encryption x;
13
18
}
14
19
 
15
 
void Salsa20_Policy::GetNextIV(byte *IV) const
 
20
void Salsa20_Policy::CipherGetNextIV(byte *IV)
16
21
{
17
 
        word32 j6 = m_state[6] + 1;
18
 
        word32 j7 = m_state[7] + (j6 == 0);
19
 
 
20
 
        UnalignedPutWord(LITTLE_ENDIAN_ORDER, IV, j6);
21
 
        UnalignedPutWord(LITTLE_ENDIAN_ORDER, IV+4, j7);
 
22
        word32 j6, j7;
 
23
 
 
24
        j6 = m_state[14] + 1;
 
25
        j7 = m_state[11] + (j6 == 0);
 
26
 
 
27
        PutWord(false, LITTLE_ENDIAN_ORDER, IV, j6);
 
28
        PutWord(false, LITTLE_ENDIAN_ORDER, IV+4, j7);
22
29
}
23
30
 
24
31
void Salsa20_Policy::CipherSetKey(const NameValuePairs &params, const byte *key, size_t length)
28
35
        if (!(m_rounds == 8 || m_rounds == 12 || m_rounds == 20))
29
36
                throw InvalidRounds(StaticAlgorithmName(), m_rounds);
30
37
 
31
 
        GetUserKey(LITTLE_ENDIAN_ORDER, m_state+1, 4, key, 16);
32
 
        GetUserKey(LITTLE_ENDIAN_ORDER, m_state+11, 4, key + length - 16, 16);
 
38
        // m_state is reordered for SSE2
 
39
        GetBlock<word32, LittleEndian, false> get1(key);
 
40
        get1(m_state[13])(m_state[10])(m_state[7])(m_state[4]);
 
41
        GetBlock<word32, LittleEndian, false> get2(key + length - 16);
 
42
        get2(m_state[15])(m_state[12])(m_state[9])(m_state[6]);
33
43
 
34
 
        // m_state[0,5,10,15] forms "expand 16-byte k" or "expand 32-byte k"
 
44
        // "expand 16-byte k" or "expand 32-byte k"
35
45
        m_state[0] = 0x61707865;
36
 
        m_state[5] = (length == 16) ? 0x3120646e : 0x3320646e;
37
 
        m_state[10] = (length == 16) ? 0x79622d36 : 0x79622d32;
38
 
        m_state[15] = 0x6b206574;
 
46
        m_state[1] = (length == 16) ? 0x3120646e : 0x3320646e;
 
47
        m_state[2] = (length == 16) ? 0x79622d36 : 0x79622d32;
 
48
        m_state[3] = 0x6b206574;
39
49
}
40
50
 
41
51
void Salsa20_Policy::CipherResynchronize(byte *keystreamBuffer, const byte *IV)
42
52
{
43
 
        GetUserKey(LITTLE_ENDIAN_ORDER, m_state+6, 4, IV, 8);
 
53
        GetBlock<word32, LittleEndian, false> get(IV);
 
54
        get(m_state[14])(m_state[11]);
 
55
        m_state[8] = m_state[5] = 0;
44
56
}
45
57
 
46
58
void Salsa20_Policy::SeekToIteration(lword iterationCount)
47
59
{
48
60
        m_state[8] = (word32)iterationCount;
49
 
        m_state[9] = (word32)SafeRightShift<32>(iterationCount);
50
 
}
 
61
        m_state[5] = (word32)SafeRightShift<32>(iterationCount);
 
62
}
 
63
 
 
64
#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X64
 
65
unsigned int Salsa20_Policy::GetAlignment() const
 
66
{
 
67
#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
 
68
        if (HasSSE2())
 
69
                return 16;
 
70
        else
 
71
#endif
 
72
                return 1;
 
73
}
 
74
 
 
75
unsigned int Salsa20_Policy::GetOptimalBlockSize() const
 
76
{
 
77
#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
 
78
        if (HasSSE2())
 
79
                return 4*BYTES_PER_ITERATION;
 
80
        else
 
81
#endif
 
82
                return BYTES_PER_ITERATION;
 
83
}
 
84
#endif
51
85
 
52
86
void Salsa20_Policy::OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount)
53
87
{
54
 
        KeystreamOutput<LittleEndian> keystreamOutput(operation, output, input);
 
88
        int i;
 
89
#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
 
90
        if (HasSSE2())
 
91
        {
 
92
                __m128i *s = (__m128i *)m_state.data();
 
93
 
 
94
                if (iterationCount >= 4)
 
95
                {
 
96
                        __m128i ss[16];
 
97
                        ss[0] = _mm_shuffle_epi32(s[0], _MM_SHUFFLE(0, 0, 0, 0));
 
98
                        ss[1] = _mm_shuffle_epi32(s[0], _MM_SHUFFLE(1, 1, 1, 1));
 
99
                        ss[2] = _mm_shuffle_epi32(s[0], _MM_SHUFFLE(2, 2, 2, 2));
 
100
                        ss[3] = _mm_shuffle_epi32(s[0], _MM_SHUFFLE(3, 3, 3, 3));
 
101
                        ss[4] = _mm_shuffle_epi32(s[1], _MM_SHUFFLE(0, 0, 0, 0));
 
102
                        ss[6] = _mm_shuffle_epi32(s[1], _MM_SHUFFLE(2, 2, 2, 2));
 
103
                        ss[7] = _mm_shuffle_epi32(s[1], _MM_SHUFFLE(3, 3, 3, 3));
 
104
                        ss[9] = _mm_shuffle_epi32(s[2], _MM_SHUFFLE(1, 1, 1, 1));
 
105
                        ss[10] = _mm_shuffle_epi32(s[2], _MM_SHUFFLE(2, 2, 2, 2));
 
106
                        ss[11] = _mm_shuffle_epi32(s[2], _MM_SHUFFLE(3, 3, 3, 3));
 
107
                        ss[12] = _mm_shuffle_epi32(s[3], _MM_SHUFFLE(0, 0, 0, 0));
 
108
                        ss[13] = _mm_shuffle_epi32(s[3], _MM_SHUFFLE(1, 1, 1, 1));
 
109
                        ss[14] = _mm_shuffle_epi32(s[3], _MM_SHUFFLE(2, 2, 2, 2));
 
110
                        ss[15] = _mm_shuffle_epi32(s[3], _MM_SHUFFLE(3, 3, 3, 3));
 
111
 
 
112
                        do
 
113
                        {
 
114
                                word32 *countersLo = (word32*)&(ss[8]), *countersHi = (word32*)&(ss[5]);
 
115
                                for (i=0; i<4; i++)
 
116
                                {
 
117
                                        countersLo[i] = m_state[8];
 
118
                                        countersHi[i] = m_state[5];
 
119
                                        if (++m_state[8] == 0)
 
120
                                                ++m_state[5];
 
121
                                }
 
122
 
 
123
                                __m128i x0 = ss[0];
 
124
                                __m128i x1 = ss[1];
 
125
                                __m128i x2 = ss[2];
 
126
                                __m128i x3 = ss[3];
 
127
                                __m128i x4 = ss[4];
 
128
                                __m128i x5 = ss[5];
 
129
                                __m128i x6 = ss[6];
 
130
                                __m128i x7 = ss[7];
 
131
                                __m128i x8 = ss[8];
 
132
                                __m128i x9 = ss[9];
 
133
                                __m128i x10 = ss[10];
 
134
                                __m128i x11 = ss[11];
 
135
                                __m128i x12 = ss[12];
 
136
                                __m128i x13 = ss[13];
 
137
                                __m128i x14 = ss[14];
 
138
                                __m128i x15 = ss[15];
 
139
 
 
140
                                for (i=m_rounds; i>0; i-=2)
 
141
                                {
 
142
                                        #define SSE2_QUARTER_ROUND(a, b, d, i)                          {\
 
143
                                                __m128i t = _mm_add_epi32(a, d);                                \
 
144
                                                b = _mm_xor_si128(b, _mm_slli_epi32(t, i));             \
 
145
                                                b = _mm_xor_si128(b, _mm_srli_epi32(t, 32-i));}
 
146
 
 
147
                                        #define QUARTER_ROUND(a, b, c, d)       \
 
148
                                                SSE2_QUARTER_ROUND(a, b, d, 7)  \
 
149
                                                SSE2_QUARTER_ROUND(b, c, a, 9)  \
 
150
                                                SSE2_QUARTER_ROUND(c, d, b, 13) \
 
151
                                                SSE2_QUARTER_ROUND(d, a, c, 18) 
 
152
 
 
153
                                        QUARTER_ROUND(x0, x4, x8, x12)
 
154
                                        QUARTER_ROUND(x1, x5, x9, x13)
 
155
                                        QUARTER_ROUND(x2, x6, x10, x14)
 
156
                                        QUARTER_ROUND(x3, x7, x11, x15)
 
157
 
 
158
                                        QUARTER_ROUND(x0, x13, x10, x7)
 
159
                                        QUARTER_ROUND(x1, x14, x11, x4)
 
160
                                        QUARTER_ROUND(x2, x15, x8, x5)
 
161
                                        QUARTER_ROUND(x3, x12, x9, x6)
 
162
 
 
163
                                        #undef QUARTER_ROUND
 
164
                                }
 
165
 
 
166
                                x0 = _mm_add_epi32(x0, ss[0]);
 
167
                                x1 = _mm_add_epi32(x1, ss[1]);
 
168
                                x2 = _mm_add_epi32(x2, ss[2]);
 
169
                                x3 = _mm_add_epi32(x3, ss[3]);
 
170
                                x4 = _mm_add_epi32(x4, ss[4]);
 
171
                                x5 = _mm_add_epi32(x5, ss[5]);
 
172
                                x6 = _mm_add_epi32(x6, ss[6]);
 
173
                                x7 = _mm_add_epi32(x7, ss[7]);
 
174
                                x8 = _mm_add_epi32(x8, ss[8]);
 
175
                                x9 = _mm_add_epi32(x9, ss[9]);
 
176
                                x10 = _mm_add_epi32(x10, ss[10]);
 
177
                                x11 = _mm_add_epi32(x11, ss[11]);
 
178
                                x12 = _mm_add_epi32(x12, ss[12]);
 
179
                                x13 = _mm_add_epi32(x13, ss[13]);
 
180
                                x14 = _mm_add_epi32(x14, ss[14]);
 
181
                                x15 = _mm_add_epi32(x15, ss[15]);
 
182
 
 
183
                                #define OUTPUT_4(x, a, b, c, d, e, f, g, h)     {\
 
184
                                        __m128i t0 = _mm_unpacklo_epi32(a, b);\
 
185
                                        __m128i t1 = _mm_unpacklo_epi32(c, d);\
 
186
                                        __m128i t2 = _mm_unpacklo_epi64(t0, t1);\
 
187
                                        CRYPTOPP_KEYSTREAM_OUTPUT_XMM(x, e, t2)\
 
188
                                        t2 = _mm_unpackhi_epi64(t0, t1);\
 
189
                                        CRYPTOPP_KEYSTREAM_OUTPUT_XMM(x, f, t2)\
 
190
                                        t0 = _mm_unpackhi_epi32(a, b);\
 
191
                                        t1 = _mm_unpackhi_epi32(c, d);\
 
192
                                        t2 = _mm_unpacklo_epi64(t0, t1);\
 
193
                                        CRYPTOPP_KEYSTREAM_OUTPUT_XMM(x, g, t2)\
 
194
                                        t2 = _mm_unpackhi_epi64(t0, t1);\
 
195
                                        CRYPTOPP_KEYSTREAM_OUTPUT_XMM(x, h, t2)}
 
196
 
 
197
                                #define SALSA_OUTPUT(x)         \
 
198
                                        OUTPUT_4(x, x0, x13, x10, x7, 0, 4, 8, 12)\
 
199
                                        OUTPUT_4(x, x4, x1, x14, x11, 1, 5, 9, 13)\
 
200
                                        OUTPUT_4(x, x8, x5, x2, x15, 2, 6, 10, 14)\
 
201
                                        OUTPUT_4(x, x12, x9, x6, x3, 3, 7, 11, 15)
 
202
 
 
203
                                CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(SALSA_OUTPUT, 4*BYTES_PER_ITERATION)
 
204
 
 
205
                                #undef SALSA_OUTPUT
 
206
                        } while ((iterationCount-=4) >= 4);
 
207
                }
 
208
 
 
209
                if (!IsP4()) while (iterationCount)
 
210
                {
 
211
                        --iterationCount;
 
212
                        __m128i x0 = s[0];
 
213
                        __m128i x1 = s[1];
 
214
                        __m128i x2 = s[2];
 
215
                        __m128i x3 = s[3];
 
216
 
 
217
                        for (i=m_rounds; i>0; i-=2)
 
218
                        {
 
219
                                SSE2_QUARTER_ROUND(x0, x1, x3, 7)
 
220
                                SSE2_QUARTER_ROUND(x1, x2, x0, 9)
 
221
                                SSE2_QUARTER_ROUND(x2, x3, x1, 13)
 
222
                                SSE2_QUARTER_ROUND(x3, x0, x2, 18)
 
223
 
 
224
                                x1 = _mm_shuffle_epi32(x1, _MM_SHUFFLE(2, 1, 0, 3));
 
225
                                x2 = _mm_shuffle_epi32(x2, _MM_SHUFFLE(1, 0, 3, 2));
 
226
                                x3 = _mm_shuffle_epi32(x3, _MM_SHUFFLE(0, 3, 2, 1));
 
227
 
 
228
                                SSE2_QUARTER_ROUND(x0, x3, x1, 7)
 
229
                                SSE2_QUARTER_ROUND(x3, x2, x0, 9)
 
230
                                SSE2_QUARTER_ROUND(x2, x1, x3, 13)
 
231
                                SSE2_QUARTER_ROUND(x1, x0, x2, 18)
 
232
 
 
233
                                x1 = _mm_shuffle_epi32(x1, _MM_SHUFFLE(0, 3, 2, 1));
 
234
                                x2 = _mm_shuffle_epi32(x2, _MM_SHUFFLE(1, 0, 3, 2));
 
235
                                x3 = _mm_shuffle_epi32(x3, _MM_SHUFFLE(2, 1, 0, 3));
 
236
                        }
 
237
 
 
238
                        x0 = _mm_add_epi32(x0, s[0]);
 
239
                        x1 = _mm_add_epi32(x1, s[1]);
 
240
                        x2 = _mm_add_epi32(x2, s[2]);
 
241
                        x3 = _mm_add_epi32(x3, s[3]);
 
242
 
 
243
                        if (++m_state[8] == 0)
 
244
                                ++m_state[5];
 
245
 
 
246
                        CRYPTOPP_ALIGN_DATA(16) static const word32 masks[8] CRYPTOPP_SECTION_ALIGN16 = 
 
247
                                {0, 0xffffffff, 0, 0xffffffff, 0xffffffff, 0, 0xffffffff, 0};
 
248
 
 
249
                        __m128i k02 = _mm_or_si128(_mm_slli_epi64(x0, 32), _mm_srli_epi64(x3, 32));
 
250
                        k02 = _mm_shuffle_epi32(k02, _MM_SHUFFLE(0, 1, 2, 3));
 
251
                        __m128i k13 = _mm_or_si128(_mm_slli_epi64(x1, 32), _mm_srli_epi64(x0, 32));
 
252
                        k13 = _mm_shuffle_epi32(k13, _MM_SHUFFLE(0, 1, 2, 3));
 
253
                        __m128i maskLo32 = ((__m128i*)masks)[1], maskHi32 = ((__m128i*)masks)[0];
 
254
                        __m128i k20 = _mm_or_si128(_mm_and_si128(x2, maskLo32), _mm_and_si128(x1, maskHi32));
 
255
                        __m128i k31 = _mm_or_si128(_mm_and_si128(x3, maskLo32), _mm_and_si128(x2, maskHi32));
 
256
 
 
257
                        __m128i k0 = _mm_unpackhi_epi64(k02, k20);
 
258
                        __m128i k1 = _mm_unpackhi_epi64(k13, k31);
 
259
                        __m128i k2 = _mm_unpacklo_epi64(k20, k02);
 
260
                        __m128i k3 = _mm_unpacklo_epi64(k31, k13);
 
261
 
 
262
                        #define SSE2_OUTPUT(x)  {\
 
263
                                CRYPTOPP_KEYSTREAM_OUTPUT_XMM(x, 0, k0)\
 
264
                                CRYPTOPP_KEYSTREAM_OUTPUT_XMM(x, 1, k1)\
 
265
                                CRYPTOPP_KEYSTREAM_OUTPUT_XMM(x, 2, k2)\
 
266
                                CRYPTOPP_KEYSTREAM_OUTPUT_XMM(x, 3, k3)}
 
267
 
 
268
                        CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(SSE2_OUTPUT, BYTES_PER_ITERATION);
 
269
                }
 
270
        }
 
271
#endif
55
272
 
56
273
        word32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
57
 
        word32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
58
 
 
59
 
        j0 = m_state[0];
60
 
        j1 = m_state[1];
61
 
        j2 = m_state[2];
62
 
        j3 = m_state[3];
63
 
        j4 = m_state[4];
64
 
        j5 = m_state[5];
65
 
        j6 = m_state[6];
66
 
        j7 = m_state[7];
67
 
        j8 = m_state[8];
68
 
        j9 = m_state[9];
69
 
        j10 = m_state[10];
70
 
        j11 = m_state[11];
71
 
        j12 = m_state[12];
72
 
        j13 = m_state[13];
73
 
        j14 = m_state[14];
74
 
        j15 = m_state[15];
75
 
 
76
 
        for (size_t iteration = 0; iteration < iterationCount; ++iteration)
 
274
 
 
275
        while (iterationCount--)
77
276
        {
78
 
                x0 = j0;
79
 
                x1 = j1;
80
 
                x2 = j2;
81
 
                x3 = j3;
82
 
                x4 = j4;
83
 
                x5 = j5;
84
 
                x6 = j6;
85
 
                x7 = j7;
86
 
                x8 = j8;
87
 
                x9 = j9;
88
 
                x10 = j10;
89
 
                x11 = j11;
90
 
                x12 = j12;
91
 
                x13 = j13;
92
 
                x14 = j14;
93
 
                x15 = j15;
 
277
                x0 = m_state[0];
 
278
                x1 = m_state[1];
 
279
                x2 = m_state[2];
 
280
                x3 = m_state[3];
 
281
                x4 = m_state[4];
 
282
                x5 = m_state[5];
 
283
                x6 = m_state[6];
 
284
                x7 = m_state[7];
 
285
                x8 = m_state[8];
 
286
                x9 = m_state[9];
 
287
                x10 = m_state[10];
 
288
                x11 = m_state[11];
 
289
                x12 = m_state[12];
 
290
                x13 = m_state[13];
 
291
                x14 = m_state[14];
 
292
                x15 = m_state[15];
94
293
 
95
 
                for (int i=m_rounds; i>0; i-=2)
 
294
                for (i=m_rounds; i>0; i-=2)
96
295
                {
97
 
#define QUARTER_ROUND(a, b, c, d)       \
98
 
        b = b ^ rotlFixed(a + d, 7);    \
99
 
        c = c ^ rotlFixed(b + a, 9);    \
100
 
        d = d ^ rotlFixed(c + b, 13);   \
101
 
        a = a ^ rotlFixed(d + c, 18);
 
296
                        #define QUARTER_ROUND(a, b, c, d)       \
 
297
                                b = b ^ rotlFixed(a + d, 7);    \
 
298
                                c = c ^ rotlFixed(b + a, 9);    \
 
299
                                d = d ^ rotlFixed(c + b, 13);   \
 
300
                                a = a ^ rotlFixed(d + c, 18);
102
301
 
103
302
                        QUARTER_ROUND(x0, x4, x8, x12)
104
 
                        QUARTER_ROUND(x5, x9, x13, x1)
105
 
                        QUARTER_ROUND(x10, x14, x2, x6)
106
 
                        QUARTER_ROUND(x15, x3, x7, x11)
 
303
                        QUARTER_ROUND(x1, x5, x9, x13)
 
304
                        QUARTER_ROUND(x2, x6, x10, x14)
 
305
                        QUARTER_ROUND(x3, x7, x11, x15)
107
306
 
108
 
                        QUARTER_ROUND(x0, x1, x2, x3)
109
 
                        QUARTER_ROUND(x5, x6, x7, x4)
110
 
                        QUARTER_ROUND(x10, x11, x8, x9)
111
 
                        QUARTER_ROUND(x15, x12, x13, x14)
 
307
                        QUARTER_ROUND(x0, x13, x10, x7)
 
308
                        QUARTER_ROUND(x1, x14, x11, x4)
 
309
                        QUARTER_ROUND(x2, x15, x8, x5)
 
310
                        QUARTER_ROUND(x3, x12, x9, x6)
112
311
                }
113
312
 
114
 
                keystreamOutput (x0 + j0)
115
 
                                                (x1 + j1)
116
 
                                                (x2 + j2)
117
 
                                                (x3 + j3)
118
 
                                                (x4 + j4)
119
 
                                                (x5 + j5)
120
 
                                                (x6 + j6)
121
 
                                                (x7 + j7)
122
 
                                                (x8 + j8)
123
 
                                                (x9 + j9)
124
 
                                                (x10 + j10)
125
 
                                                (x11 + j11)
126
 
                                                (x12 + j12)
127
 
                                                (x13 + j13)
128
 
                                                (x14 + j14)
129
 
                                                (x15 + j15);
130
 
 
131
 
                if (++j8 == 0)
132
 
                        ++j9;
 
313
                #define SALSA_OUTPUT(x) {\
 
314
                        CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 0, x0 + m_state[0]);\
 
315
                        CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 1, x13 + m_state[13]);\
 
316
                        CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 2, x10 + m_state[10]);\
 
317
                        CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 3, x7 + m_state[7]);\
 
318
                        CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 4, x4 + m_state[4]);\
 
319
                        CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 5, x1 + m_state[1]);\
 
320
                        CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 6, x14 + m_state[14]);\
 
321
                        CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 7, x11 + m_state[11]);\
 
322
                        CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 8, x8 + m_state[8]);\
 
323
                        CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 9, x5 + m_state[5]);\
 
324
                        CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 10, x2 + m_state[2]);\
 
325
                        CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 11, x15 + m_state[15]);\
 
326
                        CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 12, x12 + m_state[12]);\
 
327
                        CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 13, x9 + m_state[9]);\
 
328
                        CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 14, x6 + m_state[6]);\
 
329
                        CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 15, x3 + m_state[3]);}
 
330
 
 
331
                CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(SALSA_OUTPUT, BYTES_PER_ITERATION);
 
332
 
 
333
                if (++m_state[8] == 0)
 
334
                        ++m_state[5];
133
335
        }
134
 
 
135
 
        m_state[8] = j8;
136
 
        m_state[9] = j9;
137
336
}
138
337
 
139
338
NAMESPACE_END