11
typedef uint32_t word32;

/*
 * Rotate a 32-bit word right by `shift` bits.
 * The left-shift count is masked with 31 so that shift == 0 stays
 * well-defined (an unmasked `word << 32` is undefined behavior in C);
 * for 1 <= shift <= 31 the mask is a no-op, so all existing callers
 * (constant shifts in [2,25] from the S0/S1/s0/s1 macros) are unchanged.
 */
static word32 rotrFixed(word32 word, unsigned int shift)
{
	return (word >> shift) | (word << ((32 - shift) & 31));
}
18
/* blk0(i): for the first 16 rounds, load message word i of the input
 * block directly into the message schedule W (no expansion needed yet). */
#define blk0(i) (W[i] = data[i])
20
/* SHA-256 round constants K[0..63]: the first 32 bits of the fractional
 * parts of the cube roots of the first 64 primes (FIPS 180-4, sec. 4.2.2). */
static const word32 SHA256_K[64] = {
21
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
22
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
23
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
24
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
25
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
26
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
27
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
28
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
29
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
30
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
31
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
32
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
33
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
34
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
35
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
36
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
39
/* blk2(i): message-schedule expansion for rounds 16..63, done in place in a
 * 16-word circular buffer: W[t] = s1(W[t-2]) + W[t-7] + s0(W[t-15]) + W[t-16]. */
#define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15]))
41
/* Ch/Maj rewritten with XOR/AND so each needs only one AND (classic
 * bit-trick forms of the FIPS 180-4 Ch and Maj functions). */
#define Ch(x,y,z) (z^(x&(y^z)))
42
#define Maj(x,y,z) (y^((x^y)&(y^z)))
44
/* a..h are a rotating window into T[8]: instead of shuffling the eight
 * working variables every round, round i reads them at rotated offsets. */
#define a(i) T[(0-i)&7]
45
#define b(i) T[(1-i)&7]
46
#define c(i) T[(2-i)&7]
47
#define d(i) T[(3-i)&7]
48
#define e(i) T[(4-i)&7]
49
#define f(i) T[(5-i)&7]
50
#define g(i) T[(6-i)&7]
51
#define h(i) T[(7-i)&7]
53
/* R(i): one SHA-256 round. Uses blk0 (raw load) on the first 16-round pass
 * (j == 0) and blk2 (schedule expansion) afterwards. */
#define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA256_K[i+j]+(j?blk2(i):blk0(i));\
54
d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))
57
/* Sigma functions from FIPS 180-4 sec. 4.1.2: S0/S1 operate on the working
 * variables, s0/s1 on the message schedule. */
#define S0(x) (rotrFixed(x,2)^rotrFixed(x,13)^rotrFixed(x,22))
58
#define S1(x) (rotrFixed(x,6)^rotrFixed(x,11)^rotrFixed(x,25))
59
#define s0(x) (rotrFixed(x,7)^rotrFixed(x,18)^(x>>3))
60
#define s1(x) (rotrFixed(x,17)^rotrFixed(x,19)^(x>>10))
62
/*
 * One SHA-256 compression over a single 64-byte block.
 * `state` holds the 8-word chaining value and is updated in place;
 * `data` is the 16-word message block (assumed already in host word order
 * here — byte swapping is not visible in this chunk).
 * NOTE(review): the declarations of the working array T, the schedule W,
 * and the loop counter j are not visible in this chunk — they sit between
 * the signature and the memcpy in the full file.
 */
static void SHA256_Transform(word32 *state, const word32 *data)
68
/* Copy context->state[] to working vars */
69
memcpy(T, state, sizeof(T));
70
/* 64 operations, partially loop unrolled */
71
for (j=0; j<64; j+=16)
73
R( 0); R( 1); R( 2); R( 3);
74
R( 4); R( 5); R( 6); R( 7);
75
R( 8); R( 9); R(10); R(11);
76
R(12); R(13); R(14); R(15);
78
/* Add the working vars back into context.state[] */
89
/*
 * Run one SHA-256 compression: seed the 32-byte (8-word) `state` from
 * `init` (e.g. a precomputed midstate or the standard IV), then fold one
 * 64-byte block from `input` into it via SHA256_Transform.
 * `input` is passed through untouched; any byte-order handling is the
 * caller's responsibility.
 */
static void runhash(void *state, const void *input, const void *init)
{
	memcpy(state, init, 32);
	SHA256_Transform(state, input);
}
95
/* suspiciously similar to ScanHash* from bitcoin */
96
/*
 * Portable-C nonce scanner. Double-SHA-256s the block header for
 * successive nonces until a candidate hash is found, the nonce space is
 * exhausted, or a restart is requested.
 * NOTE(review): the `data` parameter, the scan loop, the nonce increment
 * and the return paths are not visible in this chunk.
 */
bool scanhash_cryptopp(int thr_id, const unsigned char *midstate,
98
unsigned char *hash1, unsigned char *hash,
99
const unsigned char *target,
100
uint32_t max_nonce, unsigned long *hashes_done,
103
uint32_t *hash32 = (uint32_t *) hash;
104
/* nonce lives at byte offset 48 of the header (word 12 of `data`). */
uint32_t *nonce = (uint32_t *)(data + 12);
106
work_restart[thr_id].restart = 0;
112
/* First pass resumes from the precomputed midstate; second pass hashes
 * the first digest again starting from the standard SHA-256 IV. */
runhash(hash1, data, midstate);
113
runhash(hash, hash1, sha256_init_state);
115
/* Cheap pre-filter: top word must be zero before the full target test. */
if (unlikely((hash32[7] == 0) && fulltest(hash, target))) {
120
if ((n >= max_nonce) || work_restart[thr_id].restart) {
127
#if defined(WANT_CRYPTOPP_ASM32)
129
/* 32-bit x86 assembler path (borrowed from Crypto++): fix the target to
 * x86, no x64, and no SSE2 fast path in this build. */
#define CRYPTOPP_FASTCALL
130
#define CRYPTOPP_BOOL_X86 1
131
#define CRYPTOPP_BOOL_X64 0
132
#define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 0
134
/* ASn/ASS/ASL/ASJ/ASC emit one assembler statement each; the three macro
 * sets below target (1) MASM source generation, (2) MSVC/Borland inline
 * asm, (3) GNU inline asm, so the same round code compiles everywhere. */
#ifdef CRYPTOPP_GENERATE_X64_MASM
135
#define AS1(x) x*newline*
136
#define AS2(x, y) x, y*newline*
137
#define AS3(x, y, z) x, y, z*newline*
138
#define ASS(x, y, a, b, c, d) x, y, a*64+b*16+c*4+d*newline*
139
#define ASL(x) label##x:*newline*
140
#define ASJ(x, y, z) x label##y*newline*
141
#define ASC(x, y) x label##y*newline*
142
#define AS_HEX(y) 0##y##h
143
#elif defined(_MSC_VER) || defined(__BORLANDC__)
144
#define CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
145
#define AS1(x) __asm {x}
146
#define AS2(x, y) __asm {x, y}
147
#define AS3(x, y, z) __asm {x, y, z}
148
#define ASS(x, y, a, b, c, d) __asm {x, y, (a)*64+(b)*16+(c)*4+(d)}
149
#define ASL(x) __asm {label##x:}
150
#define ASJ(x, y, z) __asm {x label##y}
151
#define ASC(x, y) __asm {x label##y}
152
#define CRYPTOPP_NAKED __declspec(naked)
153
#define AS_HEX(y) 0x##y
155
/* GNU inline-asm flavor: each ASn stringizes its operands into a ";"-
 * terminated fragment that is concatenated into one big asm() string. */
#define CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
156
// define these in two steps to allow arguments to be expanded
157
#define GNU_AS1(x) #x ";"
158
#define GNU_AS2(x, y) #x ", " #y ";"
159
#define GNU_AS3(x, y, z) #x ", " #y ", " #z ";"
160
#define GNU_ASL(x) "\n" #x ":"
161
#define GNU_ASJ(x, y, z) #x " " #y #z ";"
162
#define AS1(x) GNU_AS1(x)
163
#define AS2(x, y) GNU_AS2(x, y)
164
#define AS3(x, y, z) GNU_AS3(x, y, z)
165
#define ASS(x, y, a, b, c, d) #x ", " #y ", " #a "*64+" #b "*16+" #c "*4+" #d ";"
166
#define ASL(x) GNU_ASL(x)
167
#define ASJ(x, y, z) GNU_ASJ(x, y, z)
168
#define ASC(x, y) #x " " #y ";"
169
#define CRYPTOPP_NAKED
170
#define AS_HEX(y) 0x##y
176
/* ASM_MOD: compile-time modulo usable in address expressions — MASM has a
 * MOD operator, GAS gets the sub/div/mul equivalent. */
#ifdef CRYPTOPP_GENERATE_X64_MASM
177
#define ASM_MOD(x, y) ((x) MOD (y))
178
#define XMMWORD_PTR XMMWORD PTR
180
// GNU assembler doesn't seem to have mod operator
181
#define ASM_MOD(x, y) ((x)-((x)/(y))*(y))
182
// GAS 2.15 doesn't support XMMWORD PTR. it seems necessary only for MASM
186
/* Symbolic register names so the round code is written once and mapped to
 * the right physical registers per ABI: x86, Win64 MASM, or SysV x64. */
#if CRYPTOPP_BOOL_X86
194
#define AS_REG_1d ecx
195
#define AS_REG_2d edx
196
#define AS_REG_3d esi
197
#define AS_REG_4d edi
198
#define AS_REG_5d eax
199
#define AS_REG_6d ebx
200
#define AS_REG_7d ebp
202
/* WORD_REG/WORD_PTR abstract over 32- vs 64-bit pointer-sized operations;
 * push/pop of callee-saved regs is only needed on 32-bit x86. */
#define WORD_REG(x) e##x
203
#define WORD_PTR DWORD PTR
204
#define AS_PUSH_IF86(x) AS1(push e##x)
205
#define AS_POP_IF86(x) AS1(pop e##x)
206
#define AS_JCXZ jecxz
207
#elif CRYPTOPP_BOOL_X64
208
#ifdef CRYPTOPP_GENERATE_X64_MASM
216
#define AS_REG_1d ecx
217
#define AS_REG_2d edx
218
#define AS_REG_3d r8d
219
#define AS_REG_4d r9d
220
#define AS_REG_5d eax
221
#define AS_REG_6d r10d
222
#define AS_REG_7d r11d
231
#define AS_REG_1d edi
232
#define AS_REG_2d esi
233
#define AS_REG_3d edx
234
#define AS_REG_4d ecx
235
#define AS_REG_5d r8d
236
#define AS_REG_6d r9d
237
#define AS_REG_7d r10d
240
#define WORD_REG(x) r##x
241
#define WORD_PTR QWORD PTR
242
#define AS_PUSH_IF86(x)
243
#define AS_POP_IF86(x)
244
#define AS_JCXZ jrcxz
247
/*
 * Assembler SHA-256: compress `len` bytes (a multiple of 64) from `data`
 * into the 8-word `state`. The body is one large inline-asm/MASM blob
 * built from the AS*/ROUND macros; large parts of it are not visible in
 * this chunk.
 */
static void CRYPTOPP_FASTCALL X86_SHA256_HashBlocks(word32 *state, const word32 *data, size_t len
248
#if defined(_MSC_VER) && (_MSC_VER == 1200)
249
, ... // VC60 workaround: prevent VC 6 from inlining this function
253
#if defined(_MSC_VER) && (_MSC_VER == 1200)
254
AS2(mov ecx, [state])
258
/* Stack-frame layout for the asm routine: 8 hash words (H), a 16-word
 * circular message schedule (Wt), then four pointer-sized save slots.
 * ASM_MOD with the +1024 bias keeps the circular indices non-negative. */
#define LOCALS_SIZE 8*4 + 16*4 + 4*WORD_SZ
259
#define H(i) [BASE+ASM_MOD(1024+7-(i),8)*4]
267
#define Wt(i) BASE+8*4+ASM_MOD(1024+15-(i),16)*4
268
#define Wt_2(i) Wt((i)-2)
269
#define Wt_15(i) Wt((i)-15)
270
#define Wt_7(i) Wt((i)-7)
271
/* Saved-pointer slots above the W area: end of the K table, the state
 * pointer, the current data pointer, and the end of the input. */
#define K_END [BASE+8*4+16*4+0*WORD_SZ]
272
#define STATE_SAVE [BASE+8*4+16*4+1*WORD_SZ]
273
#define DATA_SAVE [BASE+8*4+16*4+2*WORD_SZ]
274
#define DATA_END [BASE+8*4+16*4+3*WORD_SZ]
275
#define Kt(i) WORD_REG(si)+(i)*4
276
#if CRYPTOPP_BOOL_X86
278
#elif defined(__GNUC__)
284
/* RA0/RB1 are the per-round variants selected by ROUND's `r` argument:
 * r==0 rounds (first 16) just add K, W and H (RA0), while r==1 rounds
 * also expand the message schedule in place (RB1). RA1/RB0 are the empty
 * counterparts so each ROUND expands exactly one of the two bodies. */
#define RA0(i, edx, edi) \
285
AS2( add edx, [Kt(i)] )\
286
AS2( add edx, [Wt(i)] )\
287
AS2( add edx, H(i) )\
289
#define RA1(i, edx, edi)
291
#define RB0(i, edx, edi)
293
/* RB1: compute W_t = s1(W_t-2) + W_t-7 + s0(W_t-15) + W_t-16 in the
 * circular Wt buffer while accumulating T1 in edx. */
#define RB1(i, edx, edi) \
294
AS2( mov AS_REG_7d, [Wt_2(i)] )\
295
AS2( mov edi, [Wt_15(i)])\
296
AS2( mov ebx, AS_REG_7d )\
297
AS2( shr AS_REG_7d, 10 )\
299
AS2( xor AS_REG_7d, ebx )\
301
AS2( xor ebx, AS_REG_7d )/* s1(W_t-2) */\
302
AS2( add ebx, [Wt_7(i)])\
303
AS2( mov AS_REG_7d, edi )\
304
AS2( shr AS_REG_7d, 3 )\
306
AS2( add ebx, [Wt(i)])/* s1(W_t-2) + W_t-7 + W_t-16 */\
307
AS2( xor AS_REG_7d, edi )\
308
AS2( add edx, [Kt(i)])\
310
AS2( add edx, H(i) )\
311
AS2( xor AS_REG_7d, edi )/* s0(W_t-15) */\
312
AS2( add AS_REG_7d, ebx )/* W_t = s1(W_t-2) + W_t-7 + s0(W_t-15) W_t-16*/\
313
AS2( mov [Wt(i)], AS_REG_7d)\
314
AS2( add edx, AS_REG_7d )\
316
/* ROUND(i, r, ...): one full SHA-256 round in assembler. The register
 * arguments are swapped between consecutive ROUND invocations so values
 * flow from one round to the next without extra moves; `r` selects the
 * RA/RB schedule-expansion variant. NOTE(review): several continuation
 * lines of this macro are not visible in this chunk. */
#define ROUND(i, r, eax, ecx, edi, edx)\
318
/* unused: eax, ecx, temp: ebx, AS_REG_7d, out: edx = T1 */\
319
AS2( mov edx, F(i) )\
320
AS2( xor edx, G(i) )\
322
AS2( xor edx, G(i) )/* Ch(E,F,G) = (G^(E&(F^G))) */\
323
AS2( mov AS_REG_7d, edi )\
325
AS2( ror AS_REG_7d, 25 )\
326
RA##r(i, edx, edi )/* H + Wt + Kt + Ch(E,F,G) */\
327
AS2( xor AS_REG_7d, edi )\
329
AS2( xor AS_REG_7d, edi )/* S1(E) */\
330
AS2( add edx, AS_REG_7d )/* T1 = S1(E) + Ch(E,F,G) + H + Wt + Kt */\
331
RB##r(i, edx, edi )/* H + Wt + Kt + Ch(E,F,G) */\
332
/* in: ecx = A, eax = B^C, edx = T1 */\
333
/* unused: edx, temp: ebx, AS_REG_7d, out: eax = A, ecx = B^C, edx = E */\
335
AS2( xor ecx, B(i) )/* A^B */\
337
AS2( xor eax, B(i) )/* Maj(A,B,C) = B^((A^B)&(B^C) */\
338
AS2( mov AS_REG_7d, ebx )\
340
AS2( add eax, edx )/* T1 + Maj(A,B,C) */\
341
AS2( add edx, D(i) )\
342
AS2( mov D(i), edx )\
343
AS2( ror AS_REG_7d, 22 )\
344
AS2( xor AS_REG_7d, ebx )\
346
AS2( xor AS_REG_7d, ebx )\
347
AS2( add eax, AS_REG_7d )/* T1 + S0(A) + Maj(A,B,C) */\
348
AS2( mov H(i), eax )\
350
/* SWAP_COPY(i): load message word(s) i, byte-swap to big-endian word
 * order, and store into the Wt schedule (x64 moves two words at once,
 * hence the (1+CRYPTOPP_BOOL_X64) index scaling). */
#define SWAP_COPY(i) \
351
AS2( mov WORD_REG(bx), [WORD_REG(dx)+i*WORD_SZ])\
352
AS1( bswap WORD_REG(bx))\
353
AS2( mov [Wt(i*(1+CRYPTOPP_BOOL_X64)+CRYPTOPP_BOOL_X64)], WORD_REG(bx))
355
/* Main assembler body: per-block loop = byte-swap the 16 input words into
 * Wt, run 64 rounds (two unrolled groups of 16, the K pointer advancing
 * between passes), then add the working registers back into state and
 * loop until DATA_END. NOTE(review): many lines of this body (prologue,
 * labels, jumps, epilogue) are not visible in this chunk. */
#if defined(__GNUC__)
356
#if CRYPTOPP_BOOL_X64
357
FixedSizeAlignedSecBlock<byte, LOCALS_SIZE> workspace;
361
#if CRYPTOPP_BOOL_X64
364
".intel_syntax noprefix;"
365
#elif defined(CRYPTOPP_GENERATE_X64_MASM)
367
X86_SHA256_HashBlocks PROC FRAME
372
alloc_stack(LOCALS_SIZE+8)
375
lea rsi, [?SHA256_K@CryptoPP@@3QBIB + 48*4]
378
#if CRYPTOPP_BOOL_X86
381
AS2( lea WORD_REG(si), [SHA256_K+48*4])
383
#if !defined(_MSC_VER) || (_MSC_VER < 1400)
390
AS2( sub WORD_REG(sp), LOCALS_SIZE)
393
AS2( mov STATE_SAVE, WORD_REG(cx))
394
AS2( mov DATA_SAVE, WORD_REG(dx))
395
AS2( lea WORD_REG(ax), [WORD_REG(di) + WORD_REG(dx)])
396
AS2( mov DATA_END, WORD_REG(ax))
397
AS2( mov K_END, WORD_REG(si))
399
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
400
#if CRYPTOPP_BOOL_X86
403
AS1( dec DWORD PTR K_END)
405
AS2( movdqa xmm0, XMMWORD_PTR [WORD_REG(cx)+0*16])
406
AS2( movdqa xmm1, XMMWORD_PTR [WORD_REG(cx)+1*16])
409
#if CRYPTOPP_BOOL_X86
410
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
422
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
424
AS2( movdqa E(0), xmm1)
425
AS2( movdqa A(0), xmm0)
427
#if CRYPTOPP_BOOL_X86
430
AS2( sub WORD_REG(si), 48*4)
431
/* Byte-swap the 64-byte input block into the Wt schedule. */
SWAP_COPY(0) SWAP_COPY(1) SWAP_COPY(2) SWAP_COPY(3)
432
SWAP_COPY(4) SWAP_COPY(5) SWAP_COPY(6) SWAP_COPY(7)
433
#if CRYPTOPP_BOOL_X86
434
SWAP_COPY(8) SWAP_COPY(9) SWAP_COPY(10) SWAP_COPY(11)
435
SWAP_COPY(12) SWAP_COPY(13) SWAP_COPY(14) SWAP_COPY(15)
437
AS2( mov edi, E(0)) // E
438
AS2( mov eax, B(0)) // B
439
AS2( xor eax, C(0)) // C
440
AS2( mov ecx, A(0)) // A
442
/* Rounds 0-15: raw message words (RA0 variant, no expansion). */
ROUND(0, 0, eax, ecx, edi, edx)
443
ROUND(1, 0, ecx, eax, edx, edi)
444
ROUND(2, 0, eax, ecx, edi, edx)
445
ROUND(3, 0, ecx, eax, edx, edi)
446
ROUND(4, 0, eax, ecx, edi, edx)
447
ROUND(5, 0, ecx, eax, edx, edi)
448
ROUND(6, 0, eax, ecx, edi, edx)
449
ROUND(7, 0, ecx, eax, edx, edi)
450
ROUND(8, 0, eax, ecx, edi, edx)
451
ROUND(9, 0, ecx, eax, edx, edi)
452
ROUND(10, 0, eax, ecx, edi, edx)
453
ROUND(11, 0, ecx, eax, edx, edi)
454
ROUND(12, 0, eax, ecx, edi, edx)
455
ROUND(13, 0, ecx, eax, edx, edi)
456
ROUND(14, 0, eax, ecx, edi, edx)
457
ROUND(15, 0, ecx, eax, edx, edi)
460
/* Rounds 16-63: advance the K pointer and loop 16 rounds at a time with
 * in-place schedule expansion (RB1 variant). */
AS2(add WORD_REG(si), 4*16)
461
ROUND(0, 1, eax, ecx, edi, edx)
462
ROUND(1, 1, ecx, eax, edx, edi)
463
ROUND(2, 1, eax, ecx, edi, edx)
464
ROUND(3, 1, ecx, eax, edx, edi)
465
ROUND(4, 1, eax, ecx, edi, edx)
466
ROUND(5, 1, ecx, eax, edx, edi)
467
ROUND(6, 1, eax, ecx, edi, edx)
468
ROUND(7, 1, ecx, eax, edx, edi)
469
ROUND(8, 1, eax, ecx, edi, edx)
470
ROUND(9, 1, ecx, eax, edx, edi)
471
ROUND(10, 1, eax, ecx, edi, edx)
472
ROUND(11, 1, ecx, eax, edx, edi)
473
ROUND(12, 1, eax, ecx, edi, edx)
474
ROUND(13, 1, ecx, eax, edx, edi)
475
ROUND(14, 1, eax, ecx, edi, edx)
476
ROUND(15, 1, ecx, eax, edx, edi)
477
AS2( cmp WORD_REG(si), K_END)
480
AS2( mov WORD_REG(dx), DATA_SAVE)
481
AS2( add WORD_REG(dx), 64)
482
AS2( mov AS_REG_7, STATE_SAVE)
483
AS2( mov DATA_SAVE, WORD_REG(dx))
485
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
486
#if CRYPTOPP_BOOL_X86
487
AS2( test DWORD PTR K_END, 1)
490
AS2( movdqa xmm1, XMMWORD_PTR [AS_REG_7+1*16])
491
AS2( movdqa xmm0, XMMWORD_PTR [AS_REG_7+0*16])
492
AS2( paddd xmm1, E(0))
493
AS2( paddd xmm0, A(0))
494
AS2( movdqa [AS_REG_7+1*16], xmm1)
495
AS2( movdqa [AS_REG_7+0*16], xmm0)
496
AS2( cmp WORD_REG(dx), DATA_END)
500
#if CRYPTOPP_BOOL_X86
501
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
505
/* Scalar path: fold the eight working values back into state[0..7]. */
AS2( add [AS_REG_7+0*4], ecx) // A
506
AS2( add [AS_REG_7+4*4], edi) // E
510
AS2( add [AS_REG_7+1*4], eax)
511
AS2( add [AS_REG_7+2*4], ebx)
512
AS2( add [AS_REG_7+3*4], ecx)
516
AS2( add [AS_REG_7+5*4], eax)
517
AS2( add [AS_REG_7+6*4], ebx)
518
AS2( add [AS_REG_7+7*4], ecx)
519
AS2( mov ecx, AS_REG_7d)
520
AS2( cmp WORD_REG(dx), DATA_END)
522
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
529
#if !defined(_MSC_VER) || (_MSC_VER < 1400)
533
#ifdef CRYPTOPP_GENERATE_X64_MASM
534
add rsp, LOCALS_SIZE+8
540
X86_SHA256_HashBlocks ENDP
544
".att_syntax prefix;"
546
: "c" (state), "d" (data), "S" (SHA256_K+48), "D" (len)
547
#if CRYPTOPP_BOOL_X64
550
: "memory", "cc", "%eax"
551
#if CRYPTOPP_BOOL_X64
552
, "%rbx", "%r8", "%r10"
558
/* CPU feature probe stub: this build path never advertises SSE2 support,
 * so the scalar assembler code is always selected. */
static inline bool HasSSE2(void)
{
	return false;
}
560
/*
 * Wrapper for the assembler path: byte-swap one 64-byte block into a local
 * schedule buffer, then compress it into `state` via the asm routine.
 * NOTE(review): the declarations of W and i, plus the function braces, are
 * not visible in this chunk.
 */
static void SHA256_Transform32(word32 *state, const word32 *data)
565
for (i = 0; i < 16; i++)
566
W[i] = swab32(((word32 *)(data))[i]);
568
/* 16 words * 4 bytes = exactly one 64-byte block. */
X86_SHA256_HashBlocks(state, W, 16 * 4);
571
/*
 * ASM32 counterpart of runhash(): seed the 32-byte (8-word) `state` from
 * `init` (midstate or standard IV), then fold one 64-byte block from
 * `input` into it via the assembler transform.
 */
static void runhash32(void *state, const void *input, const void *init)
{
	memcpy(state, init, 32);
	SHA256_Transform32(state, input);
}
577
/* suspiciously similar to ScanHash* from bitcoin */
578
/*
 * ASM32 nonce scanner: identical structure to scanhash_cryptopp() but
 * double-hashes through the assembler transform (runhash32).
 * NOTE(review): the `data` parameter, the scan loop, the nonce increment
 * and the return paths are not visible in this chunk.
 */
bool scanhash_asm32(int thr_id, const unsigned char *midstate,
580
unsigned char *hash1, unsigned char *hash,
581
const unsigned char *target,
582
uint32_t max_nonce, unsigned long *hashes_done,
585
uint32_t *hash32 = (uint32_t *) hash;
586
/* nonce lives at byte offset 48 of the header (word 12 of `data`). */
uint32_t *nonce = (uint32_t *)(data + 12);
588
work_restart[thr_id].restart = 0;
594
/* First pass resumes from the precomputed midstate; second pass hashes
 * the first digest again starting from the standard SHA-256 IV. */
runhash32(hash1, data, midstate);
595
runhash32(hash, hash1, sha256_init_state);
597
/* Cheap pre-filter: top word must be zero before the full target test. */
if (unlikely((hash32[7] == 0) && fulltest(hash, target))) {
602
if ((n >= max_nonce) || work_restart[thr_id].restart) {
609
#endif // #if defined(WANT_CRYPTOPP_ASM32)