177
179
W64LIT(0x16165816b04e2ca6), W64LIT(0x3a3ae83acdd274f7), W64LIT(0x6969b9696fd0d206), W64LIT(0x09092409482d1241),
178
180
W64LIT(0x7070dd70a7ade0d7), W64LIT(0xb6b6e2b6d954716f), W64LIT(0xd0d067d0ceb7bd1e), W64LIT(0xeded93ed3b7ec7d6),
179
181
W64LIT(0xcccc17cc2edb85e2), W64LIT(0x424215422a578468), W64LIT(0x98985a98b4c22d2c), W64LIT(0xa4a4aaa4490e55ed),
180
W64LIT(0x2828a0285d885075), W64LIT(0x5c5c6d5cda31b886), W64LIT(0xf8f8c7f8933fed6b), W64LIT(0x8686228644a411c2),
182
W64LIT(0x2828a0285d885075), W64LIT(0x5c5c6d5cda31b886), W64LIT(0xf8f8c7f8933fed6b), W64LIT(0x8686228644a411c2),
183
static const word64 C1[256] = {
184
W64LIT(0xd818186018c07830), W64LIT(0x2623238c2305af46), W64LIT(0xb8c6c63fc67ef991), W64LIT(0xfbe8e887e8136fcd),
184
W64LIT(0xd818186018c07830), W64LIT(0x2623238c2305af46), W64LIT(0xb8c6c63fc67ef991), W64LIT(0xfbe8e887e8136fcd),
185
185
W64LIT(0xcb878726874ca113), W64LIT(0x11b8b8dab8a9626d), W64LIT(0x0901010401080502), W64LIT(0x0d4f4f214f426e9e),
186
186
W64LIT(0x9b3636d836adee6c), W64LIT(0xffa6a6a2a6590451), W64LIT(0x0cd2d26fd2debdb9), W64LIT(0x0ef5f5f3f5fb06f7),
187
187
W64LIT(0x967979f979ef80f2), W64LIT(0x306f6fa16f5fcede), W64LIT(0x6d91917e91fcef3f), W64LIT(0xf852525552aa07a4),
397
391
// Whirlpool basic transformation. Transforms state based on block.
398
392
void Whirlpool::Transform(word64 *digest, const word64 *block)
394
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
397
// MMX version has the same structure as C version below
399
#if CRYPTOPP_BOOL_X64
400
word64 workspace[16];
404
".intel_syntax noprefix;"
406
AS2( mov AS_REG_6, WORD_REG(ax))
411
AS2( lea AS_REG_6, [Whirlpool_C])
412
AS2( mov WORD_REG(cx), digest)
413
AS2( mov WORD_REG(dx), block)
415
#if CRYPTOPP_BOOL_X86
420
#define SSE2_workspace esp+WORD_SZ
422
#define SSE2_workspace %3
426
AS2( movq mm0, [WORD_REG(cx)+8*WORD_REG(si)])
427
AS2( movq [SSE2_workspace+8*WORD_REG(si)], mm0) // k
428
AS2( pxor mm0, [WORD_REG(dx)+8*WORD_REG(si)])
429
AS2( movq [SSE2_workspace+64+8*WORD_REG(si)], mm0) // s
430
AS2( movq [WORD_REG(cx)+8*WORD_REG(si)], mm0)
431
AS1( inc WORD_REG(si))
432
AS2( cmp WORD_REG(si), 8)
438
#define KSL0(a, b) AS2(movq mm##a, b)
439
#define KSL1(a, b) AS2(pxor mm##a, b)
441
#define KSL(op, i, a, b, c, d) \
442
AS2(mov eax, [SSE2_workspace+8*i])\
444
KSL##op(a, [AS_REG_6+3*2048+8*WORD_REG(di)])\
446
KSL##op(b, [AS_REG_6+2*2048+8*WORD_REG(di)])\
450
KSL##op(c, [AS_REG_6+1*2048+8*WORD_REG(di)])\
451
KSL##op(d, [AS_REG_6+0*2048+8*WORD_REG(ax)])
454
ASS(pshufw mm##a, mm##a, 1, 0, 3, 2)\
460
AS2(movq [SSE2_workspace+8*a], mm##a)
462
#define KSH(op, i, a, b, c, d) \
463
AS2(mov eax, [SSE2_workspace+8*((i+4)-8*((i+4)/8))+4])\
465
KSH##op(a, [AS_REG_6+3*2048+8*WORD_REG(di)])\
467
KSH##op(b, [AS_REG_6+2*2048+8*WORD_REG(di)])\
471
KSH##op(c, [AS_REG_6+1*2048+8*WORD_REG(di)])\
472
KSH##op(d, [AS_REG_6+0*2048+8*WORD_REG(ax)])
474
#define TSL(op, i, a, b, c, d) \
475
AS2(mov eax, [SSE2_workspace+64+8*i])\
477
KSL##op(a, [AS_REG_6+3*2048+8*WORD_REG(di)])\
479
KSL##op(b, [AS_REG_6+2*2048+8*WORD_REG(di)])\
483
KSL##op(c, [AS_REG_6+1*2048+8*WORD_REG(di)])\
484
KSL##op(d, [AS_REG_6+0*2048+8*WORD_REG(ax)])
487
ASS(pshufw mm##a, mm##a, 1, 0, 3, 2)\
488
AS2(pxor mm##a, [SSE2_workspace+8*a])\
494
AS2(movq [SSE2_workspace+64+8*a], mm##a)
497
AS2(pxor mm##a, [WORD_REG(cx)+8*a])\
498
AS2(movq [WORD_REG(cx)+8*a], mm##a)
500
#define TSH(op, i, a, b, c, d) \
501
AS2(mov eax, [SSE2_workspace+64+8*((i+4)-8*((i+4)/8))+4])\
503
TSH##op(a, [AS_REG_6+3*2048+8*WORD_REG(di)])\
505
TSH##op(b, [AS_REG_6+2*2048+8*WORD_REG(di)])\
509
TSH##op(c, [AS_REG_6+1*2048+8*WORD_REG(di)])\
510
TSH##op(d, [AS_REG_6+0*2048+8*WORD_REG(ax)])
512
KSL(0, 4, 3, 2, 1, 0)
513
KSL(0, 0, 7, 6, 5, 4)
514
KSL(1, 1, 0, 7, 6, 5)
515
KSL(1, 2, 1, 0, 7, 6)
516
KSL(1, 3, 2, 1, 0, 7)
517
KSL(1, 5, 4, 3, 2, 1)
518
KSL(1, 6, 5, 4, 3, 2)
519
KSL(1, 7, 6, 5, 4, 3)
520
KSH(0, 0, 7, 6, 5, 4)
521
KSH(0, 4, 3, 2, 1, 0)
522
KSH(1, 1, 0, 7, 6, 5)
523
KSH(1, 2, 1, 0, 7, 6)
524
KSH(1, 5, 4, 3, 2, 1)
525
KSH(1, 6, 5, 4, 3, 2)
526
KSH(2, 3, 2, 1, 0, 7)
527
KSH(2, 7, 6, 5, 4, 3)
529
AS2( pxor mm0, [AS_REG_6 + 8*1024 + WORD_REG(si)*8])
530
AS2( movq [SSE2_workspace], mm0)
532
TSL(0, 4, 3, 2, 1, 0)
533
TSL(0, 0, 7, 6, 5, 4)
534
TSL(1, 1, 0, 7, 6, 5)
535
TSL(1, 2, 1, 0, 7, 6)
536
TSL(1, 3, 2, 1, 0, 7)
537
TSL(1, 5, 4, 3, 2, 1)
538
TSL(1, 6, 5, 4, 3, 2)
539
TSL(1, 7, 6, 5, 4, 3)
540
TSH(0, 0, 7, 6, 5, 4)
541
TSH(0, 4, 3, 2, 1, 0)
542
TSH(1, 1, 0, 7, 6, 5)
543
TSH(1, 2, 1, 0, 7, 6)
544
TSH(1, 5, 4, 3, 2, 1)
545
TSH(1, 6, 5, 4, 3, 2)
547
AS1( inc WORD_REG(si))
548
AS2( cmp WORD_REG(si), 10)
551
TSH(2, 3, 2, 1, 0, 7)
552
TSH(2, 7, 6, 5, 4, 3)
557
TSH(3, 3, 2, 1, 0, 7)
558
TSH(3, 7, 6, 5, 4, 3)
568
#if defined(__GNUC__) || (defined(_MSC_VER) && _MSC_VER < 1300)
572
".att_syntax prefix;"
574
: "a" (Whirlpool_C), "c" (digest), "d" (block)
575
#if CRYPTOPP_BOOL_X64
578
: "%esi", "%edi", "memory", "cc"
579
#if CRYPTOPP_BOOL_X64
586
#endif // #ifdef CRYPTOPP_X86_ASM_AVAILABLE
400
588
word64 s[8]; // the cipher state
401
589
word64 k[8]; // the round key
403
591
// Compute and apply K^0 to the cipher state
404
592
// Also apply part of the Miyaguchi-Preneel compression function
405
digest[0] = s[0] = block[0] ^ (k[0] = digest[0]);
406
digest[1] = s[1] = block[1] ^ (k[1] = digest[1]);
407
digest[2] = s[2] = block[2] ^ (k[2] = digest[2]);
408
digest[3] = s[3] = block[3] ^ (k[3] = digest[3]);
409
digest[4] = s[4] = block[4] ^ (k[4] = digest[4]);
410
digest[5] = s[5] = block[5] ^ (k[5] = digest[5]);
411
digest[6] = s[6] = block[6] ^ (k[6] = digest[6]);
412
digest[7] = s[7] = block[7] ^ (k[7] = digest[7]);
593
for (int i=0; i<8; i++)
594
digest[i] = s[i] = block[i] ^ (k[i] = digest[i]);
596
#define KSL(op, i, a, b, c, d) \
598
w##a = Whirlpool_C[3*256 + (byte)t] ^ (op ? w##a : 0);\
600
w##b = Whirlpool_C[2*256 + (byte)t] ^ (op ? w##b : 0);\
602
w##c = Whirlpool_C[1*256 + (byte)t] ^ (op ? w##c : 0);\
604
w##d = Whirlpool_C[0*256 + t] ^ (op ? w##d : 0);
606
#define KSH(op, i, a, b, c, d) \
607
t = (word32)(k[(i+4)%8]>>32);\
608
w##a = Whirlpool_C[3*256 + (byte)t] ^ (op ? w##a : rotrFixed(w##a, 32));\
609
if (op==2) k[a] = w##a;\
611
w##b = Whirlpool_C[2*256 + (byte)t] ^ (op ? w##b : rotrFixed(w##b, 32));\
612
if (op==2) k[b] = w##b;\
614
w##c = Whirlpool_C[1*256 + (byte)t] ^ (op ? w##c : rotrFixed(w##c, 32));\
615
if (op==2) k[c] = w##c;\
617
w##d = Whirlpool_C[0*256 + t] ^ (op ? w##d : rotrFixed(w##d, 32));\
618
if (op==2) k[d] = w##d;\
620
#define TSL(op, i, a, b, c, d) \
622
w##a = Whirlpool_C[3*256 + (byte)t] ^ (op ? w##a : 0);\
624
w##b = Whirlpool_C[2*256 + (byte)t] ^ (op ? w##b : 0);\
626
w##c = Whirlpool_C[1*256 + (byte)t] ^ (op ? w##c : 0);\
628
w##d = Whirlpool_C[0*256 + t] ^ (op ? w##d : 0);
630
#define TSH_OP(op, a, b) \
631
w##a = Whirlpool_C[b*256 + (byte)t] ^ (op ? w##a : rotrFixed(w##a, 32) ^ k[a]);\
632
if (op==2) s[a] = w##a;\
633
if (op==3) digest[a] ^= w##a;\
635
#define TSH(op, i, a, b, c, d) \
636
t = (word32)(s[(i+4)%8]>>32);\
414
645
// Iterate over all rounds:
415
for (int r = 0; r < R; r++)
417
649
word64 w0, w1, w2, w3, w4, w5, w6, w7; // temporary storage
420
// Compute K^r from K^{r-1}:
421
#define K(i,j) GETBYTE(k[(i+j+1)%8], j)
423
t = C0[K(i,3)] ^ C1[K(i,2)] ^ C2[K(i,1)] ^ C3[K(i,0)]; \
424
w##i = rotrFixed(t, 32) ^ C0[K(i,7)] ^ C1[K(i,6)] ^ C2[K(i,5)] ^ C3[K(i,4)];
426
KS(0); KS(1); KS(2); KS(3); KS(4); KS(5); KS(6); KS(7);
428
k[1] = w1; k[2] = w2; k[3] = w3; k[4] = w4; k[5] = w5; k[6] = w6; k[7] = w7;
430
// Apply the r-th round transformation:
431
#define S(i,j) GETBYTE(s[(i+j+1)%8], j)
433
t = C0[S(i,3)] ^ C1[S(i,2)] ^ C2[S(i,1)] ^ C3[S(i,0)]; \
434
w##i = rotrFixed(t, 32) ^ C0[S(i,7)] ^ C1[S(i,6)] ^ C2[S(i,5)] ^ C3[S(i,4)] ^ k[i];
436
TS(0); TS(1); TS(2); TS(3); TS(4); TS(5); TS(6); TS(7);
437
s[0] = w0; s[1] = w1; s[2] = w2; s[3] = w3; s[4] = w4; s[5] = w5; s[6] = w6; s[7] = w7;
440
// Apply the rest of the Miyaguchi-Preneel compression function:
652
KSL(0, 4, 3, 2, 1, 0)
653
KSL(0, 0, 7, 6, 5, 4)
654
KSL(1, 1, 0, 7, 6, 5)
655
KSL(1, 2, 1, 0, 7, 6)
656
KSL(1, 3, 2, 1, 0, 7)
657
KSL(1, 5, 4, 3, 2, 1)
658
KSL(1, 6, 5, 4, 3, 2)
659
KSL(1, 7, 6, 5, 4, 3)
660
KSH(0, 0, 7, 6, 5, 4)
661
KSH(0, 4, 3, 2, 1, 0)
662
KSH(1, 1, 0, 7, 6, 5)
663
KSH(1, 2, 1, 0, 7, 6)
664
KSH(1, 5, 4, 3, 2, 1)
665
KSH(1, 6, 5, 4, 3, 2)
666
KSH(2, 3, 2, 1, 0, 7)
667
KSH(2, 7, 6, 5, 4, 3)
669
k[0] ^= Whirlpool_C[1024+r];
671
TSL(0, 4, 3, 2, 1, 0)
672
TSL(0, 0, 7, 6, 5, 4)
673
TSL(1, 1, 0, 7, 6, 5)
674
TSL(1, 2, 1, 0, 7, 6)
675
TSL(1, 3, 2, 1, 0, 7)
676
TSL(1, 5, 4, 3, 2, 1)
677
TSL(1, 6, 5, 4, 3, 2)
678
TSL(1, 7, 6, 5, 4, 3)
679
TSH(0, 0, 7, 6, 5, 4)
680
TSH(0, 4, 3, 2, 1, 0)
681
TSH(1, 1, 0, 7, 6, 5)
682
TSH(1, 2, 1, 0, 7, 6)
683
TSH(1, 5, 4, 3, 2, 1)
684
TSH(1, 6, 5, 4, 3, 2)
688
TSH(2, 3, 2, 1, 0, 7)
689
TSH(2, 7, 6, 5, 4, 3)
693
TSH(3, 3, 2, 1, 0, 7)
694
TSH(3, 7, 6, 5, 4, 3)
453
#endif // WORD64_AVAILABLE