2
Copyright (C) 2000-2007 MySQL AB
4
This program is free software; you can redistribute it and/or modify
5
it under the terms of the GNU General Public License as published by
6
the Free Software Foundation; version 2 of the License.
8
This program is distributed in the hope that it will be useful,
9
but WITHOUT ANY WARRANTY; without even the implied warranty of
10
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
GNU General Public License for more details.
13
You should have received a copy of the GNU General Public License
14
along with this program; see the file COPYING. If not, write to the
15
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
19
/* based on Wei Dai's sha.cpp from CryptoPP */
21
#include "runtime.hpp"
27
#include "algorithm.hpp"
31
namespace STL = STL_NAMESPACE;
37
// Helper macros for the SHA-1 compression function (SHA::Transform).
// They expand in a scope that provides a 16-word array `W`, the input block
// `buffer_`, and a `rotlFixed(value, bits)` rotate-left helper.
// All macro parameters are parenthesized so that expression arguments
// (e.g. `a ^ b`) keep their intended grouping.

// blk0: copy input word i into the schedule W and yield it (rounds 0-15).
#define blk0(i) (W[(i)] = buffer_[(i)])
// blk1: expand the schedule in place for later rounds; W is indexed modulo 16
// so only the most recent 16 words are kept.
#define blk1(i) (W[(i)&15] = \
        rotlFixed(W[((i)+13)&15]^W[((i)+8)&15]^W[((i)+2)&15]^W[(i)&15],1))

// Per-round boolean functions: f1 is a bitwise select (x ? y : z), f2 and f4
// are parity, f3 is bitwise majority.
#define f1(x,y,z) ((z)^((x)&((y)^(z))))
#define f2(x,y,z) ((x)^(y)^(z))
#define f3(x,y,z) (((x)&(y))|((z)&((x)|(y))))
#define f4(x,y,z) ((x)^(y)^(z))

// (R0+R1), R2, R3, R4 are the different operations used in SHA1
// v..z name the five working variables in their current rotation; z and w
// are modified.  Each macro expands to TWO statements, so it must only be
// used as a complete statement (never as the body of an unbraced if/loop).
#define R0(v,w,x,y,z,i) z+= f1(w,x,y) + blk0(i) + 0x5A827999+ \
                        rotlFixed(v,5); w = rotlFixed(w,30);
#define R1(v,w,x,y,z,i) z+= f1(w,x,y) + blk1(i) + 0x5A827999+ \
                        rotlFixed(v,5); w = rotlFixed(w,30);
#define R2(v,w,x,y,z,i) z+= f2(w,x,y) + blk1(i) + 0x6ED9EBA1+ \
                        rotlFixed(v,5); w = rotlFixed(w,30);
#define R3(v,w,x,y,z,i) z+= f3(w,x,y) + blk1(i) + 0x8F1BBCDC+ \
                        rotlFixed(v,5); w = rotlFixed(w,30);
#define R4(v,w,x,y,z,i) z+= f4(w,x,y) + blk1(i) + 0xCA62C1D6+ \
                        rotlFixed(v,5); w = rotlFixed(w,30);
61
digest_[0] = 0x67452301L;
62
digest_[1] = 0xEFCDAB89L;
63
digest_[2] = 0x98BADCFEL;
64
digest_[3] = 0x10325476L;
65
digest_[4] = 0xC3D2E1F0L;
74
digest_[0] = 0x6A09E667L;
75
digest_[1] = 0xBB67AE85L;
76
digest_[2] = 0x3C6EF372L;
77
digest_[3] = 0xA54FF53AL;
78
digest_[4] = 0x510E527FL;
79
digest_[5] = 0x9B05688CL;
80
digest_[6] = 0x1F83D9ABL;
81
digest_[7] = 0x5BE0CD19L;
91
digest_[0] = 0xc1059ed8;
92
digest_[1] = 0x367cd507;
93
digest_[2] = 0x3070dd17;
94
digest_[3] = 0xf70e5939;
95
digest_[4] = 0xffc00b31;
96
digest_[5] = 0x68581511;
97
digest_[6] = 0x64f98fa7;
98
digest_[7] = 0xbefa4fa4;
106
#ifdef WORD64_AVAILABLE
110
digest_[0] = W64LIT(0x6a09e667f3bcc908);
111
digest_[1] = W64LIT(0xbb67ae8584caa73b);
112
digest_[2] = W64LIT(0x3c6ef372fe94f82b);
113
digest_[3] = W64LIT(0xa54ff53a5f1d36f1);
114
digest_[4] = W64LIT(0x510e527fade682d1);
115
digest_[5] = W64LIT(0x9b05688c2b3e6c1f);
116
digest_[6] = W64LIT(0x1f83d9abfb41bd6b);
117
digest_[7] = W64LIT(0x5be0cd19137e2179);
127
digest_[0] = W64LIT(0xcbbb9d5dc1059ed8);
128
digest_[1] = W64LIT(0x629a292a367cd507);
129
digest_[2] = W64LIT(0x9159015a3070dd17);
130
digest_[3] = W64LIT(0x152fecd8f70e5939);
131
digest_[4] = W64LIT(0x67332667ffc00b31);
132
digest_[5] = W64LIT(0x8eb44a8768581511);
133
digest_[6] = W64LIT(0xdb0c2e0d64f98fa7);
134
digest_[7] = W64LIT(0x47b5481dbefa4fa4);
141
#endif // WORD64_AVAILABLE
144
SHA::SHA(const SHA& that) : HASHwithTransform(DIGEST_SIZE / sizeof(word32),
147
buffLen_ = that.buffLen_;
148
loLen_ = that.loLen_;
149
hiLen_ = that.hiLen_;
151
memcpy(digest_, that.digest_, DIGEST_SIZE);
152
memcpy(buffer_, that.buffer_, BLOCK_SIZE);
156
SHA256::SHA256(const SHA256& that) : HASHwithTransform(DIGEST_SIZE /
157
sizeof(word32), BLOCK_SIZE)
159
buffLen_ = that.buffLen_;
160
loLen_ = that.loLen_;
161
hiLen_ = that.hiLen_;
163
memcpy(digest_, that.digest_, DIGEST_SIZE);
164
memcpy(buffer_, that.buffer_, BLOCK_SIZE);
168
SHA224::SHA224(const SHA224& that) : HASHwithTransform(SHA256::DIGEST_SIZE /
169
sizeof(word32), BLOCK_SIZE)
171
buffLen_ = that.buffLen_;
172
loLen_ = that.loLen_;
173
hiLen_ = that.hiLen_;
175
memcpy(digest_, that.digest_, DIGEST_SIZE);
176
memcpy(buffer_, that.buffer_, BLOCK_SIZE);
180
#ifdef WORD64_AVAILABLE
182
SHA512::SHA512(const SHA512& that) : HASH64withTransform(DIGEST_SIZE /
183
sizeof(word64), BLOCK_SIZE)
185
buffLen_ = that.buffLen_;
186
loLen_ = that.loLen_;
187
hiLen_ = that.hiLen_;
189
memcpy(digest_, that.digest_, DIGEST_SIZE);
190
memcpy(buffer_, that.buffer_, BLOCK_SIZE);
194
SHA384::SHA384(const SHA384& that) : HASH64withTransform(SHA512::DIGEST_SIZE /
195
sizeof(word64), BLOCK_SIZE)
197
buffLen_ = that.buffLen_;
198
loLen_ = that.loLen_;
199
hiLen_ = that.hiLen_;
201
memcpy(digest_, that.digest_, DIGEST_SIZE);
202
memcpy(buffer_, that.buffer_, BLOCK_SIZE);
205
#endif // WORD64_AVAILABLE
208
SHA& SHA::operator= (const SHA& that)
217
SHA256& SHA256::operator= (const SHA256& that)
226
SHA224& SHA224::operator= (const SHA224& that)
235
#ifdef WORD64_AVAILABLE
237
SHA512& SHA512::operator= (const SHA512& that)
246
SHA384& SHA384::operator= (const SHA384& that)
254
#endif // WORD64_AVAILABLE
257
void SHA::Swap(SHA& other)
259
STL::swap(loLen_, other.loLen_);
260
STL::swap(hiLen_, other.hiLen_);
261
STL::swap(buffLen_, other.buffLen_);
263
memcpy(digest_, other.digest_, DIGEST_SIZE);
264
memcpy(buffer_, other.buffer_, BLOCK_SIZE);
268
void SHA256::Swap(SHA256& other)
270
STL::swap(loLen_, other.loLen_);
271
STL::swap(hiLen_, other.hiLen_);
272
STL::swap(buffLen_, other.buffLen_);
274
memcpy(digest_, other.digest_, DIGEST_SIZE);
275
memcpy(buffer_, other.buffer_, BLOCK_SIZE);
279
void SHA224::Swap(SHA224& other)
281
STL::swap(loLen_, other.loLen_);
282
STL::swap(hiLen_, other.hiLen_);
283
STL::swap(buffLen_, other.buffLen_);
285
memcpy(digest_, other.digest_, DIGEST_SIZE);
286
memcpy(buffer_, other.buffer_, BLOCK_SIZE);
290
#ifdef WORD64_AVAILABLE
292
void SHA512::Swap(SHA512& other)
294
STL::swap(loLen_, other.loLen_);
295
STL::swap(hiLen_, other.hiLen_);
296
STL::swap(buffLen_, other.buffLen_);
298
memcpy(digest_, other.digest_, DIGEST_SIZE);
299
memcpy(buffer_, other.buffer_, BLOCK_SIZE);
303
void SHA384::Swap(SHA384& other)
305
STL::swap(loLen_, other.loLen_);
306
STL::swap(hiLen_, other.hiLen_);
307
STL::swap(buffLen_, other.buffLen_);
309
memcpy(digest_, other.digest_, DIGEST_SIZE);
310
memcpy(buffer_, other.buffer_, BLOCK_SIZE);
313
#endif // WORD64_AVAILABLE
318
// Update digest with data of size len
319
void SHA::Update(const byte* data, word32 len)
322
HASHwithTransform::Update(data, len);
326
byte* local = reinterpret_cast<byte*>(buffer_);
328
// remove buffered data if possible
330
word32 add = min(len, BLOCK_SIZE - buffLen_);
331
memcpy(&local[buffLen_], data, add);
337
if (buffLen_ == BLOCK_SIZE) {
338
ByteReverse(local, local, BLOCK_SIZE);
340
AddLength(BLOCK_SIZE);
345
// all at once for asm
347
word32 times = len / BLOCK_SIZE;
349
AsmTransform(data, times);
350
const word32 add = BLOCK_SIZE * times;
357
// cache any data left
359
memcpy(&local[buffLen_], data, len);
367
void SHA::Transform()
369
word32 W[BLOCK_SIZE / sizeof(word32)];
371
// Copy context->state[] to working vars
372
word32 a = digest_[0];
373
word32 b = digest_[1];
374
word32 c = digest_[2];
375
word32 d = digest_[3];
376
word32 e = digest_[4];
378
// 4 rounds of 20 operations each. Loop unrolled.
379
R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
380
R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
381
R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11);
382
R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15);
384
R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
386
R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
387
R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
388
R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
389
R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
390
R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
392
R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
393
R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
394
R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
395
R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
396
R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
398
R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
399
R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
400
R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
401
R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
402
R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
404
// Add the working vars back into digest state[]
412
a = b = c = d = e = 0;
413
memset(W, 0, sizeof(W));
417
// Helper macros shared by the SHA-2 transforms.  They expand in a scope that
// provides the schedule array `W` (indexed modulo 16), the working-variable
// array `T` (8 entries), the round-constant pointer `K`, the outer loop
// counter `j`, and a `rotrFixed(value, bits)` rotate-right helper.
// All macro parameters are parenthesized so that expression arguments keep
// their intended grouping.

// blk2: expand the message schedule in place and yield the new word.
#define blk2(i) (W[(i)&15]+=s1(W[((i)-2)&15])+W[((i)-7)&15]+s0(W[((i)-15)&15]))

// Ch: bitwise select (x ? y : z).  Maj: bitwise majority of x, y, z.
#define Ch(x,y,z) ((z)^((x)&((y)^(z))))
#define Maj(x,y,z) (((x)&(y))|((z)&((x)|(y))))

// Working variables a..h live in T; which slot each name refers to shifts
// down by one (mod 8) per round, so no values have to be moved between rounds.
#define a(i) T[(0-(i))&7]
#define b(i) T[(1-(i))&7]
#define c(i) T[(2-(i))&7]
#define d(i) T[(3-(i))&7]
#define e(i) T[(4-(i))&7]
#define f(i) T[(5-(i))&7]
#define g(i) T[(6-(i))&7]
#define h(i) T[(7-(i))&7]

// One round.  While j == 0 the schedule word is loaded with blk0 (defined
// with the SHA-1 helpers earlier in this file); otherwise it is expanded
// with blk2.  Expands to several statements, so use it only as a complete
// statement.
#define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+K[(i)+j]+(j?blk2(i):blk0(i));\
        d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))

// Sigma functions with the rotation amounts used by Transform256.
#define S0(x) (rotrFixed(x,2)^rotrFixed(x,13)^rotrFixed(x,22))
#define S1(x) (rotrFixed(x,6)^rotrFixed(x,11)^rotrFixed(x,25))
#define s0(x) (rotrFixed(x,7)^rotrFixed(x,18)^((x)>>3))
#define s1(x) (rotrFixed(x,17)^rotrFixed(x,19)^((x)>>10))
441
static const word32 K256[64] = {
442
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
443
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
444
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
445
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
446
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
447
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
448
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
449
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
450
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
451
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
452
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
453
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
454
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
455
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
456
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
457
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
461
static void Transform256(word32* digest_, word32* buffer_)
463
const word32* K = K256;
468
// Copy digest to working vars
469
memcpy(T, digest_, sizeof(T));
471
// 64 operations, partially loop unrolled
472
for (unsigned int j = 0; j < 64; j += 16) {
473
R( 0); R( 1); R( 2); R( 3);
474
R( 4); R( 5); R( 6); R( 7);
475
R( 8); R( 9); R(10); R(11);
476
R(12); R(13); R(14); R(15);
479
// Add the working vars back into digest
490
memset(W, 0, sizeof(W));
491
memset(T, 0, sizeof(T));
502
void SHA256::Transform()
504
Transform256(digest_, buffer_);
508
void SHA224::Transform()
510
Transform256(digest_, buffer_);
514
#ifdef WORD64_AVAILABLE
516
static const word64 K512[80] = {
517
W64LIT(0x428a2f98d728ae22), W64LIT(0x7137449123ef65cd),
518
W64LIT(0xb5c0fbcfec4d3b2f), W64LIT(0xe9b5dba58189dbbc),
519
W64LIT(0x3956c25bf348b538), W64LIT(0x59f111f1b605d019),
520
W64LIT(0x923f82a4af194f9b), W64LIT(0xab1c5ed5da6d8118),
521
W64LIT(0xd807aa98a3030242), W64LIT(0x12835b0145706fbe),
522
W64LIT(0x243185be4ee4b28c), W64LIT(0x550c7dc3d5ffb4e2),
523
W64LIT(0x72be5d74f27b896f), W64LIT(0x80deb1fe3b1696b1),
524
W64LIT(0x9bdc06a725c71235), W64LIT(0xc19bf174cf692694),
525
W64LIT(0xe49b69c19ef14ad2), W64LIT(0xefbe4786384f25e3),
526
W64LIT(0x0fc19dc68b8cd5b5), W64LIT(0x240ca1cc77ac9c65),
527
W64LIT(0x2de92c6f592b0275), W64LIT(0x4a7484aa6ea6e483),
528
W64LIT(0x5cb0a9dcbd41fbd4), W64LIT(0x76f988da831153b5),
529
W64LIT(0x983e5152ee66dfab), W64LIT(0xa831c66d2db43210),
530
W64LIT(0xb00327c898fb213f), W64LIT(0xbf597fc7beef0ee4),
531
W64LIT(0xc6e00bf33da88fc2), W64LIT(0xd5a79147930aa725),
532
W64LIT(0x06ca6351e003826f), W64LIT(0x142929670a0e6e70),
533
W64LIT(0x27b70a8546d22ffc), W64LIT(0x2e1b21385c26c926),
534
W64LIT(0x4d2c6dfc5ac42aed), W64LIT(0x53380d139d95b3df),
535
W64LIT(0x650a73548baf63de), W64LIT(0x766a0abb3c77b2a8),
536
W64LIT(0x81c2c92e47edaee6), W64LIT(0x92722c851482353b),
537
W64LIT(0xa2bfe8a14cf10364), W64LIT(0xa81a664bbc423001),
538
W64LIT(0xc24b8b70d0f89791), W64LIT(0xc76c51a30654be30),
539
W64LIT(0xd192e819d6ef5218), W64LIT(0xd69906245565a910),
540
W64LIT(0xf40e35855771202a), W64LIT(0x106aa07032bbd1b8),
541
W64LIT(0x19a4c116b8d2d0c8), W64LIT(0x1e376c085141ab53),
542
W64LIT(0x2748774cdf8eeb99), W64LIT(0x34b0bcb5e19b48a8),
543
W64LIT(0x391c0cb3c5c95a63), W64LIT(0x4ed8aa4ae3418acb),
544
W64LIT(0x5b9cca4f7763e373), W64LIT(0x682e6ff3d6b2b8a3),
545
W64LIT(0x748f82ee5defb2fc), W64LIT(0x78a5636f43172f60),
546
W64LIT(0x84c87814a1f0ab72), W64LIT(0x8cc702081a6439ec),
547
W64LIT(0x90befffa23631e28), W64LIT(0xa4506cebde82bde9),
548
W64LIT(0xbef9a3f7b2c67915), W64LIT(0xc67178f2e372532b),
549
W64LIT(0xca273eceea26619c), W64LIT(0xd186b8c721c0c207),
550
W64LIT(0xeada7dd6cde0eb1e), W64LIT(0xf57d4f7fee6ed178),
551
W64LIT(0x06f067aa72176fba), W64LIT(0x0a637dc5a2c898a6),
552
W64LIT(0x113f9804bef90dae), W64LIT(0x1b710b35131c471b),
553
W64LIT(0x28db77f523047d84), W64LIT(0x32caab7b40c72493),
554
W64LIT(0x3c9ebe0a15c9bebc), W64LIT(0x431d67c49c100d4c),
555
W64LIT(0x4cc5d4becb3e42b6), W64LIT(0x597f299cfc657e2a),
556
W64LIT(0x5fcb6fab3ad6faec), W64LIT(0x6c44198c4a475817)
561
// Sigma functions with the rotation amounts used by Transform512.
// The shifted operand is parenthesized so that expression arguments
// (e.g. `a | b`) are shifted as a whole.
// NOTE(review): the same four names are defined earlier in this file with
// different rotation amounts; confirm they are #undef'd before this point.
#define S0(x) (rotrFixed(x,28)^rotrFixed(x,34)^rotrFixed(x,39))
#define S1(x) (rotrFixed(x,14)^rotrFixed(x,18)^rotrFixed(x,41))
#define s0(x) (rotrFixed(x,1)^rotrFixed(x,8)^((x)>>7))
#define s1(x) (rotrFixed(x,19)^rotrFixed(x,61)^((x)>>6))
567
static void Transform512(word64* digest_, word64* buffer_)
569
const word64* K = K512;
574
// Copy digest to working vars
575
memcpy(T, digest_, sizeof(T));
577
// 80 operations, partially loop unrolled
578
for (unsigned int j = 0; j < 80; j += 16) {
579
R( 0); R( 1); R( 2); R( 3);
580
R( 4); R( 5); R( 6); R( 7);
581
R( 8); R( 9); R(10); R(11);
582
R(12); R(13); R(14); R(15);
585
// Add the working vars back into digest
597
memset(W, 0, sizeof(W));
598
memset(T, 0, sizeof(T));
602
void SHA512::Transform()
604
Transform512(digest_, buffer_);
608
void SHA384::Transform()
610
Transform512(digest_, buffer_);
613
#endif // WORD64_AVAILABLE
618
// f1(x,y,z) (z^(x &(y^z)))
620
#define ASMf1(x,y,z) \
628
// z+= f1(w,x,y) + W[i] + 0x5A827999 + rotlFixed(v,5);
629
// w = rotlFixed(w,30);
635
#define ASMR0(v,w,x,y,z,i) \
637
AS2( mov edi, [esp + i * 4] ) \
640
AS2( lea z, [edi + z + 0x5A827999] ) \
649
/* Some macro stuff, but older gas ( < 2.16 ) can't process &, so do by hand
650
% won't work on gas at all
652
#define xstr(s) str(s)
655
#define WOFF1(a) ( a & 15)
656
#define WOFF2(a) ((a + 2) & 15)
657
#define WOFF3(a) ((a + 8) & 15)
658
#define WOFF4(a) ((a + 13) & 15)
661
#define WGET1(i) asm("mov esp, [edi - "xstr(WOFF1(i))" * 4] ");
662
#define WGET2(i) asm("xor esp, [edi - "xstr(WOFF2(i))" * 4] ");
663
#define WGET3(i) asm("xor esp, [edi - "xstr(WOFF3(i))" * 4] ");
664
#define WGET4(i) asm("xor esp, [edi - "xstr(WOFF4(i))" * 4] ");
665
#define WPUT1(i) asm("mov [edi - "xstr(WOFF1(i))" * 4], esp ");
667
#define WGET1(i) AS2( mov esp, [edi - WOFF1(i) * 4] )
668
#define WGET2(i) AS2( xor esp, [edi - WOFF2(i) * 4] )
669
#define WGET3(i) AS2( xor esp, [edi - WOFF3(i) * 4] )
670
#define WGET4(i) AS2( xor esp, [edi - WOFF4(i) * 4] )
671
#define WPUT1(i) AS2( mov [edi - WOFF1(i) * 4], esp )
675
// ASMR1 = ASMR0 but use esp for W calcs
677
#define ASMR1(v,w,x,y,z,i,W1,W2,W3,W4) \
678
AS2( mov edi, [esp + W1 * 4] ) \
680
AS2( xor edi, [esp + W2 * 4] ) \
682
AS2( xor edi, [esp + W3 * 4] ) \
684
AS2( xor edi, [esp + W4 * 4] ) \
687
AS2( mov [esp + W1 * 4], edi ) \
688
AS2( lea z, [edi + z + 0x5A827999] ) \
696
// ASMR2 = ASMR1 but f is xor, xor instead
698
#define ASMR2(v,w,x,y,z,i,W1,W2,W3,W4) \
699
AS2( mov edi, [esp + W1 * 4] ) \
701
AS2( xor edi, [esp + W2 * 4] ) \
703
AS2( xor edi, [esp + W3 * 4] ) \
705
AS2( xor edi, [esp + W4 * 4] ) \
708
AS2( mov [esp + W1 * 4], edi ) \
709
AS2( lea z, [edi + z + 0x6ED9EBA1] ) \
716
// ASMR3 = ASMR2 but f is (x&y)|(z&(x|y))
717
// which is (w&x)|(y&(w|x))
719
#define ASMR3(v,w,x,y,z,i,W1,W2,W3,W4) \
720
AS2( mov edi, [esp + W1 * 4] ) \
722
AS2( xor edi, [esp + W2 * 4] ) \
724
AS2( xor edi, [esp + W3 * 4] ) \
726
AS2( xor edi, [esp + W4 * 4] ) \
727
AS2( movd mm0, esi ) \
730
AS2( mov [esp + W1 * 4], edi ) \
732
AS2( lea z, [edi + z + 0x8F1BBCDC] ) \
733
AS2( movd edi, mm0 ) \
742
// ASMR4 = ASMR2 but different constant
744
#define ASMR4(v,w,x,y,z,i,W1,W2,W3,W4) \
745
AS2( mov edi, [esp + W1 * 4] ) \
747
AS2( xor edi, [esp + W2 * 4] ) \
749
AS2( xor edi, [esp + W3 * 4] ) \
751
AS2( xor edi, [esp + W4 * 4] ) \
754
AS2( mov [esp + W1 * 4], edi ) \
755
AS2( lea z, [edi + z + 0xCA62C1D6] ) \
765
void SHA::AsmTransform(const byte* data, word32 times)
768
#define AS1(x) asm(#x);
769
#define AS2(x, y) asm(#x ", " #y);
772
asm(".intel_syntax noprefix"); \
773
AS2( movd mm3, edi ) \
774
AS2( movd mm4, ebx ) \
775
AS2( movd mm5, esi ) \
776
AS2( movd mm6, ebp ) \
777
AS2( mov ecx, DWORD PTR [ebp + 8] ) \
778
AS2( mov edi, DWORD PTR [ebp + 12] ) \
779
AS2( mov eax, DWORD PTR [ebp + 16] )
782
AS2( movd ebp, mm6 ) \
783
AS2( movd esi, mm5 ) \
784
AS2( movd ebx, mm4 ) \
785
AS2( mov esp, ebp ) \
786
AS2( movd edi, mm3 ) \
790
#define AS1(x) __asm x
791
#define AS2(x, y) __asm x, y
795
AS2( mov ebp, esp ) \
796
AS2( movd mm3, edi ) \
797
AS2( movd mm4, ebx ) \
798
AS2( movd mm5, esi ) \
799
AS2( movd mm6, ebp ) \
800
AS2( mov edi, data ) \
801
AS2( mov eax, times )
804
AS2( movd ebp, mm6 ) \
805
AS2( movd esi, mm5 ) \
806
AS2( movd ebx, mm4 ) \
807
AS2( movd edi, mm3 ) \
808
AS2( mov esp, ebp ) \
818
#ifdef OLD_GCC_OFFSET
819
AS2( add esi, 20 ) // digest_[0]
821
AS2( add esi, 16 ) // digest_[0]
824
AS2( movd mm2, eax ) // store times_
825
AS2( movd mm1, esi ) // store digest_
827
AS2( sub esp, 68 ) // make room on stack
831
// byte reverse 16 words of input, 4 at a time, put on stack for W[]
834
AS2( mov eax, [edi] )
835
AS2( mov ebx, [edi + 4] )
836
AS2( mov ecx, [edi + 8] )
837
AS2( mov edx, [edi + 12] )
844
AS2( mov [esp], eax )
845
AS2( mov [esp + 4], ebx )
846
AS2( mov [esp + 8], ecx )
847
AS2( mov [esp + 12], edx )
850
AS2( mov eax, [edi + 16] )
851
AS2( mov ebx, [edi + 20] )
852
AS2( mov ecx, [edi + 24] )
853
AS2( mov edx, [edi + 28] )
860
AS2( mov [esp + 16], eax )
861
AS2( mov [esp + 20], ebx )
862
AS2( mov [esp + 24], ecx )
863
AS2( mov [esp + 28], edx )
867
AS2( mov eax, [edi + 32] )
868
AS2( mov ebx, [edi + 36] )
869
AS2( mov ecx, [edi + 40] )
870
AS2( mov edx, [edi + 44] )
877
AS2( mov [esp + 32], eax )
878
AS2( mov [esp + 36], ebx )
879
AS2( mov [esp + 40], ecx )
880
AS2( mov [esp + 44], edx )
884
AS2( mov eax, [edi + 48] )
885
AS2( mov ebx, [edi + 52] )
886
AS2( mov ecx, [edi + 56] )
887
AS2( mov edx, [edi + 60] )
894
AS2( mov [esp + 48], eax )
895
AS2( mov [esp + 52], ebx )
896
AS2( mov [esp + 56], ecx )
897
AS2( mov [esp + 60], edx )
899
AS2( mov [esp + 64], edi ) // store edi for end
902
AS2( mov eax, [esi] ) // a1
903
AS2( mov ebx, [esi + 4] ) // b1
904
AS2( mov ecx, [esi + 8] ) // c1
905
AS2( mov edx, [esi + 12] ) // d1
906
AS2( mov ebp, [esi + 16] ) // e1
909
ASMR0(eax, ebx, ecx, edx, ebp, 0)
910
ASMR0(ebp, eax, ebx, ecx, edx, 1)
911
ASMR0(edx, ebp, eax, ebx, ecx, 2)
912
ASMR0(ecx, edx, ebp, eax, ebx, 3)
913
ASMR0(ebx, ecx, edx, ebp, eax, 4)
914
ASMR0(eax, ebx, ecx, edx, ebp, 5)
915
ASMR0(ebp, eax, ebx, ecx, edx, 6)
916
ASMR0(edx, ebp, eax, ebx, ecx, 7)
917
ASMR0(ecx, edx, ebp, eax, ebx, 8)
918
ASMR0(ebx, ecx, edx, ebp, eax, 9)
919
ASMR0(eax, ebx, ecx, edx, ebp, 10)
920
ASMR0(ebp, eax, ebx, ecx, edx, 11)
921
ASMR0(edx, ebp, eax, ebx, ecx, 12)
922
ASMR0(ecx, edx, ebp, eax, ebx, 13)
923
ASMR0(ebx, ecx, edx, ebp, eax, 14)
924
ASMR0(eax, ebx, ecx, edx, ebp, 15)
926
ASMR1(ebp, eax, ebx, ecx, edx, 16, 0, 2, 8, 13)
927
ASMR1(edx, ebp, eax, ebx, ecx, 17, 1, 3, 9, 14)
928
ASMR1(ecx, edx, ebp, eax, ebx, 18, 2, 4, 10, 15)
929
ASMR1(ebx, ecx, edx, ebp, eax, 19, 3, 5, 11, 0)
931
ASMR2(eax, ebx, ecx, edx, ebp, 20, 4, 6, 12, 1)
932
ASMR2(ebp, eax, ebx, ecx, edx, 21, 5, 7, 13, 2)
933
ASMR2(edx, ebp, eax, ebx, ecx, 22, 6, 8, 14, 3)
934
ASMR2(ecx, edx, ebp, eax, ebx, 23, 7, 9, 15, 4)
935
ASMR2(ebx, ecx, edx, ebp, eax, 24, 8, 10, 0, 5)
936
ASMR2(eax, ebx, ecx, edx, ebp, 25, 9, 11, 1, 6)
937
ASMR2(ebp, eax, ebx, ecx, edx, 26, 10, 12, 2, 7)
938
ASMR2(edx, ebp, eax, ebx, ecx, 27, 11, 13, 3, 8)
939
ASMR2(ecx, edx, ebp, eax, ebx, 28, 12, 14, 4, 9)
940
ASMR2(ebx, ecx, edx, ebp, eax, 29, 13, 15, 5, 10)
941
ASMR2(eax, ebx, ecx, edx, ebp, 30, 14, 0, 6, 11)
942
ASMR2(ebp, eax, ebx, ecx, edx, 31, 15, 1, 7, 12)
943
ASMR2(edx, ebp, eax, ebx, ecx, 32, 0, 2, 8, 13)
944
ASMR2(ecx, edx, ebp, eax, ebx, 33, 1, 3, 9, 14)
945
ASMR2(ebx, ecx, edx, ebp, eax, 34, 2, 4, 10, 15)
946
ASMR2(eax, ebx, ecx, edx, ebp, 35, 3, 5, 11, 0)
947
ASMR2(ebp, eax, ebx, ecx, edx, 36, 4, 6, 12, 1)
948
ASMR2(edx, ebp, eax, ebx, ecx, 37, 5, 7, 13, 2)
949
ASMR2(ecx, edx, ebp, eax, ebx, 38, 6, 8, 14, 3)
950
ASMR2(ebx, ecx, edx, ebp, eax, 39, 7, 9, 15, 4)
953
ASMR3(eax, ebx, ecx, edx, ebp, 40, 8, 10, 0, 5)
954
ASMR3(ebp, eax, ebx, ecx, edx, 41, 9, 11, 1, 6)
955
ASMR3(edx, ebp, eax, ebx, ecx, 42, 10, 12, 2, 7)
956
ASMR3(ecx, edx, ebp, eax, ebx, 43, 11, 13, 3, 8)
957
ASMR3(ebx, ecx, edx, ebp, eax, 44, 12, 14, 4, 9)
958
ASMR3(eax, ebx, ecx, edx, ebp, 45, 13, 15, 5, 10)
959
ASMR3(ebp, eax, ebx, ecx, edx, 46, 14, 0, 6, 11)
960
ASMR3(edx, ebp, eax, ebx, ecx, 47, 15, 1, 7, 12)
961
ASMR3(ecx, edx, ebp, eax, ebx, 48, 0, 2, 8, 13)
962
ASMR3(ebx, ecx, edx, ebp, eax, 49, 1, 3, 9, 14)
963
ASMR3(eax, ebx, ecx, edx, ebp, 50, 2, 4, 10, 15)
964
ASMR3(ebp, eax, ebx, ecx, edx, 51, 3, 5, 11, 0)
965
ASMR3(edx, ebp, eax, ebx, ecx, 52, 4, 6, 12, 1)
966
ASMR3(ecx, edx, ebp, eax, ebx, 53, 5, 7, 13, 2)
967
ASMR3(ebx, ecx, edx, ebp, eax, 54, 6, 8, 14, 3)
968
ASMR3(eax, ebx, ecx, edx, ebp, 55, 7, 9, 15, 4)
969
ASMR3(ebp, eax, ebx, ecx, edx, 56, 8, 10, 0, 5)
970
ASMR3(edx, ebp, eax, ebx, ecx, 57, 9, 11, 1, 6)
971
ASMR3(ecx, edx, ebp, eax, ebx, 58, 10, 12, 2, 7)
972
ASMR3(ebx, ecx, edx, ebp, eax, 59, 11, 13, 3, 8)
974
ASMR4(eax, ebx, ecx, edx, ebp, 60, 12, 14, 4, 9)
975
ASMR4(ebp, eax, ebx, ecx, edx, 61, 13, 15, 5, 10)
976
ASMR4(edx, ebp, eax, ebx, ecx, 62, 14, 0, 6, 11)
977
ASMR4(ecx, edx, ebp, eax, ebx, 63, 15, 1, 7, 12)
978
ASMR4(ebx, ecx, edx, ebp, eax, 64, 0, 2, 8, 13)
979
ASMR4(eax, ebx, ecx, edx, ebp, 65, 1, 3, 9, 14)
980
ASMR4(ebp, eax, ebx, ecx, edx, 66, 2, 4, 10, 15)
981
ASMR4(edx, ebp, eax, ebx, ecx, 67, 3, 5, 11, 0)
982
ASMR4(ecx, edx, ebp, eax, ebx, 68, 4, 6, 12, 1)
983
ASMR4(ebx, ecx, edx, ebp, eax, 69, 5, 7, 13, 2)
984
ASMR4(eax, ebx, ecx, edx, ebp, 70, 6, 8, 14, 3)
985
ASMR4(ebp, eax, ebx, ecx, edx, 71, 7, 9, 15, 4)
986
ASMR4(edx, ebp, eax, ebx, ecx, 72, 8, 10, 0, 5)
987
ASMR4(ecx, edx, ebp, eax, ebx, 73, 9, 11, 1, 6)
988
ASMR4(ebx, ecx, edx, ebp, eax, 74, 10, 12, 2, 7)
989
ASMR4(eax, ebx, ecx, edx, ebp, 75, 11, 13, 3, 8)
990
ASMR4(ebp, eax, ebx, ecx, edx, 76, 12, 14, 4, 9)
991
ASMR4(edx, ebp, eax, ebx, ecx, 77, 13, 15, 5, 10)
992
ASMR4(ecx, edx, ebp, eax, ebx, 78, 14, 0, 6, 11)
993
ASMR4(ebx, ecx, edx, ebp, eax, 79, 15, 1, 7, 12)
996
AS2( movd esi, mm1 ) // digest_
998
AS2( add [esi], eax ) // write out
999
AS2( add [esi + 4], ebx )
1000
AS2( add [esi + 8], ecx )
1001
AS2( add [esi + 12], edx )
1002
AS2( add [esi + 16], ebp )
1005
AS2( movd ebp, mm2 ) // times
1007
AS2( mov edi, DWORD PTR [esp + 64] ) // data
1009
AS2( add edi, 64 ) // next round of data
1010
AS2( mov [esp + 64], edi ) // restore
1013
AS2( movd mm2, ebp )
1014
AS1( jnz loopStart )
1021
#endif // DO_SHA_ASM