2
; External data symbols, given by their MSVC-decorated names (namespace CryptoPP).
; ?Te@rdtable@CryptoPP@@3PA_KA is the table whose address is loaded into rsi
; in Rijndael_Enc_AdvancedProcessBlocks.
EXTERNDEF ?Te@rdtable@CryptoPP@@3PA_KA:FAR
3
; ?g_cacheLineSize@CryptoPP@@3IA is read once in
; Rijndael_Enc_AdvancedProcessBlocks; the "3IA" suffix decorates a variable of
; type unsigned int.
EXTERNDEF ?g_cacheLineSize@CryptoPP@@3IA:FAR
11
; NOTE(review): the instructions below are isolated fragments of two routines
; (presumably Baseline_Add and Baseline_Sub, judging by the $1@... jump
; targets). Their PROC/ENDP lines, the label definitions and the memory
; operations between these lines are not part of this excerpt, and the bare
; numbers look like line markers from the full source -- confirm against it.
neg rcx ; rcx is negative index
20
lea rcx,[rcx+2] ; advance index, avoid inc which causes slowdown on Intel Core 2
21
jrcxz $1@Baseline_Add ; loop until rcx overflows and becomes zero
28
; adc of rax with itself yields 2*rax + CF; the existing comment implies rax
; is zero here (the instruction clearing it is not visible in this excerpt).
adc rax, rax ; store carry into rax (return result register)
37
; Second fragment: same index handling as above, jumping to $1@Baseline_Sub.
neg rcx ; rcx is negative index
46
lea rcx,[rcx+2] ; advance index, avoid inc which causes slowdown on Intel Core 2
47
jrcxz $1@Baseline_Sub ; loop until rcx overflows and becomes zero
54
adc rax, rax ; store carry into rax (return result register)
60
; Rijndael_Enc_AdvancedProcessBlocks
;
; NOTE(review): this listing is an excerpt. The bare numbers between the
; instructions look like line markers from the full source, and the gaps
; between them mean that instructions (prologue, index set-up, branches,
; labels, epilogue) are missing here. The comments below describe only what
; the visible lines do.
;
; Visible register / memory roles:
;   rsi  = address of ?Te@rdtable@CryptoPP@@3PA_KA. Every table lookup has the
;          form [rsi + 8*index + k]: 8-byte entries read at byte offset k.
;          (((n+3) MOD (4))+1) evaluates to 4, 1, 2, 3 for n = 0, 1, 2, 3.
;   rbp  = index/offset register in most lookups; it is presumably reloaded
;          between lookups by instructions not shown here.
;   r8   = base of a work area (set up outside this excerpt):
;          [r8+0 ..]     receives 16-byte copies from [rax+rbp]; also used for
;                        dword spills (offsets 0..12 and -4*16 .. -4*16+28)
;          [r8+16*12]    16 bytes loaded from [rdx+16+rax]
;          [r8+16*13]    16 bytes assembled from eight 16-bit stores, then
;                        XORed into xmm2
;          [r8+16*14]    four qwords, read as 64-bit values and advanced by
;                        the four qwords at [r8+16*16]
;          [r8+16*18]    qword reloaded into rbp near the end
;          [r8+16*18+8]  value whose bit 0 is tested; moved through rcx
;
; Fixes relative to the previous revision of this block:
;   * g_cacheLineSize is loaded as a DWORD (it is an unsigned int) instead of
;     a QWORD.
;   * the pair of qwords at [r8+16*14] is advanced with paddq instead of paddd.
Rijndael_Enc_AdvancedProcessBlocks PROC FRAME
68
mov rsi, ?Te@rdtable@CryptoPP@@3PA_KA
69
; The "3IA" decoration marks g_cacheLineSize as a 32-bit unsigned int, so read
; exactly 4 bytes. Writing edi zero-extends into rdi, so 64-bit uses of rdi
; see the intended value instead of 4 bytes of whatever follows the variable.
mov edi, DWORD PTR [?g_cacheLineSize@CryptoPP@@3IA]
73
; Copy 16 bytes from [rdx+16+rax] into the work area at [r8+16*12], then point
; rax 32 bytes past rdx+rax.
movdqa xmm3, XMMWORD PTR [rdx+16+rax]
74
movdqa [(r8+16*12)], xmm3
75
lea rax, [rdx+rax+2*16]
78
; 16-byte copy from [rax+rbp] to [r8+rbp] (the loop control around it is not
; visible in this excerpt).
movdqa xmm0, [rax+rbp]
79
movdqa XMMWORD PTR [(r8+0)+rbp], xmm0
83
movdqa xmm4, [rax+rbp]
102
; Test bit 0 of the value at [r8+16*18+8]; the dependent branch is not shown.
test DWORD PTR [(r8+16*18+8)], 1
104
mov rbp, [(r8+16*14)]
109
mov al, BYTE PTR [rbp+15]
; Table lookups XORed into r11d / edx / ecx / ebx. Within each group of four
; the destination registers rotate while the byte offsets run 4, 1, 2, 3.
116
xor r11d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
118
xor edx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
121
xor ecx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
123
xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
128
xor ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
130
xor r11d, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
133
xor edx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
135
xor ecx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
139
xor ecx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
141
xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
144
xor r11d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
146
xor edx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
149
xor edx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
151
xor ecx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
154
xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
; Seed eax / edi / r10d from dwords 0, 2 and 3 of [r8+16*12]; ebx is seeded
; from a table entry and then XORed with dword 1 of the same block.
156
mov eax, [(r8+16*12)+0*4]
157
mov edi, [(r8+16*12)+2*4]
158
mov r10d, [(r8+16*12)+3*4]
160
xor r10d, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
162
xor edi, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
164
xor r10d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
167
xor eax, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
169
mov ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
170
xor ebx, [(r8+16*12)+1*4]
172
xor eax, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
175
xor eax, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
177
xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
180
xor edi, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
182
xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
184
xor edi, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
186
xor r10d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
; Spill the four accumulated dwords to [r8+0 .. r8+15].
189
mov [(r8+0)+3*4], r10d
190
mov [(r8+0)+0*4], eax
191
mov [(r8+0)+1*4], ebx
192
mov [(r8+0)+2*4], edi
; Reload r11d / ebx / ecx / edx from the 16 bytes at [r8+16*12].
195
mov r11d, [(r8+16*12)+0*4]
196
mov ebx, [(r8+16*12)+1*4]
197
mov ecx, [(r8+16*12)+2*4]
198
mov edx, [(r8+16*12)+3*4]
; The first two qwords at [r8+16*14] are read as full 64-bit values.
200
mov rax, [(r8+16*14)]
202
mov rbp, [(r8+16*14)+8]
211
xor r11d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
213
xor edx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
216
xor ecx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
218
xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
223
xor ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
225
xor r11d, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
228
xor edx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
230
xor ecx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
234
xor ecx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
236
xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
239
xor r11d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
241
xor edx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
244
xor edx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
246
xor ecx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
249
xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
251
xor r11d, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
; Combine table entries with the dwords previously spilled at [r8+0 .. r8+15].
259
mov eax, [(r8+0)+0*4]
260
mov ebx, [(r8+0)+1*4]
265
; This lookup is indexed by rcx and uses the literal byte offset 3.
xor edx, DWORD PTR [rsi+rcx*8+3]
267
xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
269
mov ecx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
271
xor ecx, [(r8+0)+2*4]
273
xor eax, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
275
mov edx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
276
xor edx, [(r8+0)+3*4]
; r10d / edi start from dwords 3 and 2 at [r8-4*16]; eax / ebx are built from
; table entries and XORed with dwords 0 and 1 of that block at the end.
281
mov r10d, [(r8+0)-4*16+3*4]
282
mov edi, [(r8+0)-4*16+2*4]
284
xor r10d, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
287
xor edi, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
290
xor edi, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
292
xor r10d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
295
xor r10d, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
297
mov eax, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
299
xor eax, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
301
mov ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
303
xor eax, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
305
xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
308
xor eax, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
310
xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
313
xor edi, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
315
xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
317
xor edi, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
319
xor r10d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
321
xor eax, [(r8+0)-4*16+0*4]
322
xor ebx, [(r8+0)-4*16+1*4]
; Same lookup pattern as the previous group, using dwords 7 and 6 (seeds) and
; 4 and 5 (final XOR) of the block at [r8-4*16].
325
mov r10d, [(r8+0)-4*16+7*4]
326
mov edi, [(r8+0)-4*16+6*4]
328
xor r10d, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
331
xor edi, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
334
xor edi, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
336
xor r10d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
339
xor r10d, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
341
mov eax, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
343
xor eax, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
345
mov ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
347
xor eax, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
349
xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
352
xor eax, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
354
xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
357
xor edi, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
359
xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
361
xor edi, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
363
xor r10d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
365
xor eax, [(r8+0)-4*16+4*4]
366
xor ebx, [(r8+0)-4*16+5*4]
; Eight byte-wide table reads (byte 1 of an entry, zero-extended), each XORed
; with the dword at byte 0 of an entry; the low 16 bits of every result are
; stored into the 16 bytes at [r8+16*13], covering word offsets
; 2, 6, 10, 14, 12, 0, 4, 8. rbp is presumably changed between the two table
; reads of each triple by instructions not shown here.
373
movzx edi, BYTE PTR [rsi+rbp*8+1]
375
xor edi, DWORD PTR [rsi+rbp*8+0]
376
mov WORD PTR [(r8+16*13)+2], di
378
movzx edi, BYTE PTR [rsi+rbp*8+1]
380
xor edi, DWORD PTR [rsi+rbp*8+0]
381
mov WORD PTR [(r8+16*13)+6], di
384
movzx edi, BYTE PTR [rsi+rbp*8+1]
386
xor edi, DWORD PTR [rsi+rbp*8+0]
387
mov WORD PTR [(r8+16*13)+10], di
390
movzx edi, BYTE PTR [rsi+rbp*8+1]
392
xor edi, DWORD PTR [rsi+rbp*8+0]
393
mov WORD PTR [(r8+16*13)+14], di
396
movzx edi, BYTE PTR [rsi+rbp*8+1]
398
xor edi, DWORD PTR [rsi+rbp*8+0]
399
mov WORD PTR [(r8+16*13)+12], di
402
movzx edi, BYTE PTR [rsi+rbp*8+1]
404
xor edi, DWORD PTR [rsi+rbp*8+0]
405
mov WORD PTR [(r8+16*13)+0], di
407
movzx edi, BYTE PTR [rsi+rbp*8+1]
409
xor edi, DWORD PTR [rsi+rbp*8+0]
410
mov WORD PTR [(r8+16*13)+4], di
412
movzx edi, BYTE PTR [rsi+rbp*8+1]
414
xor edi, DWORD PTR [rsi+rbp*8+0]
415
mov WORD PTR [(r8+16*13)+8], di
; Load the third and fourth qwords of [r8+16*14] and the value at [r8+16*18+8].
416
mov rax, [(r8+16*14)+16]
417
mov rbx, [(r8+16*14)+24]
418
mov rcx, [(r8+16*18+8)]
; Advance the qword pair at [r8+16*14+16] by the pair at [r8+16*16+16].
422
movdqa xmm0, [(r8+16*16)+16]
423
paddq xmm0, [(r8+16*14)+16]
424
movdqa [(r8+16*14)+16], xmm0
; Fold the 16 bytes assembled at [r8+16*13] into xmm2.
425
pxor xmm2, [(r8+16*13)]
428
mov [(r8+16*18+8)], rcx
; Advance the qword pair at [r8+16*14] by the pair at [r8+16*16]. These two
; slots are read elsewhere in this procedure with 64-bit loads
; (mov rbp/rax, [r8+16*14] and mov rbp, [r8+16*14+8]), so the add must be a
; packed 64-bit add like the one used for the pair at +16. A packed 32-bit add
; would drop the carry between the dword halves and, for a negative 64-bit
; increment, decrement the high dword on every pass.
431
movdqa xmm0, [(r8+16*16)]
432
paddq xmm0, [(r8+16*14)]
433
movdqa [(r8+16*14)], xmm0
436
mov rbp, [(r8+16*18)]
443
Rijndael_Enc_AdvancedProcessBlocks ENDP
446
; GCM_AuthenticateBlocks_2K
;
; NOTE(review): this listing is an excerpt. The bare numbers look like line
; markers from the full source, and the instructions between them (prologue,
; index extraction, shifts, loop control, epilogue) are not shown.
;
; Visible pattern: four accumulators (xmm5, xmm4, xmm3, xmm2) are each loaded
; from [rsi + 32 + 1024 + rdi] and then XORed with entries at
;   [rsi + 32 + (k-1)*256 + rdi]    and    [rsi + 32 + 1024 + k*256 + rdi]
; for k = 1, 2, 3, and finally with [rsi + 32 + 3*256 + rdi].
; That gives eight distinct table offsets, 256 bytes apart, spanning 2048
; bytes starting at rsi+32. rdi is presumably recomputed between the lookups
; by instructions not shown here.
GCM_AuthenticateBlocks_2K PROC FRAME
; Initial loads at table offset 1024.
463
movdqa xmm5, XMMWORD PTR [rsi + 32 + 1024 + rdi]
465
movdqa xmm4, XMMWORD PTR [rsi + 32 + 1024 + rdi]
468
movdqa xmm3, XMMWORD PTR [rsi + 32 + 1024 + rdi]
470
movdqa xmm2, XMMWORD PTR [rsi + 32 + 1024 + rdi]
; Table offset 0.
475
pxor xmm5, XMMWORD PTR [rsi + 32 + (1-1)*256 + rdi]
477
pxor xmm4, XMMWORD PTR [rsi + 32 + (1-1)*256 + rdi]
480
pxor xmm3, XMMWORD PTR [rsi + 32 + (1-1)*256 + rdi]
482
pxor xmm2, XMMWORD PTR [rsi + 32 + (1-1)*256 + rdi]
; Table offset 1024 + 256.
487
pxor xmm5, XMMWORD PTR [rsi + 32 + 1024 + 1*256 + rdi]
489
pxor xmm4, XMMWORD PTR [rsi + 32 + 1024 + 1*256 + rdi]
492
pxor xmm3, XMMWORD PTR [rsi + 32 + 1024 + 1*256 + rdi]
494
pxor xmm2, XMMWORD PTR [rsi + 32 + 1024 + 1*256 + rdi]
; Table offset 256.
499
pxor xmm5, XMMWORD PTR [rsi + 32 + (2-1)*256 + rdi]
501
pxor xmm4, XMMWORD PTR [rsi + 32 + (2-1)*256 + rdi]
504
pxor xmm3, XMMWORD PTR [rsi + 32 + (2-1)*256 + rdi]
506
pxor xmm2, XMMWORD PTR [rsi + 32 + (2-1)*256 + rdi]
; Table offset 1024 + 512.
511
pxor xmm5, XMMWORD PTR [rsi + 32 + 1024 + 2*256 + rdi]
513
pxor xmm4, XMMWORD PTR [rsi + 32 + 1024 + 2*256 + rdi]
516
pxor xmm3, XMMWORD PTR [rsi + 32 + 1024 + 2*256 + rdi]
518
pxor xmm2, XMMWORD PTR [rsi + 32 + 1024 + 2*256 + rdi]
; Table offset 512.
523
pxor xmm5, XMMWORD PTR [rsi + 32 + (3-1)*256 + rdi]
525
pxor xmm4, XMMWORD PTR [rsi + 32 + (3-1)*256 + rdi]
528
pxor xmm3, XMMWORD PTR [rsi + 32 + (3-1)*256 + rdi]
530
pxor xmm2, XMMWORD PTR [rsi + 32 + (3-1)*256 + rdi]
; Table offset 1024 + 768.
535
pxor xmm5, XMMWORD PTR [rsi + 32 + 1024 + 3*256 + rdi]
537
pxor xmm4, XMMWORD PTR [rsi + 32 + 1024 + 3*256 + rdi]
540
pxor xmm3, XMMWORD PTR [rsi + 32 + 1024 + 3*256 + rdi]
542
pxor xmm2, XMMWORD PTR [rsi + 32 + 1024 + 3*256 + rdi]
; Table offset 768.
544
pxor xmm5, XMMWORD PTR [rsi + 32 + 3*256 + rdi]
546
pxor xmm4, XMMWORD PTR [rsi + 32 + 3*256 + rdi]
549
pxor xmm3, XMMWORD PTR [rsi + 32 + 3*256 + rdi]
551
pxor xmm2, XMMWORD PTR [rsi + 32 + 3*256 + rdi]
; Three 16-bit lookups in a table of words based at r11, indexed by rdi: the
; first is zero-extended into eax, the other two are XORed into ax. What rdi
; and r11 hold at this point is set up outside this excerpt.
560
movzx eax, WORD PTR [r11 + rdi*2]
567
xor ax, WORD PTR [r11 + rdi*2]
571
xor ax, WORD PTR [r11 + rdi*2]
582
GCM_AuthenticateBlocks_2K ENDP
585
; GCM_AuthenticateBlocks_64K
;
; NOTE(review): this listing is an excerpt. The bare numbers look like line
; markers from the full source, and the instructions between them (prologue,
; index extraction, loop control, epilogue) are not shown.
;
; Visible pattern: xmm0 accumulates the XOR of sixteen 16-byte entries read
; from [rsi + 32 + t*256*16 + rdi*8] for t = 0..15, written as (i*4+j) with
; i, j = 0..3. Consecutive values of t are 256*16 = 4096 bytes apart, so the
; sixteen sub-tables span 65536 bytes starting at rsi+32. rdi is presumably
; recomputed before each lookup by instructions not shown here.
GCM_AuthenticateBlocks_64K PROC FRAME
; i = 0: sub-tables 0..3
599
pxor xmm0, [rsi + 32 + (0*4+0)*256*16 + rdi*8]
602
pxor xmm0, [rsi + 32 + (0*4+1)*256*16 + rdi*8]
606
pxor xmm0, [rsi + 32 + (0*4+2)*256*16 + rdi*8]
609
pxor xmm0, [rsi + 32 + (0*4+3)*256*16 + rdi*8]
; i = 1: sub-tables 4..7
614
pxor xmm0, [rsi + 32 + (1*4+0)*256*16 + rdi*8]
617
pxor xmm0, [rsi + 32 + (1*4+1)*256*16 + rdi*8]
621
pxor xmm0, [rsi + 32 + (1*4+2)*256*16 + rdi*8]
624
pxor xmm0, [rsi + 32 + (1*4+3)*256*16 + rdi*8]
; i = 2: sub-tables 8..11
629
pxor xmm0, [rsi + 32 + (2*4+0)*256*16 + rdi*8]
632
pxor xmm0, [rsi + 32 + (2*4+1)*256*16 + rdi*8]
636
pxor xmm0, [rsi + 32 + (2*4+2)*256*16 + rdi*8]
639
pxor xmm0, [rsi + 32 + (2*4+3)*256*16 + rdi*8]
; i = 3: sub-tables 12..15
644
pxor xmm0, [rsi + 32 + (3*4+0)*256*16 + rdi*8]
647
pxor xmm0, [rsi + 32 + (3*4+1)*256*16 + rdi*8]
651
pxor xmm0, [rsi + 32 + (3*4+2)*256*16 + rdi*8]
654
pxor xmm0, [rsi + 32 + (3*4+3)*256*16 + rdi*8]
662
GCM_AuthenticateBlocks_64K ENDP