3
# ====================================================================
4
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5
# project. The module is, however, dual licensed under OpenSSL and
6
# CRYPTOGAMS licenses depending on where you obtain it. For further
7
# details see http://www.openssl.org/~appro/cryptogams/.
8
# ====================================================================
12
# aes-*-cbc benchmarks are improved by >70% [compared to gcc 3.3.2 on
13
# Opteron 240 CPU] plus all the bells-n-whistles from 32-bit version
14
# [you'll notice a lot of resemblance], such as compressed S-boxes
15
# in little-endian byte order, prefetch of these tables in CBC mode,
16
# as well as avoiding L1 cache aliasing between stack frame and key
17
# schedule and already mentioned tables, compressed Td4...
19
# Performance in number of cycles per processed byte for 128-bit key:
21
# ECB encrypt ECB decrypt CBC large chunk
24
# Core 2 30 43 14.5(*)
26
# (*) with hyper-threading off
30
# Command-line handling: when the first argument contains a dot it is
# taken to be the output file name, and no flavour was supplied.
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Win64 is selected by a nasm/masm/mingw64 flavour or by a .asm output name.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Locate the x86_64-xlate.pl translator: first in the directory this
# script was invoked from, then in ../../perlasm/ relative to it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Redirect STDOUT into the translator.  The open is checked so that a
# failure to start the pipe is reported instead of being silently ignored.
open STDOUT,"| $^X $xlate $flavour $output"
    or die "can't call $xlate: $!";
41
$verticalspin=1; # unlike 32-bit version $verticalspin performs
42
# ~15% better on both AMD and Intel cores
43
$speed_limit=512; # see aes-586.pl for details
51
$acc0="%esi"; $mask80="%rsi";
52
$acc1="%edi"; $maskfe="%rdi";
53
$acc2="%ebp"; $mask1b="%rbp";
63
# hi(REG): map %eax/%rax .. %edx/%rdx to the matching high-byte register
# (%ah .. %dh).  Any other register name is returned unchanged.
# The replacement uses ${1} rather than \1, and the misleading empty
# prototype is gone; callers using &hi(...) are unaffected.
sub hi { my $r=shift; $r =~ s/%[er]([a-d])x/%${1}h/; $r; }
64
# lo(REG): map a 32/64-bit register name to its low-byte form:
#   %eax/%rax .. %edx/%rdx -> %al .. %dl
#   %esi/%rsi, %edi/%rdi   -> %sil, %dil
#   %rN or %rNd            -> %rNb
# Names matching none of the patterns are returned unchanged.
sub lo {
    my $r=shift;
    $r =~ s/%[er]([a-d])x/%${1}l/;
    $r =~ s/%[er]([sd]i)/%${1}l/;
    $r =~ s/%(r[0-9]+)[d]?/%${1}b/;
    $r;
}
67
# LO(REG): map a 64-bit register name to its 32-bit form:
#   %rax, %rcx, ... -> %eax, %ecx, ...
#   %rN             -> %rNd
# Names matching neither pattern are returned unchanged.
sub LO {
    my $r=shift;
    $r =~ s/%r([a-z]+)/%e${1}/;
    $r =~ s/%r([0-9]+)/%r${1}d/;
    $r;
}
71
while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; }
77
while(defined($i=shift)) { $code.=sprintf"0x%08x,",$i; }
78
$code.=sprintf"0x%08x\n",$last;
85
while(defined($i=shift)) { $code.=sprintf"0x%02x,",$i&0xff; }
86
$code.=sprintf"0x%02x\n",$last&0xff;
90
{ my $t3="%r8d"; # zaps $inp!
93
# favor 3-way issue Opteron pipeline...
94
movzb `&lo("$s0")`,$acc0
95
movzb `&lo("$s1")`,$acc1
96
movzb `&lo("$s2")`,$acc2
97
mov 0($sbox,$acc0,8),$t0
98
mov 0($sbox,$acc1,8),$t1
99
mov 0($sbox,$acc2,8),$t2
101
movzb `&hi("$s1")`,$acc0
102
movzb `&hi("$s2")`,$acc1
103
movzb `&lo("$s3")`,$acc2
104
xor 3($sbox,$acc0,8),$t0
105
xor 3($sbox,$acc1,8),$t1
106
mov 0($sbox,$acc2,8),$t3
108
movzb `&hi("$s3")`,$acc0
110
movzb `&hi("$s0")`,$acc2
111
xor 3($sbox,$acc0,8),$t2
113
xor 3($sbox,$acc2,8),$t3
119
movzb `&lo("$s2")`,$acc0
120
movzb `&lo("$s3")`,$acc1
121
movzb `&lo("$s0")`,$acc2
122
xor 2($sbox,$acc0,8),$t0
123
xor 2($sbox,$acc1,8),$t1
124
xor 2($sbox,$acc2,8),$t2
126
movzb `&hi("$s3")`,$acc0
127
movzb `&hi("$s0")`,$acc1
128
movzb `&lo("$s1")`,$acc2
129
xor 1($sbox,$acc0,8),$t0
130
xor 1($sbox,$acc1,8),$t1
131
xor 2($sbox,$acc2,8),$t3
134
movzb `&hi("$s1")`,$acc1
135
movzb `&hi("$s2")`,$acc2
137
xor 1($sbox,$acc1,8),$t2
138
xor 1($sbox,$acc2,8),$t3
150
{ my $t3="%r8d"; # zaps $inp!
153
movzb `&lo("$s0")`,$acc0
154
movzb `&lo("$s1")`,$acc1
155
movzb `&lo("$s2")`,$acc2
156
movzb 2($sbox,$acc0,8),$t0
157
movzb 2($sbox,$acc1,8),$t1
158
movzb 2($sbox,$acc2,8),$t2
160
movzb `&lo("$s3")`,$acc0
161
movzb `&hi("$s1")`,$acc1
162
movzb `&hi("$s2")`,$acc2
163
movzb 2($sbox,$acc0,8),$t3
164
mov 0($sbox,$acc1,8),$acc1 #$t0
165
mov 0($sbox,$acc2,8),$acc2 #$t1
167
and \$0x0000ff00,$acc1
168
and \$0x0000ff00,$acc2
174
movzb `&hi("$s3")`,$acc0
175
movzb `&hi("$s0")`,$acc1
177
mov 0($sbox,$acc0,8),$acc0 #$t2
178
mov 0($sbox,$acc1,8),$acc1 #$t3
180
and \$0x0000ff00,$acc0
181
and \$0x0000ff00,$acc1
187
movzb `&lo("$s2")`,$acc0
188
movzb `&lo("$s3")`,$acc1
189
movzb `&lo("$s0")`,$acc2
190
mov 0($sbox,$acc0,8),$acc0 #$t0
191
mov 0($sbox,$acc1,8),$acc1 #$t1
192
mov 0($sbox,$acc2,8),$acc2 #$t2
194
and \$0x00ff0000,$acc0
195
and \$0x00ff0000,$acc1
196
and \$0x00ff0000,$acc2
202
movzb `&lo("$s1")`,$acc0
203
movzb `&hi("$s3")`,$acc1
204
movzb `&hi("$s0")`,$acc2
205
mov 0($sbox,$acc0,8),$acc0 #$t3
206
mov 2($sbox,$acc1,8),$acc1 #$t0
207
mov 2($sbox,$acc2,8),$acc2 #$t1
209
and \$0x00ff0000,$acc0
210
and \$0xff000000,$acc1
211
and \$0xff000000,$acc2
217
movzb `&hi("$s1")`,$acc0
218
movzb `&hi("$s2")`,$acc1
220
mov 2($sbox,$acc0,8),$acc0 #$t2
221
mov 2($sbox,$acc1,8),$acc1 #$t3
224
and \$0xff000000,$acc0
225
and \$0xff000000,$acc1
244
my $out=($t0,$t1,$t2,$s[0])[$i];
251
$code.=" movzb ".&lo($s[0]).",$out\n";
252
$code.=" mov $s[2],$tmp1\n" if ($i!=3);
253
$code.=" lea 16($key),$key\n" if ($i==0);
255
$code.=" movzb ".&hi($s[1]).",$tmp0\n";
256
$code.=" mov 0($sbox,$out,8),$out\n";
258
$code.=" shr \$16,$tmp1\n";
259
$code.=" mov $s[3],$tmp2\n" if ($i!=3);
260
$code.=" xor 3($sbox,$tmp0,8),$out\n";
262
$code.=" movzb ".&lo($tmp1).",$tmp1\n";
263
$code.=" shr \$24,$tmp2\n";
264
$code.=" xor 4*$i($key),$out\n";
266
$code.=" xor 2($sbox,$tmp1,8),$out\n";
267
$code.=" xor 1($sbox,$tmp2,8),$out\n";
269
$code.=" mov $t0,$s[1]\n" if ($i==3);
270
$code.=" mov $t1,$s[2]\n" if ($i==3);
271
$code.=" mov $t2,$s[3]\n" if ($i==3);
280
my $out=($t0,$t1,$t2,$s[0])[$i];
287
$code.=" movzb ".&lo($s[0]).",$out\n";
288
$code.=" mov $s[2],$tmp1\n" if ($i!=3);
290
$code.=" mov 2($sbox,$out,8),$out\n";
291
$code.=" shr \$16,$tmp1\n";
292
$code.=" mov $s[3],$tmp2\n" if ($i!=3);
294
$code.=" and \$0x000000ff,$out\n";
295
$code.=" movzb ".&hi($s[1]).",$tmp0\n";
296
$code.=" movzb ".&lo($tmp1).",$tmp1\n";
297
$code.=" shr \$24,$tmp2\n";
299
$code.=" mov 0($sbox,$tmp0,8),$tmp0\n";
300
$code.=" mov 0($sbox,$tmp1,8),$tmp1\n";
301
$code.=" mov 2($sbox,$tmp2,8),$tmp2\n";
303
$code.=" and \$0x0000ff00,$tmp0\n";
304
$code.=" and \$0x00ff0000,$tmp1\n";
305
$code.=" and \$0xff000000,$tmp2\n";
307
$code.=" xor $tmp0,$out\n";
308
$code.=" mov $t0,$s[1]\n" if ($i==3);
309
$code.=" xor $tmp1,$out\n";
310
$code.=" mov $t1,$s[2]\n" if ($i==3);
311
$code.=" xor $tmp2,$out\n";
312
$code.=" mov $t2,$s[3]\n" if ($i==3);
317
.type _x86_64_AES_encrypt,\@abi-omnipotent
320
xor 0($key),$s0 # xor with key
325
mov 240($key),$rnds # load key->rounds
331
if ($verticalspin) { &encvert(); }
332
else { &encstep(0,$s0,$s1,$s2,$s3);
333
&encstep(1,$s1,$s2,$s3,$s0);
334
&encstep(2,$s2,$s3,$s0,$s1);
335
&encstep(3,$s3,$s0,$s1,$s2);
341
if ($verticalspin) { &enclastvert(); }
342
else { &enclast(0,$s0,$s1,$s2,$s3);
343
&enclast(1,$s1,$s2,$s3,$s0);
344
&enclast(2,$s2,$s3,$s0,$s1);
345
&enclast(3,$s3,$s0,$s1,$s2);
347
xor 16+0($key),$s0 # xor with key
354
.byte 0xf3,0xc3 # rep ret
355
.size _x86_64_AES_encrypt,.-_x86_64_AES_encrypt
358
# it's possible to implement this by shifting tN by 8, filling least
359
# significant byte with byte load and finally bswap-ing at the end,
360
# but such partial register load kills Core 2...
362
{ my ($t3,$t4,$t5)=("%r8d","%r9d","%r13d");
365
movzb `&lo("$s0")`,$t0
366
movzb `&lo("$s1")`,$t1
367
movzb `&lo("$s2")`,$t2
368
movzb ($sbox,$t0,1),$t0
369
movzb ($sbox,$t1,1),$t1
370
movzb ($sbox,$t2,1),$t2
372
movzb `&lo("$s3")`,$t3
373
movzb `&hi("$s1")`,$acc0
374
movzb `&hi("$s2")`,$acc1
375
movzb ($sbox,$t3,1),$t3
376
movzb ($sbox,$acc0,1),$t4 #$t0
377
movzb ($sbox,$acc1,1),$t5 #$t1
379
movzb `&hi("$s3")`,$acc2
380
movzb `&hi("$s0")`,$acc0
382
movzb ($sbox,$acc2,1),$acc2 #$t2
383
movzb ($sbox,$acc0,1),$acc0 #$t3
386
movzb `&lo("$s2")`,$acc1
389
movzb ($sbox,$acc1,1),$acc1 #$t0
393
movzb `&lo("$s3")`,$t4
396
movzb `&lo("$s0")`,$t5
399
movzb ($sbox,$t4,1),$t4 #$t1
400
movzb ($sbox,$t5,1),$t5 #$t2
404
movzb `&lo("$s1")`,$acc2
405
movzb `&hi("$s3")`,$acc0
407
movzb ($sbox,$acc2,1),$acc2 #$t3
408
movzb ($sbox,$acc0,1),$acc0 #$t0
411
movzb `&hi("$s0")`,$acc1
414
movzb ($sbox,$acc1,1),$acc1 #$t1
415
movzb ($sbox,$s2,1),$s3 #$t3
416
movzb ($sbox,$s1,1),$s2 #$t2
437
sub enctransform_ref()
439
my ($acc,$r2,$tmp)=("%r8d","%r9d","%r13d");
443
and \$0x80808080,$acc
449
and \$0x1b1b1b1b,$acc
463
# unlike decrypt case it does not pay off to parallelize enctransform
465
{ my ($t3,$r20,$r21)=($acc2,"%r8d","%r9d");
470
and \$0x80808080,$acc0
471
and \$0x80808080,$acc1
480
and \$0xfefefefe,$r20
481
and \$0xfefefefe,$r21
482
and \$0x1b1b1b1b,$acc0
483
and \$0x1b1b1b1b,$acc1
495
and \$0x80808080,$acc0
496
and \$0x80808080,$acc1
516
and \$0xfefefefe,$r20
517
and \$0xfefefefe,$r21
518
and \$0x1b1b1b1b,$acc0
519
and \$0x1b1b1b1b,$acc1
531
mov 0($sbox),$acc0 # prefetch Te4
547
.type _x86_64_AES_encrypt_compact,\@abi-omnipotent
549
_x86_64_AES_encrypt_compact:
550
lea 128($sbox),$inp # size optimization
551
mov 0-128($inp),$acc1 # prefetch Te4
552
mov 32-128($inp),$acc2
555
mov 128-128($inp),$acc1
556
mov 160-128($inp),$acc2
557
mov 192-128($inp),$t0
558
mov 224-128($inp),$t1
559
jmp .Lenc_loop_compact
562
xor 0($key),$s0 # xor with key
571
je .Lenc_compact_done
575
jmp .Lenc_loop_compact
582
.byte 0xf3,0xc3 # rep ret
583
.size _x86_64_AES_encrypt_compact,.-_x86_64_AES_encrypt_compact
586
# void AES_encrypt (const void *inp,void *out,const AES_KEY *key);
589
.type AES_encrypt,\@function,3
599
# allocate frame "above" key schedule
601
lea -63(%rdx),%rcx # %rdx is key argument
609
mov %rsi,16(%rsp) # save out
610
mov %r10,24(%rsp) # save real stack pointer
614
mov 240($key),$rnds # load rounds
616
mov 0(%rdi),$s0 # load input vector
622
lea ($key,$rnds),%rbp
623
mov $key,(%rsp) # key schedule
624
mov %rbp,8(%rsp) # end of key schedule
626
# pick Te4 copy which can't "overlap" with stack frame or key schedule
627
lea .LAES_Te+2048(%rip),$sbox
631
lea ($sbox,%rbp),$sbox
633
call _x86_64_AES_encrypt_compact
635
mov 16(%rsp),$out # restore out
636
mov 24(%rsp),%rsi # restore saved stack pointer
637
mov $s0,0($out) # write output vector
651
.size AES_encrypt,.-AES_encrypt
654
#------------------------------------------------------------------#
657
{ my $t3="%r8d"; # zaps $inp!
660
# favor 3-way issue Opteron pipeline...
661
movzb `&lo("$s0")`,$acc0
662
movzb `&lo("$s1")`,$acc1
663
movzb `&lo("$s2")`,$acc2
664
mov 0($sbox,$acc0,8),$t0
665
mov 0($sbox,$acc1,8),$t1
666
mov 0($sbox,$acc2,8),$t2
668
movzb `&hi("$s3")`,$acc0
669
movzb `&hi("$s0")`,$acc1
670
movzb `&lo("$s3")`,$acc2
671
xor 3($sbox,$acc0,8),$t0
672
xor 3($sbox,$acc1,8),$t1
673
mov 0($sbox,$acc2,8),$t3
675
movzb `&hi("$s1")`,$acc0
677
movzb `&hi("$s2")`,$acc2
678
xor 3($sbox,$acc0,8),$t2
680
xor 3($sbox,$acc2,8),$t3
686
movzb `&lo("$s2")`,$acc0
687
movzb `&lo("$s3")`,$acc1
688
movzb `&lo("$s0")`,$acc2
689
xor 2($sbox,$acc0,8),$t0
690
xor 2($sbox,$acc1,8),$t1
691
xor 2($sbox,$acc2,8),$t2
693
movzb `&hi("$s1")`,$acc0
694
movzb `&hi("$s2")`,$acc1
695
movzb `&lo("$s1")`,$acc2
696
xor 1($sbox,$acc0,8),$t0
697
xor 1($sbox,$acc1,8),$t1
698
xor 2($sbox,$acc2,8),$t3
700
movzb `&hi("$s3")`,$acc0
702
movzb `&hi("$s0")`,$acc2
703
xor 1($sbox,$acc0,8),$t2
705
xor 1($sbox,$acc2,8),$t3
717
{ my $t3="%r8d"; # zaps $inp!
720
lea 2048($sbox),$sbox # size optimization
721
movzb `&lo("$s0")`,$acc0
722
movzb `&lo("$s1")`,$acc1
723
movzb `&lo("$s2")`,$acc2
724
movzb ($sbox,$acc0,1),$t0
725
movzb ($sbox,$acc1,1),$t1
726
movzb ($sbox,$acc2,1),$t2
728
movzb `&lo("$s3")`,$acc0
729
movzb `&hi("$s3")`,$acc1
730
movzb `&hi("$s0")`,$acc2
731
movzb ($sbox,$acc0,1),$t3
732
movzb ($sbox,$acc1,1),$acc1 #$t0
733
movzb ($sbox,$acc2,1),$acc2 #$t1
742
movzb `&hi("$s1")`,$acc0
743
movzb `&hi("$s2")`,$acc1
745
movzb ($sbox,$acc0,1),$acc0 #$t2
746
movzb ($sbox,$acc1,1),$acc1 #$t3
755
movzb `&lo("$s2")`,$acc0
756
movzb `&lo("$s3")`,$acc1
757
movzb `&lo("$s0")`,$acc2
758
movzb ($sbox,$acc0,1),$acc0 #$t0
759
movzb ($sbox,$acc1,1),$acc1 #$t1
760
movzb ($sbox,$acc2,1),$acc2 #$t2
770
movzb `&lo("$s1")`,$acc0
771
movzb `&hi("$s1")`,$acc1
772
movzb `&hi("$s2")`,$acc2
773
movzb ($sbox,$acc0,1),$acc0 #$t3
774
movzb ($sbox,$acc1,1),$acc1 #$t0
775
movzb ($sbox,$acc2,1),$acc2 #$t1
785
movzb `&hi("$s3")`,$acc0
786
movzb `&hi("$s0")`,$acc1
788
movzb ($sbox,$acc0,1),$acc0 #$t2
789
movzb ($sbox,$acc1,1),$acc1 #$t3
800
lea -2048($sbox),$sbox
813
my $out=($t0,$t1,$t2,$s[0])[$i];
815
$code.=" mov $s[0],$out\n" if ($i!=3);
816
$tmp1=$s[2] if ($i==3);
817
$code.=" mov $s[2],$tmp1\n" if ($i!=3);
818
$code.=" and \$0xFF,$out\n";
820
$code.=" mov 0($sbox,$out,8),$out\n";
821
$code.=" shr \$16,$tmp1\n";
822
$tmp2=$s[3] if ($i==3);
823
$code.=" mov $s[3],$tmp2\n" if ($i!=3);
825
$tmp0=$s[1] if ($i==3);
826
$code.=" movzb ".&hi($s[1]).",$tmp0\n";
827
$code.=" and \$0xFF,$tmp1\n";
828
$code.=" shr \$24,$tmp2\n";
830
$code.=" xor 3($sbox,$tmp0,8),$out\n";
831
$code.=" xor 2($sbox,$tmp1,8),$out\n";
832
$code.=" xor 1($sbox,$tmp2,8),$out\n";
834
$code.=" mov $t2,$s[1]\n" if ($i==3);
835
$code.=" mov $t1,$s[2]\n" if ($i==3);
836
$code.=" mov $t0,$s[3]\n" if ($i==3);
845
my $out=($t0,$t1,$t2,$s[0])[$i];
847
$code.=" mov $s[0],$out\n" if ($i!=3);
848
$tmp1=$s[2] if ($i==3);
849
$code.=" mov $s[2],$tmp1\n" if ($i!=3);
850
$code.=" and \$0xFF,$out\n";
852
$code.=" movzb 2048($sbox,$out,1),$out\n";
853
$code.=" shr \$16,$tmp1\n";
854
$tmp2=$s[3] if ($i==3);
855
$code.=" mov $s[3],$tmp2\n" if ($i!=3);
857
$tmp0=$s[1] if ($i==3);
858
$code.=" movzb ".&hi($s[1]).",$tmp0\n";
859
$code.=" and \$0xFF,$tmp1\n";
860
$code.=" shr \$24,$tmp2\n";
862
$code.=" movzb 2048($sbox,$tmp0,1),$tmp0\n";
863
$code.=" movzb 2048($sbox,$tmp1,1),$tmp1\n";
864
$code.=" movzb 2048($sbox,$tmp2,1),$tmp2\n";
866
$code.=" shl \$8,$tmp0\n";
867
$code.=" shl \$16,$tmp1\n";
868
$code.=" shl \$24,$tmp2\n";
870
$code.=" xor $tmp0,$out\n";
871
$code.=" mov $t2,$s[1]\n" if ($i==3);
872
$code.=" xor $tmp1,$out\n";
873
$code.=" mov $t1,$s[2]\n" if ($i==3);
874
$code.=" xor $tmp2,$out\n";
875
$code.=" mov $t0,$s[3]\n" if ($i==3);
880
.type _x86_64_AES_decrypt,\@abi-omnipotent
883
xor 0($key),$s0 # xor with key
888
mov 240($key),$rnds # load key->rounds
894
if ($verticalspin) { &decvert(); }
895
else { &decstep(0,$s0,$s3,$s2,$s1);
896
&decstep(1,$s1,$s0,$s3,$s2);
897
&decstep(2,$s2,$s1,$s0,$s3);
898
&decstep(3,$s3,$s2,$s1,$s0);
901
xor 0($key),$s0 # xor with key
911
if ($verticalspin) { &declastvert(); }
912
else { &declast(0,$s0,$s3,$s2,$s1);
913
&declast(1,$s1,$s0,$s3,$s2);
914
&declast(2,$s2,$s1,$s0,$s3);
915
&declast(3,$s3,$s2,$s1,$s0);
917
xor 16+0($key),$s0 # xor with key
924
.byte 0xf3,0xc3 # rep ret
925
.size _x86_64_AES_decrypt,.-_x86_64_AES_decrypt
929
{ my ($t3,$t4,$t5)=("%r8d","%r9d","%r13d");
932
movzb `&lo("$s0")`,$t0
933
movzb `&lo("$s1")`,$t1
934
movzb `&lo("$s2")`,$t2
935
movzb ($sbox,$t0,1),$t0
936
movzb ($sbox,$t1,1),$t1
937
movzb ($sbox,$t2,1),$t2
939
movzb `&lo("$s3")`,$t3
940
movzb `&hi("$s3")`,$acc0
941
movzb `&hi("$s0")`,$acc1
942
movzb ($sbox,$t3,1),$t3
943
movzb ($sbox,$acc0,1),$t4 #$t0
944
movzb ($sbox,$acc1,1),$t5 #$t1
946
movzb `&hi("$s1")`,$acc2
947
movzb `&hi("$s2")`,$acc0
949
movzb ($sbox,$acc2,1),$acc2 #$t2
950
movzb ($sbox,$acc0,1),$acc0 #$t3
953
movzb `&lo("$s2")`,$acc1
956
movzb ($sbox,$acc1,1),$acc1 #$t0
960
movzb `&lo("$s3")`,$t4
963
movzb `&lo("$s0")`,$t5
966
movzb ($sbox,$t4,1),$t4 #$t1
967
movzb ($sbox,$t5,1),$t5 #$t2
971
movzb `&lo("$s1")`,$acc2
972
movzb `&hi("$s1")`,$acc0
974
movzb ($sbox,$acc2,1),$acc2 #$t3
975
movzb ($sbox,$acc0,1),$acc0 #$t0
978
movzb `&hi("$s2")`,$acc1
981
movzb ($sbox,$acc1,1),$s1 #$t1
985
movzb `&hi("$s3")`,$acc1
988
movzb ($sbox,$acc1,1),$s2 #$t2
989
movzb ($sbox,$s0,1),$s3 #$t3
1004
# parallelized version! input is pair of 64-bit values: %rax=s1.s0
1005
# and %rcx=s3.s2, output is four 32-bit values in %eax=s0, %ebx=s1,
1006
# %ecx=s2 and %edx=s3.
1008
{ my ($tp10,$tp20,$tp40,$tp80,$acc0)=("%rax","%r8", "%r9", "%r10","%rbx");
1009
my ($tp18,$tp28,$tp48,$tp88,$acc8)=("%rcx","%r11","%r12","%r13","%rdx");
1010
my $prefetch = shift;
1020
lea ($tp10,$tp10),$tp20
1022
lea ($tp18,$tp18),$tp28
1039
lea ($tp20,$tp20),$tp40
1041
lea ($tp28,$tp28),$tp48
1058
xor $tp10,$tp20 # tp2^=tp1
1060
xor $tp18,$tp28 # tp2^=tp1
1063
lea ($tp40,$tp40),$tp80
1064
lea ($tp48,$tp48),$tp88
1065
xor $tp10,$tp40 # tp4^=tp1
1066
xor $tp18,$tp48 # tp4^=tp1
1074
xor $tp80,$tp10 # tp1^=tp8
1075
xor $tp88,$tp18 # tp1^=tp8
1076
xor $tp80,$tp20 # tp2^tp1^=tp8
1077
xor $tp88,$tp28 # tp2^tp1^=tp8
1080
xor $tp80,$tp40 # tp4^tp1^=tp8
1081
xor $tp88,$tp48 # tp4^tp1^=tp8
1084
xor $tp20,$tp80 # tp8^=tp8^tp2^tp1=tp2^tp1
1085
xor $tp28,$tp88 # tp8^=tp8^tp2^tp1=tp2^tp1
1086
rol \$8,`&LO("$tp10")` # ROTATE(tp1^tp8,8)
1087
rol \$8,`&LO("$tp18")` # ROTATE(tp1^tp8,8)
1088
xor $tp40,$tp80 # tp2^tp1^=tp8^tp4^tp1=tp8^tp4^tp2
1089
xor $tp48,$tp88 # tp2^tp1^=tp8^tp4^tp1=tp8^tp4^tp2
1091
rol \$8,`&LO("$acc0")` # ROTATE(tp1^tp8,8)
1092
rol \$8,`&LO("$acc8")` # ROTATE(tp1^tp8,8)
1093
xor `&LO("$tp80")`,`&LO("$tp10")`
1094
xor `&LO("$tp88")`,`&LO("$tp18")`
1097
xor `&LO("$tp80")`,`&LO("$acc0")`
1098
xor `&LO("$tp88")`,`&LO("$acc8")`
1104
rol \$24,`&LO("$tp20")` # ROTATE(tp2^tp1^tp8,24)
1105
rol \$24,`&LO("$tp28")` # ROTATE(tp2^tp1^tp8,24)
1106
rol \$24,`&LO("$tp80")` # ROTATE(tp2^tp1^tp8,24)
1107
rol \$24,`&LO("$tp88")` # ROTATE(tp2^tp1^tp8,24)
1108
xor `&LO("$tp20")`,`&LO("$tp10")`
1109
xor `&LO("$tp28")`,`&LO("$tp18")`
1112
xor `&LO("$tp80")`,`&LO("$acc0")`
1113
xor `&LO("$tp88")`,`&LO("$acc8")`
1115
`"mov 0($sbox),$mask80" if ($prefetch)`
1118
`"mov 64($sbox),$maskfe" if ($prefetch)`
1119
rol \$16,`&LO("$tp40")` # ROTATE(tp4^tp1^tp8,16)
1120
rol \$16,`&LO("$tp48")` # ROTATE(tp4^tp1^tp8,16)
1121
`"mov 128($sbox),$mask1b" if ($prefetch)`
1122
rol \$16,`&LO("$tp20")` # ROTATE(tp4^tp1^tp8,16)
1123
rol \$16,`&LO("$tp28")` # ROTATE(tp4^tp1^tp8,16)
1124
`"mov 192($sbox),$tp80" if ($prefetch)`
1125
xor `&LO("$tp40")`,`&LO("$tp10")`
1126
xor `&LO("$tp48")`,`&LO("$tp18")`
1127
`"mov 256($sbox),$tp88" if ($prefetch)`
1128
xor `&LO("$tp20")`,`&LO("$acc0")`
1129
xor `&LO("$tp28")`,`&LO("$acc8")`
1134
.type _x86_64_AES_decrypt_compact,\@abi-omnipotent
1136
_x86_64_AES_decrypt_compact:
1137
lea 128($sbox),$inp # size optimization
1138
mov 0-128($inp),$acc1 # prefetch Td4
1139
mov 32-128($inp),$acc2
1140
mov 64-128($inp),$t0
1141
mov 96-128($inp),$t1
1142
mov 128-128($inp),$acc1
1143
mov 160-128($inp),$acc2
1144
mov 192-128($inp),$t0
1145
mov 224-128($inp),$t1
1146
jmp .Ldec_loop_compact
1150
xor 0($key),$s0 # xor with key
1159
je .Ldec_compact_done
1161
mov 256+0($sbox),$mask80
1164
mov 256+8($sbox),$maskfe
1167
mov 256+16($sbox),$mask1b
1171
jmp .Ldec_loop_compact
1178
.byte 0xf3,0xc3 # rep ret
1179
.size _x86_64_AES_decrypt_compact,.-_x86_64_AES_decrypt_compact
1182
# void AES_decrypt (const void *inp,void *out,const AES_KEY *key);
1185
.type AES_decrypt,\@function,3
1195
# allocate frame "above" key schedule
1197
lea -63(%rdx),%rcx # %rdx is key argument
1205
mov %rsi,16(%rsp) # save out
1206
mov %r10,24(%rsp) # save real stack pointer
1210
mov 240($key),$rnds # load rounds
1212
mov 0(%rdi),$s0 # load input vector
1218
lea ($key,$rnds),%rbp
1219
mov $key,(%rsp) # key schedule
1220
mov %rbp,8(%rsp) # end of key schedule
1222
# pick Td4 copy which can't "overlap" with stack frame or key schedule
1223
lea .LAES_Td+2048(%rip),$sbox
1227
lea ($sbox,%rbp),$sbox
1228
shr \$3,%rbp # recall "magic" constants!
1231
call _x86_64_AES_decrypt_compact
1233
mov 16(%rsp),$out # restore out
1234
mov 24(%rsp),%rsi # restore saved stack pointer
1235
mov $s0,0($out) # write output vector
1249
.size AES_decrypt,.-AES_decrypt
1251
#------------------------------------------------------------------#
1256
movz %dl,%esi # rk[i]>>0
1257
movzb -128(%rbp,%rsi),%ebx
1258
movz %dh,%esi # rk[i]>>8
1262
movzb -128(%rbp,%rsi),%ebx
1264
movz %dl,%esi # rk[i]>>16
1267
movzb -128(%rbp,%rsi),%ebx
1268
movz %dh,%esi # rk[i]>>24
1272
movzb -128(%rbp,%rsi),%ebx
1276
xor 1024-128(%rbp,%rcx,4),%eax # rcon
1280
# int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
1283
.globl AES_set_encrypt_key
1284
.type AES_set_encrypt_key,\@function,3
1286
AES_set_encrypt_key:
1289
push %r12 # redundant, but allows to share
1290
push %r13 # exception handler...
1296
call _x86_64_AES_set_encrypt_key
1307
.size AES_set_encrypt_key,.-AES_set_encrypt_key
1309
.type _x86_64_AES_set_encrypt_key,\@abi-omnipotent
1311
_x86_64_AES_set_encrypt_key:
1312
mov %esi,%ecx # %ecx=bits
1313
mov %rdi,%rsi # %rsi=userKey
1314
mov %rdx,%rdi # %rdi=key
1321
lea .LAES_Te(%rip),%rbp
1322
lea 2048+128(%rbp),%rbp
1325
mov 0-128(%rbp),%eax
1326
mov 32-128(%rbp),%ebx
1327
mov 64-128(%rbp),%r8d
1328
mov 96-128(%rbp),%edx
1329
mov 128-128(%rbp),%eax
1330
mov 160-128(%rbp),%ebx
1331
mov 192-128(%rbp),%r8d
1332
mov 224-128(%rbp),%edx
1340
mov \$-2,%rax # invalid number of bits
1344
mov 0(%rsi),%rax # copy first 4 dwords
1354
mov 0(%rdi),%eax # rk[0]
1355
mov 12(%rdi),%edx # rk[3]
1360
mov %eax,16(%rdi) # rk[4]
1362
mov %eax,20(%rdi) # rk[5]
1364
mov %eax,24(%rdi) # rk[6]
1366
mov %eax,28(%rdi) # rk[7]
1372
movl \$10,80(%rdi) # setup number of rounds
1377
mov 0(%rsi),%rax # copy first 6 dwords
1389
mov 0(%rdi),%eax # rk[0]
1390
mov 20(%rdi),%edx # rk[5]
1395
mov %eax,24(%rdi) # rk[6]
1397
mov %eax,28(%rdi) # rk[7]
1399
mov %eax,32(%rdi) # rk[8]
1401
mov %eax,36(%rdi) # rk[9]
1408
mov %eax,40(%rdi) # rk[10]
1410
mov %eax,44(%rdi) # rk[11]
1415
movl \$12,72(%rdi) # setup number of rounds
1420
mov 0(%rsi),%rax # copy first 8 dwords
1434
mov 0(%rdi),%eax # rk[0]
1435
mov 28(%rdi),%edx # rk[7]
1440
mov %eax,32(%rdi) # rk[8]
1442
mov %eax,36(%rdi) # rk[9]
1444
mov %eax,40(%rdi) # rk[10]
1446
mov %eax,44(%rdi) # rk[11]
1453
mov 16(%rdi),%eax # rk[4]
1454
movz %dl,%esi # rk[11]>>0
1455
movzb -128(%rbp,%rsi),%ebx
1456
movz %dh,%esi # rk[11]>>8
1459
movzb -128(%rbp,%rsi),%ebx
1462
movz %dl,%esi # rk[11]>>16
1465
movzb -128(%rbp,%rsi),%ebx
1466
movz %dh,%esi # rk[11]>>24
1470
movzb -128(%rbp,%rsi),%ebx
1474
mov %eax,48(%rdi) # rk[12]
1476
mov %eax,52(%rdi) # rk[13]
1478
mov %eax,56(%rdi) # rk[14]
1480
mov %eax,60(%rdi) # rk[15]
1485
movl \$14,48(%rdi) # setup number of rounds
1492
.byte 0xf3,0xc3 # rep ret
1493
.size _x86_64_AES_set_encrypt_key,.-_x86_64_AES_set_encrypt_key
1497
{ my ($i,$ptr,$te,$td) = @_;
1498
my ($tp1,$tp2,$tp4,$tp8,$acc)=("%eax","%ebx","%edi","%edx","%r8d");
1502
and \$0x80808080,$acc
1505
lea 0($tp1,$tp1),$tp2
1507
and \$0xfefefefe,$tp2
1508
and \$0x1b1b1b1b,$acc
1512
and \$0x80808080,$acc
1515
lea 0($tp2,$tp2),$tp4
1517
and \$0xfefefefe,$tp4
1518
and \$0x1b1b1b1b,$acc
1519
xor $tp1,$tp2 # tp2^tp1
1523
and \$0x80808080,$acc
1527
lea 0($tp4,$tp4),$tp8
1528
xor $tp1,$tp4 # tp4^tp1
1529
and \$0xfefefefe,$tp8
1530
and \$0x1b1b1b1b,$acc
1533
xor $tp8,$tp1 # tp1^tp8
1534
rol \$8,$tp1 # ROTATE(tp1^tp8,8)
1535
xor $tp8,$tp2 # tp2^tp1^tp8
1536
xor $tp8,$tp4 # tp4^tp1^tp8
1538
xor $tp4,$tp8 # tp8^(tp8^tp4^tp1)^(tp8^tp2^tp1)=tp8^tp4^tp2
1541
rol \$24,$tp2 # ROTATE(tp2^tp1^tp8,24)
1543
rol \$16,$tp4 # ROTATE(tp4^tp1^tp8,16)
1550
# int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
1553
.globl AES_set_decrypt_key
1554
.type AES_set_decrypt_key,\@function,3
1556
AES_set_decrypt_key:
1563
push %rdx # save key schedule
1566
call _x86_64_AES_set_encrypt_key
1567
mov (%rsp),%r8 # restore key schedule
1571
mov 240(%r8),%r14d # pull number of rounds
1573
lea (%rdi,%r14d,4),%rcx
1575
lea (%r8,%rcx,4),%rdi # pointer to last chunk
1591
lea .LAES_Te+2048+1024(%rip),%rax # rcon
1593
mov 40(%rax),$mask80
1594
mov 48(%rax),$maskfe
1595
mov 56(%rax),$mask1b
1625
.size AES_set_decrypt_key,.-AES_set_decrypt_key
1628
# void AES_cbc_encrypt (const unsigned char *inp, unsigned char *out,
1629
# size_t length, const AES_KEY *key,
1630
# unsigned char *ivp,const int enc);
1632
# stack frame layout
1633
# -8(%rsp) return address
1634
my $keyp="0(%rsp)"; # one to pass as $key
1635
my $keyend="8(%rsp)"; # &(keyp->rd_key[4*keyp->rounds])
1636
my $_rsp="16(%rsp)"; # saved %rsp
1637
my $_inp="24(%rsp)"; # copy of 1st parameter, inp
1638
my $_out="32(%rsp)"; # copy of 2nd parameter, out
1639
my $_len="40(%rsp)"; # copy of 3rd parameter, length
1640
my $_key="48(%rsp)"; # copy of 4th parameter, key
1641
my $_ivp="56(%rsp)"; # copy of 5th parameter, ivp
1642
my $ivec="64(%rsp)"; # ivec[16]
1643
my $aes_key="80(%rsp)"; # copy of aes_key
1644
my $mark="80+240(%rsp)"; # copy of aes_key->rounds
1647
.globl AES_cbc_encrypt
1648
.type AES_cbc_encrypt,\@function,6
1650
.extern OPENSSL_ia32cap_P
1652
cmp \$0,%rdx # check length
1664
mov %r9d,%r9d # clear upper half of enc
1666
lea .LAES_Te(%rip),$sbox
1669
lea .LAES_Td(%rip),$sbox
1672
mov OPENSSL_ia32cap_P(%rip),%r10d
1673
cmp \$$speed_limit,%rdx
1674
jb .Lcbc_slow_prologue
1676
jnz .Lcbc_slow_prologue
1678
jc .Lcbc_slow_prologue
1680
# allocate aligned stack frame...
1681
lea -88-248(%rsp),$key
1684
# ... and make sure it doesn't alias with AES_T[ed] modulo 4096
1686
lea 2304($sbox),%r11
1688
and \$0xFFF,%r10 # s = $sbox&0xfff
1689
and \$0xFFF,%r11 # e = ($sbox+2048)&0xfff
1690
and \$0xFFF,%r12 # p = %rsp&0xfff
1692
cmp %r11,%r12 # if (p>=e) %rsp -= (p-e);
1693
jb .Lcbc_te_break_out
1697
.Lcbc_te_break_out: # else %rsp -= (p-s)&0xfff + framesz
1706
#add \$8,%rsp # reserve for return address!
1707
mov $key,$_rsp # save %rsp
1709
mov %rdi,$_inp # save copy of inp
1710
mov %rsi,$_out # save copy of out
1711
mov %rdx,$_len # save copy of len
1712
mov %rcx,$_key # save copy of key
1713
mov %r8,$_ivp # save copy of ivp
1714
movl \$0,$mark # copy of aes_key->rounds = 0;
1715
mov %r8,%rbp # rearrange input arguments
1721
mov 240($key),%eax # key->rounds
1722
# do we copy key schedule to stack?
1736
.long 0x90A548F3 # rep movsq
1737
mov %eax,(%rdi) # copy aes_key->rounds
1739
mov $key,$keyp # save key pointer
1748
lea 128($sbox),$sbox
1750
jnz .Lcbc_prefetch_te
1751
lea -2304($sbox),$sbox
1756
#----------------------------- ENCRYPT -----------------------------#
1757
mov 0(%rbp),$s0 # load iv
1763
.Lcbc_fast_enc_loop:
1768
mov $keyp,$key # restore key
1769
mov $inp,$_inp # if ($verticalspin) save inp
1771
call _x86_64_AES_encrypt
1773
mov $_inp,$inp # if ($verticalspin) restore inp
1785
jnz .Lcbc_fast_enc_loop
1786
mov $_ivp,%rbp # restore ivp
1787
mov $s0,0(%rbp) # save ivec
1792
jmp .Lcbc_fast_cleanup
1794
#----------------------------- DECRYPT -----------------------------#
1798
je .Lcbc_fast_dec_in_place
1802
.Lcbc_fast_dec_loop:
1803
mov 0($inp),$s0 # read input
1807
mov $keyp,$key # restore key
1808
mov $inp,$_inp # if ($verticalspin) save inp
1810
call _x86_64_AES_decrypt
1812
mov $ivec,%rbp # load ivp
1813
mov $_inp,$inp # if ($verticalspin) restore inp
1814
mov $_len,%r10 # load len
1815
xor 0(%rbp),$s0 # xor iv
1819
mov $inp,%rbp # current input, next iv
1822
mov %r10,$_len # update len
1823
mov %rbp,$ivec # update ivp
1825
mov $s0,0($out) # write output
1832
jnz .Lcbc_fast_dec_loop
1833
mov $_ivp,%r12 # load user ivp
1834
mov 0(%rbp),%r10 # load iv
1836
mov %r10,0(%r12) # copy back to user
1838
jmp .Lcbc_fast_cleanup
1841
.Lcbc_fast_dec_in_place:
1842
mov 0(%rbp),%r10 # copy iv to stack
1847
.Lcbc_fast_dec_in_place_loop:
1848
mov 0($inp),$s0 # load input
1852
mov $keyp,$key # restore key
1853
mov $inp,$_inp # if ($verticalspin) save inp
1855
call _x86_64_AES_decrypt
1857
mov $_inp,$inp # if ($verticalspin) restore inp
1864
mov 0($inp),%r11 # load input
1867
jz .Lcbc_fast_dec_in_place_done
1869
mov %r11,0+$ivec # copy input to iv
1872
mov $s0,0($out) # save output [zaps input]
1880
jmp .Lcbc_fast_dec_in_place_loop
1881
.Lcbc_fast_dec_in_place_done:
1883
mov %r11,0(%rdi) # copy iv back to user
1886
mov $s0,0($out) # save output [zaps input]
1893
cmpl \$0,$mark # was the key schedule copied?
1898
.long 0x90AB48F3 # rep stosq
1902
#--------------------------- SLOW ROUTINE ---------------------------#
1904
.Lcbc_slow_prologue:
1905
# allocate aligned stack frame...
1908
# ... just "above" key schedule
1909
lea -88-63(%rcx),%r10
1916
#add \$8,%rsp # reserve for return address!
1917
mov %rbp,$_rsp # save %rsp
1919
#mov %rdi,$_inp # save copy of inp
1920
#mov %rsi,$_out # save copy of out
1921
#mov %rdx,$_len # save copy of len
1922
#mov %rcx,$_key # save copy of key
1923
mov %r8,$_ivp # save copy of ivp
1924
mov %r8,%rbp # rearrange input arguments
1932
mov $key,$keyp # save key pointer
1934
lea ($key,%rax),%rax
1937
# pick Te4 copy which can't "overlap" with stack frame or key schedule
1938
lea 2048($sbox),$sbox
1939
lea 768-8(%rsp),%rax
1942
lea ($sbox,%rax),$sbox
1947
#--------------------------- SLOW ENCRYPT ---------------------------#
1948
test \$-16,%r10 # check upon length
1949
mov 0(%rbp),$s0 # load iv
1953
jz .Lcbc_slow_enc_tail # short input...
1956
.Lcbc_slow_enc_loop:
1961
mov $keyp,$key # restore key
1962
mov $inp,$_inp # save inp
1963
mov $out,$_out # save out
1964
mov %r10,$_len # save len
1966
call _x86_64_AES_encrypt_compact
1968
mov $_inp,$inp # restore inp
1969
mov $_out,$out # restore out
1970
mov $_len,%r10 # restore len
1980
jnz .Lcbc_slow_enc_loop
1982
jnz .Lcbc_slow_enc_tail
1983
mov $_ivp,%rbp # restore ivp
1984
mov $s0,0(%rbp) # save ivec
1992
.Lcbc_slow_enc_tail:
1998
.long 0x9066A4F3 # rep movsb
1999
mov \$16,%rcx # zero tail
2002
.long 0x9066AAF3 # rep stosb
2003
mov $out,$inp # this is not a mistake!
2004
mov \$16,%r10 # len=16
2007
jmp .Lcbc_slow_enc_loop # one more spin...
2008
#--------------------------- SLOW DECRYPT ---------------------------#
2012
add %rax,$sbox # recall "magic" constants!
2014
mov 0(%rbp),%r11 # copy iv to stack
2020
.Lcbc_slow_dec_loop:
2021
mov 0($inp),$s0 # load input
2025
mov $keyp,$key # restore key
2026
mov $inp,$_inp # save inp
2027
mov $out,$_out # save out
2028
mov %r10,$_len # save len
2030
call _x86_64_AES_decrypt_compact
2032
mov $_inp,$inp # restore inp
2033
mov $_out,$out # restore out
2040
mov 0($inp),%r11 # load input
2043
jc .Lcbc_slow_dec_partial
2044
jz .Lcbc_slow_dec_done
2046
mov %r11,0+$ivec # copy input to iv
2049
mov $s0,0($out) # save output [can zap input]
2056
jmp .Lcbc_slow_dec_loop
2057
.Lcbc_slow_dec_done:
2059
mov %r11,0(%rdi) # copy iv back to user
2062
mov $s0,0($out) # save output [can zap input]
2070
.Lcbc_slow_dec_partial:
2072
mov %r11,0(%rdi) # copy iv back to user
2075
mov $s0,0+$ivec # save output to stack
2083
.long 0x9066A4F3 # rep movsb
2100
.size AES_cbc_encrypt,.-AES_cbc_encrypt
2108
&_data_word(0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6);
2109
&_data_word(0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591);
2110
&_data_word(0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56);
2111
&_data_word(0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec);
2112
&_data_word(0x45caca8f, 0x9d82821f, 0x40c9c989, 0x877d7dfa);
2113
&_data_word(0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb);
2114
&_data_word(0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45);
2115
&_data_word(0xbf9c9c23, 0xf7a4a453, 0x967272e4, 0x5bc0c09b);
2116
&_data_word(0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c);
2117
&_data_word(0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83);
2118
&_data_word(0x5c343468, 0xf4a5a551, 0x34e5e5d1, 0x08f1f1f9);
2119
&_data_word(0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a);
2120
&_data_word(0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d);
2121
&_data_word(0x28181830, 0xa1969637, 0x0f05050a, 0xb59a9a2f);
2122
&_data_word(0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df);
2123
&_data_word(0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea);
2124
&_data_word(0x1b090912, 0x9e83831d, 0x742c2c58, 0x2e1a1a34);
2125
&_data_word(0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b);
2126
&_data_word(0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d);
2127
&_data_word(0x7b292952, 0x3ee3e3dd, 0x712f2f5e, 0x97848413);
2128
&_data_word(0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1);
2129
&_data_word(0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6);
2130
&_data_word(0xbe6a6ad4, 0x46cbcb8d, 0xd9bebe67, 0x4b393972);
2131
&_data_word(0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85);
2132
&_data_word(0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed);
2133
&_data_word(0xc5434386, 0xd74d4d9a, 0x55333366, 0x94858511);
2134
&_data_word(0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe);
2135
&_data_word(0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b);
2136
&_data_word(0xf35151a2, 0xfea3a35d, 0xc0404080, 0x8a8f8f05);
2137
&_data_word(0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1);
2138
&_data_word(0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142);
2139
&_data_word(0x30101020, 0x1affffe5, 0x0ef3f3fd, 0x6dd2d2bf);
2140
&_data_word(0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3);
2141
&_data_word(0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e);
2142
&_data_word(0x57c4c493, 0xf2a7a755, 0x827e7efc, 0x473d3d7a);
2143
&_data_word(0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6);
2144
&_data_word(0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3);
2145
&_data_word(0x66222244, 0x7e2a2a54, 0xab90903b, 0x8388880b);
2146
&_data_word(0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428);
2147
&_data_word(0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad);
2148
&_data_word(0x3be0e0db, 0x56323264, 0x4e3a3a74, 0x1e0a0a14);
2149
&_data_word(0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8);
2150
&_data_word(0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4);
2151
&_data_word(0xa8919139, 0xa4959531, 0x37e4e4d3, 0x8b7979f2);
2152
&_data_word(0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda);
2153
&_data_word(0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949);
2154
&_data_word(0xb46c6cd8, 0xfa5656ac, 0x07f4f4f3, 0x25eaeacf);
2155
&_data_word(0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810);
2156
&_data_word(0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c);
2157
&_data_word(0x241c1c38, 0xf1a6a657, 0xc7b4b473, 0x51c6c697);
2158
&_data_word(0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e);
2159
&_data_word(0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f);
2160
&_data_word(0x907070e0, 0x423e3e7c, 0xc4b5b571, 0xaa6666cc);
2161
&_data_word(0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c);
2162
&_data_word(0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969);
2163
&_data_word(0x91868617, 0x58c1c199, 0x271d1d3a, 0xb99e9e27);
2164
&_data_word(0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122);
2165
&_data_word(0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433);
2166
&_data_word(0xb69b9b2d, 0x221e1e3c, 0x92878715, 0x20e9e9c9);
2167
&_data_word(0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5);
2168
&_data_word(0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a);
2169
&_data_word(0xdabfbf65, 0x31e6e6d7, 0xc6424284, 0xb86868d0);
2170
&_data_word(0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e);
2171
&_data_word(0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c);
2173
#Te4: # four identical copies of Te4 (the AES S-box) to choose from to avoid L1 cache aliasing
2174
&data_byte(0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5);
2175
&data_byte(0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76);
2176
&data_byte(0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0);
2177
&data_byte(0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0);
2178
&data_byte(0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc);
2179
&data_byte(0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15);
2180
&data_byte(0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a);
2181
&data_byte(0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75);
2182
&data_byte(0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0);
2183
&data_byte(0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84);
2184
&data_byte(0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b);
2185
&data_byte(0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf);
2186
&data_byte(0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85);
2187
&data_byte(0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8);
2188
&data_byte(0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5);
2189
&data_byte(0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2);
2190
&data_byte(0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17);
2191
&data_byte(0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73);
2192
&data_byte(0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88);
2193
&data_byte(0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb);
2194
&data_byte(0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c);
2195
&data_byte(0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79);
2196
&data_byte(0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9);
2197
&data_byte(0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08);
2198
&data_byte(0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6);
2199
&data_byte(0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a);
2200
&data_byte(0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e);
2201
&data_byte(0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e);
2202
&data_byte(0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94);
2203
&data_byte(0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf);
2204
&data_byte(0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68);
2205
&data_byte(0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16);
2207
&data_byte(0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5);
2208
&data_byte(0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76);
2209
&data_byte(0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0);
2210
&data_byte(0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0);
2211
&data_byte(0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc);
2212
&data_byte(0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15);
2213
&data_byte(0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a);
2214
&data_byte(0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75);
2215
&data_byte(0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0);
2216
&data_byte(0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84);
2217
&data_byte(0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b);
2218
&data_byte(0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf);
2219
&data_byte(0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85);
2220
&data_byte(0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8);
2221
&data_byte(0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5);
2222
&data_byte(0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2);
2223
&data_byte(0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17);
2224
&data_byte(0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73);
2225
&data_byte(0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88);
2226
&data_byte(0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb);
2227
&data_byte(0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c);
2228
&data_byte(0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79);
2229
&data_byte(0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9);
2230
&data_byte(0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08);
2231
&data_byte(0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6);
2232
&data_byte(0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a);
2233
&data_byte(0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e);
2234
&data_byte(0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e);
2235
&data_byte(0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94);
2236
&data_byte(0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf);
2237
&data_byte(0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68);
2238
&data_byte(0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16);
2240
&data_byte(0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5);
2241
&data_byte(0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76);
2242
&data_byte(0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0);
2243
&data_byte(0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0);
2244
&data_byte(0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc);
2245
&data_byte(0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15);
2246
&data_byte(0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a);
2247
&data_byte(0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75);
2248
&data_byte(0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0);
2249
&data_byte(0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84);
2250
&data_byte(0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b);
2251
&data_byte(0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf);
2252
&data_byte(0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85);
2253
&data_byte(0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8);
2254
&data_byte(0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5);
2255
&data_byte(0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2);
2256
&data_byte(0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17);
2257
&data_byte(0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73);
2258
&data_byte(0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88);
2259
&data_byte(0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb);
2260
&data_byte(0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c);
2261
&data_byte(0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79);
2262
&data_byte(0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9);
2263
&data_byte(0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08);
2264
&data_byte(0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6);
2265
&data_byte(0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a);
2266
&data_byte(0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e);
2267
&data_byte(0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e);
2268
&data_byte(0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94);
2269
&data_byte(0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf);
2270
&data_byte(0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68);
2271
&data_byte(0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16);
2273
&data_byte(0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5);
2274
&data_byte(0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76);
2275
&data_byte(0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0);
2276
&data_byte(0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0);
2277
&data_byte(0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc);
2278
&data_byte(0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15);
2279
&data_byte(0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a);
2280
&data_byte(0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75);
2281
&data_byte(0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0);
2282
&data_byte(0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84);
2283
&data_byte(0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b);
2284
&data_byte(0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf);
2285
&data_byte(0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85);
2286
&data_byte(0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8);
2287
&data_byte(0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5);
2288
&data_byte(0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2);
2289
&data_byte(0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17);
2290
&data_byte(0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73);
2291
&data_byte(0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88);
2292
&data_byte(0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb);
2293
&data_byte(0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c);
2294
&data_byte(0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79);
2295
&data_byte(0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9);
2296
&data_byte(0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08);
2297
&data_byte(0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6);
2298
&data_byte(0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a);
2299
&data_byte(0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e);
2300
&data_byte(0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e);
2301
&data_byte(0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94);
2302
&data_byte(0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf);
2303
&data_byte(0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68);
2304
&data_byte(0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16);
2307
.long 0x00000001, 0x00000002, 0x00000004, 0x00000008
2308
.long 0x00000010, 0x00000020, 0x00000040, 0x00000080
2309
.long 0x0000001b, 0x00000036, 0x80808080, 0x80808080
2310
.long 0xfefefefe, 0xfefefefe, 0x1b1b1b1b, 0x1b1b1b1b
2316
&_data_word(0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a);
2317
&_data_word(0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b);
2318
&_data_word(0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5);
2319
&_data_word(0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5);
2320
&_data_word(0x495ab1de, 0x671bba25, 0x980eea45, 0xe1c0fe5d);
2321
&_data_word(0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b);
2322
&_data_word(0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295);
2323
&_data_word(0x2d83bed4, 0xd3217458, 0x2969e049, 0x44c8c98e);
2324
&_data_word(0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927);
2325
&_data_word(0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d);
2326
&_data_word(0x184adf63, 0x82311ae5, 0x60335197, 0x457f5362);
2327
&_data_word(0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9);
2328
&_data_word(0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52);
2329
&_data_word(0x23d373ab, 0xe2024b72, 0x578f1fe3, 0x2aab5566);
2330
&_data_word(0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3);
2331
&_data_word(0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed);
2332
&_data_word(0x2b1ccf8a, 0x92b479a7, 0xf0f207f3, 0xa1e2694e);
2333
&_data_word(0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4);
2334
&_data_word(0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4);
2335
&_data_word(0x39ec830b, 0xaaef6040, 0x069f715e, 0x51106ebd);
2336
&_data_word(0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d);
2337
&_data_word(0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060);
2338
&_data_word(0x24fb9819, 0x97e9bdd6, 0xcc434089, 0x779ed967);
2339
&_data_word(0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879);
2340
&_data_word(0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000);
2341
&_data_word(0x83868009, 0x48ed2b32, 0xac70111e, 0x4e725a6c);
2342
&_data_word(0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36);
2343
&_data_word(0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624);
2344
&_data_word(0xb1670a0c, 0x0fe75793, 0xd296eeb4, 0x9e919b1b);
2345
&_data_word(0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c);
2346
&_data_word(0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12);
2347
&_data_word(0x0b0d090e, 0xadc78bf2, 0xb9a8b62d, 0xc8a91e14);
2348
&_data_word(0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3);
2349
&_data_word(0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b);
2350
&_data_word(0x7629438b, 0xdcc623cb, 0x68fcedb6, 0x63f1e4b8);
2351
&_data_word(0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684);
2352
&_data_word(0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7);
2353
&_data_word(0x4b2f9e1d, 0xf330b2dc, 0xec52860d, 0xd0e3c177);
2354
&_data_word(0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947);
2355
&_data_word(0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322);
2356
&_data_word(0xc74e4987, 0xc1d138d9, 0xfea2ca8c, 0x360bd498);
2357
&_data_word(0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f);
2358
&_data_word(0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54);
2359
&_data_word(0xc2138df6, 0xe8b8d890, 0x5ef7392e, 0xf5afc382);
2360
&_data_word(0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf);
2361
&_data_word(0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb);
2362
&_data_word(0x097826cd, 0xf418596e, 0x01b79aec, 0xa89a4f83);
2363
&_data_word(0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef);
2364
&_data_word(0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029);
2365
&_data_word(0xafb2a431, 0x31233f2a, 0x3094a5c6, 0xc066a235);
2366
&_data_word(0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733);
2367
&_data_word(0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117);
2368
&_data_word(0x8dd64d76, 0x4db0ef43, 0x544daacc, 0xdf0496e4);
2369
&_data_word(0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546);
2370
&_data_word(0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb);
2371
&_data_word(0x5a1d67b3, 0x52d2db92, 0x335610e9, 0x1347d66d);
2372
&_data_word(0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb);
2373
&_data_word(0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a);
2374
&_data_word(0x59dfd29c, 0x3f73f255, 0x79ce1418, 0xbf37c773);
2375
&_data_word(0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478);
2376
&_data_word(0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2);
2377
&_data_word(0x72c31d16, 0x0c25e2bc, 0x8b493c28, 0x41950dff);
2378
&_data_word(0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664);
2379
&_data_word(0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0);
2381
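#Td4: # four identical copies of Td4 (the inverse AES S-box) to choose from to avoid L1 cache aliasing;
#     # each copy is followed by the 0x80808080/0xfefefefe/0x1b1b1b1b mask constants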
2382
&data_byte(0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38);
2383
&data_byte(0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb);
2384
&data_byte(0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87);
2385
&data_byte(0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb);
2386
&data_byte(0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d);
2387
&data_byte(0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e);
2388
&data_byte(0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2);
2389
&data_byte(0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25);
2390
&data_byte(0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16);
2391
&data_byte(0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92);
2392
&data_byte(0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda);
2393
&data_byte(0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84);
2394
&data_byte(0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a);
2395
&data_byte(0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06);
2396
&data_byte(0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02);
2397
&data_byte(0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b);
2398
&data_byte(0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea);
2399
&data_byte(0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73);
2400
&data_byte(0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85);
2401
&data_byte(0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e);
2402
&data_byte(0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89);
2403
&data_byte(0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b);
2404
&data_byte(0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20);
2405
&data_byte(0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4);
2406
&data_byte(0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31);
2407
&data_byte(0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f);
2408
&data_byte(0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d);
2409
&data_byte(0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef);
2410
&data_byte(0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0);
2411
&data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61);
2412
&data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26);
2413
&data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d);
2415
.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
2416
.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
2418
&data_byte(0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38);
2419
&data_byte(0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb);
2420
&data_byte(0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87);
2421
&data_byte(0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb);
2422
&data_byte(0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d);
2423
&data_byte(0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e);
2424
&data_byte(0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2);
2425
&data_byte(0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25);
2426
&data_byte(0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16);
2427
&data_byte(0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92);
2428
&data_byte(0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda);
2429
&data_byte(0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84);
2430
&data_byte(0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a);
2431
&data_byte(0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06);
2432
&data_byte(0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02);
2433
&data_byte(0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b);
2434
&data_byte(0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea);
2435
&data_byte(0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73);
2436
&data_byte(0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85);
2437
&data_byte(0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e);
2438
&data_byte(0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89);
2439
&data_byte(0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b);
2440
&data_byte(0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20);
2441
&data_byte(0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4);
2442
&data_byte(0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31);
2443
&data_byte(0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f);
2444
&data_byte(0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d);
2445
&data_byte(0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef);
2446
&data_byte(0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0);
2447
&data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61);
2448
&data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26);
2449
&data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d);
2451
.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
2452
.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
2454
&data_byte(0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38);
2455
&data_byte(0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb);
2456
&data_byte(0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87);
2457
&data_byte(0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb);
2458
&data_byte(0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d);
2459
&data_byte(0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e);
2460
&data_byte(0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2);
2461
&data_byte(0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25);
2462
&data_byte(0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16);
2463
&data_byte(0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92);
2464
&data_byte(0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda);
2465
&data_byte(0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84);
2466
&data_byte(0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a);
2467
&data_byte(0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06);
2468
&data_byte(0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02);
2469
&data_byte(0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b);
2470
&data_byte(0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea);
2471
&data_byte(0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73);
2472
&data_byte(0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85);
2473
&data_byte(0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e);
2474
&data_byte(0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89);
2475
&data_byte(0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b);
2476
&data_byte(0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20);
2477
&data_byte(0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4);
2478
&data_byte(0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31);
2479
&data_byte(0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f);
2480
&data_byte(0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d);
2481
&data_byte(0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef);
2482
&data_byte(0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0);
2483
&data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61);
2484
&data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26);
2485
&data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d);
2487
.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
2488
.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
2490
&data_byte(0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38);
2491
&data_byte(0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb);
2492
&data_byte(0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87);
2493
&data_byte(0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb);
2494
&data_byte(0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d);
2495
&data_byte(0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e);
2496
&data_byte(0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2);
2497
&data_byte(0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25);
2498
&data_byte(0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16);
2499
&data_byte(0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92);
2500
&data_byte(0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda);
2501
&data_byte(0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84);
2502
&data_byte(0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a);
2503
&data_byte(0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06);
2504
&data_byte(0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02);
2505
&data_byte(0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b);
2506
&data_byte(0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea);
2507
&data_byte(0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73);
2508
&data_byte(0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85);
2509
&data_byte(0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e);
2510
&data_byte(0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89);
2511
&data_byte(0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b);
2512
&data_byte(0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20);
2513
&data_byte(0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4);
2514
&data_byte(0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31);
2515
&data_byte(0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f);
2516
&data_byte(0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d);
2517
&data_byte(0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef);
2518
&data_byte(0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0);
2519
&data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61);
2520
&data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26);
2521
&data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d);
2523
.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
2524
.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
2525
.asciz "AES for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
2529
# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
2530
# CONTEXT *context,DISPATCHER_CONTEXT *disp)
2538
.extern __imp_RtlVirtualUnwind
2539
.type block_se_handler,\@abi-omnipotent
2553
mov 120($context),%rax # pull context->Rax
2554
mov 248($context),%rbx # pull context->Rip
2556
mov 8($disp),%rsi # disp->ImageBase
2557
mov 56($disp),%r11 # disp->HandlerData
2559
mov 0(%r11),%r10d # HandlerData[0]
2560
lea (%rsi,%r10),%r10 # prologue label
2561
cmp %r10,%rbx # context->Rip<prologue label
2562
jb .Lin_block_prologue
2564
mov 152($context),%rax # pull context->Rsp
2566
mov 4(%r11),%r10d # HandlerData[1]
2567
lea (%rsi,%r10),%r10 # epilogue label
2568
cmp %r10,%rbx # context->Rip>=epilogue label
2569
jae .Lin_block_prologue
2571
mov 24(%rax),%rax # pull saved real stack pointer
2572
lea 48(%rax),%rax # adjust...
2580
mov %rbx,144($context) # restore context->Rbx
2581
mov %rbp,160($context) # restore context->Rbp
2582
mov %r12,216($context) # restore context->R12
2583
mov %r13,224($context) # restore context->R13
2584
mov %r14,232($context) # restore context->R14
2585
mov %r15,240($context) # restore context->R15
2587
.Lin_block_prologue:
2590
mov %rax,152($context) # restore context->Rsp
2591
mov %rsi,168($context) # restore context->Rsi
2592
mov %rdi,176($context) # restore context->Rdi
2594
jmp .Lcommon_seh_exit
2595
.size block_se_handler,.-block_se_handler
2597
.type key_se_handler,\@abi-omnipotent
2611
mov 120($context),%rax # pull context->Rax
2612
mov 248($context),%rbx # pull context->Rip
2614
mov 8($disp),%rsi # disp->ImageBase
2615
mov 56($disp),%r11 # disp->HandlerData
2617
mov 0(%r11),%r10d # HandlerData[0]
2618
lea (%rsi,%r10),%r10 # prologue label
2619
cmp %r10,%rbx # context->Rip<prologue label
2620
jb .Lin_key_prologue
2622
mov 152($context),%rax # pull context->Rsp
2624
mov 4(%r11),%r10d # HandlerData[1]
2625
lea (%rsi,%r10),%r10 # epilogue label
2626
cmp %r10,%rbx # context->Rip>=epilogue label
2627
jae .Lin_key_prologue
2637
mov %rbx,144($context) # restore context->Rbx
2638
mov %rbp,160($context) # restore context->Rbp
2639
mov %r12,216($context) # restore context->R12
2640
mov %r13,224($context) # restore context->R13
2641
mov %r14,232($context) # restore context->R14
2642
mov %r15,240($context) # restore context->R15
2647
mov %rax,152($context) # restore context->Rsp
2648
mov %rsi,168($context) # restore context->Rsi
2649
mov %rdi,176($context) # restore context->Rdi
2651
jmp .Lcommon_seh_exit
2652
.size key_se_handler,.-key_se_handler
2654
.type cbc_se_handler,\@abi-omnipotent
2668
mov 120($context),%rax # pull context->Rax
2669
mov 248($context),%rbx # pull context->Rip
2671
lea .Lcbc_prologue(%rip),%r10
2672
cmp %r10,%rbx # context->Rip<.Lcbc_prologue
2673
jb .Lin_cbc_prologue
2675
lea .Lcbc_fast_body(%rip),%r10
2676
cmp %r10,%rbx # context->Rip<.Lcbc_fast_body
2677
jb .Lin_cbc_frame_setup
2679
lea .Lcbc_slow_prologue(%rip),%r10
2680
cmp %r10,%rbx # context->Rip<.Lcbc_slow_prologue
2683
lea .Lcbc_slow_body(%rip),%r10
2684
cmp %r10,%rbx # context->Rip<.Lcbc_slow_body
2685
jb .Lin_cbc_frame_setup
2688
mov 152($context),%rax # pull context->Rsp
2690
lea .Lcbc_epilogue(%rip),%r10
2691
cmp %r10,%rbx # context->Rip>=.Lcbc_epilogue
2692
jae .Lin_cbc_prologue
2696
lea .Lcbc_popfq(%rip),%r10
2697
cmp %r10,%rbx # context->Rip>=.Lcbc_popfq
2698
jae .Lin_cbc_prologue
2700
mov `16-8`(%rax),%rax # biased $_rsp
2703
.Lin_cbc_frame_setup:
2710
mov %rbx,144($context) # restore context->Rbx
2711
mov %rbp,160($context) # restore context->Rbp
2712
mov %r12,216($context) # restore context->R12
2713
mov %r13,224($context) # restore context->R13
2714
mov %r14,232($context) # restore context->R14
2715
mov %r15,240($context) # restore context->R15
2720
mov %rax,152($context) # restore context->Rsp
2721
mov %rsi,168($context) # restore context->Rsi
2722
mov %rdi,176($context) # restore context->Rdi
2726
mov 40($disp),%rdi # disp->ContextRecord
2727
mov $context,%rsi # context
2728
mov \$`1232/8`,%ecx # sizeof(CONTEXT)
2729
.long 0xa548f3fc # cld; rep movsq
2732
xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
2733
mov 8(%rsi),%rdx # arg2, disp->ImageBase
2734
mov 0(%rsi),%r8 # arg3, disp->ControlPc
2735
mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
2736
mov 40(%rsi),%r10 # disp->ContextRecord
2737
lea 56(%rsi),%r11 # &disp->HandlerData
2738
lea 24(%rsi),%r12 # &disp->EstablisherFrame
2739
mov %r10,32(%rsp) # arg5
2740
mov %r11,40(%rsp) # arg6
2741
mov %r12,48(%rsp) # arg7
2742
mov %rcx,56(%rsp) # arg8, (NULL)
2743
call *__imp_RtlVirtualUnwind(%rip)
2745
mov \$1,%eax # ExceptionContinueSearch
2757
.size cbc_se_handler,.-cbc_se_handler
2761
.rva .LSEH_begin_AES_encrypt
2762
.rva .LSEH_end_AES_encrypt
2763
.rva .LSEH_info_AES_encrypt
2765
.rva .LSEH_begin_AES_decrypt
2766
.rva .LSEH_end_AES_decrypt
2767
.rva .LSEH_info_AES_decrypt
2769
.rva .LSEH_begin_AES_set_encrypt_key
2770
.rva .LSEH_end_AES_set_encrypt_key
2771
.rva .LSEH_info_AES_set_encrypt_key
2773
.rva .LSEH_begin_AES_set_decrypt_key
2774
.rva .LSEH_end_AES_set_decrypt_key
2775
.rva .LSEH_info_AES_set_decrypt_key
2777
.rva .LSEH_begin_AES_cbc_encrypt
2778
.rva .LSEH_end_AES_cbc_encrypt
2779
.rva .LSEH_info_AES_cbc_encrypt
2783
.LSEH_info_AES_encrypt:
2785
.rva block_se_handler
2786
.rva .Lenc_prologue,.Lenc_epilogue # HandlerData[]
2787
.LSEH_info_AES_decrypt:
2789
.rva block_se_handler
2790
.rva .Ldec_prologue,.Ldec_epilogue # HandlerData[]
2791
.LSEH_info_AES_set_encrypt_key:
2794
.rva .Lenc_key_prologue,.Lenc_key_epilogue # HandlerData[]
2795
.LSEH_info_AES_set_decrypt_key:
2798
.rva .Ldec_key_prologue,.Ldec_key_epilogue # HandlerData[]
2799
.LSEH_info_AES_cbc_encrypt:
2805
# Expand compile-time arithmetic in the generated assembly: every
# back-quoted expression embedded in $code (for example `16-8` or
# `1232/8`) is evaluated by Perl and replaced with its result.
$code =~ s{`([^`]*)`}{eval($1)}gem;