3
# ====================================================================
4
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5
# project. The module is, however, dual licensed under OpenSSL and
6
# CRYPTOGAMS licenses depending on where you obtain it. For further
7
# details see http://www.openssl.org/~appro/cryptogams/.
8
# ====================================================================
14
# Software performance improvement over gcc-generated code is ~70% and
15
# in absolute terms is ~73 cycles per byte processed with 128-bit key.
16
# You're likely to exclaim "why so slow?" Keep in mind that z-CPUs are
17
# *strictly* in-order execution and issued instruction [in this case
18
# load value from memory is critical] has to complete before execution
19
# flow proceeds. S-boxes are compressed to 2KB[+256B].
21
# As for hardware acceleration support, it's basically a "teaser," as
22
# it can and should be improved in several ways. Most notably support
23
# for CBC is not utilized, nor are multiple blocks ever processed.
24
# Then software key schedule can be postponed till hardware support
25
# detection... Performance improvement over assembler is reportedly
26
# ~2.5x, but can reach >8x [naturally on larger chunks] if proper
27
# support is implemented.
31
# Implement AES_set_[en|de]crypt_key. Key schedule setup is avoided
32
# for 128-bit keys, if hardware support is detected.
36
# Add support for hardware AES192/256 and reschedule instructions to
37
# minimize/avoid Address Generation Interlock hazard and to favour
38
# dual-issue z10 pipeline. This gave ~25% improvement on z10 and
39
# almost 50% on z9. The gain is smaller on z10, because being dual-
40
# issue z10 makes it impossible to eliminate the interlock condition:
41
# critical path is not long enough. Yet it spends ~24 cycles per byte
42
# processed with 128-bit key.
44
# Unlike the previous version, hardware support detection takes place only
45
# at the moment of key schedule setup, which is denoted in key->rounds.
46
# This is done, because deferred key setup can't be made MT-safe, not
47
# for key lengths longer than 128 bits.
49
# Add AES_cbc_encrypt, which gives incredible performance improvement,
50
# it was measured to be ~6.6x. It's less than previously mentioned 8x,
51
# because software implementation was optimized.
53
$softonly=0; # allow hardware support
55
$t0="%r0"; $mask="%r0";
57
$t2="%r2"; $inp="%r2";
58
$t3="%r3"; $out="%r3"; $bits="%r3";
74
while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; }
85
0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
86
0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
87
0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
88
0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
89
0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
90
0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
91
0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
92
0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
93
0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
94
0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
95
0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
96
0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
97
0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
98
0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
99
0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
100
0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
101
0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
102
0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
103
0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
104
0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
105
0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
106
0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
107
0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
108
0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
109
0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
110
0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
111
0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
112
0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
113
0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
114
0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
115
0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
116
0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
117
0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
118
0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
119
0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
120
0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
121
0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
122
0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
123
0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
124
0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
125
0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
126
0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
127
0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
128
0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
129
0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
130
0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
131
0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
132
0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
133
0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
134
0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
135
0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
136
0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
137
0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
138
0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
139
0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
140
0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
141
0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
142
0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
143
0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
144
0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
145
0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
146
0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
147
0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
148
0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
151
.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
152
.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
153
.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
154
.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
155
.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
156
.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
157
.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
158
.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
159
.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
160
.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
161
.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
162
.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
163
.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
164
.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
165
.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
166
.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
167
.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
168
.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
169
.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
170
.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
171
.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
172
.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
173
.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
174
.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
175
.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
176
.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
177
.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
178
.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
179
.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
180
.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
181
.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
182
.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
184
.long 0x01000000, 0x02000000, 0x04000000, 0x08000000
185
.long 0x10000000, 0x20000000, 0x40000000, 0x80000000
186
.long 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
188
.size AES_Te,.-AES_Te
190
# void AES_encrypt(const unsigned char *inp, unsigned char *out,
191
# const AES_KEY *key) {
193
.type AES_encrypt,\@function
196
$code.=<<___ if (!$softonly);
205
lghi %r3,16 # single block length
206
.long 0xb92e0042 # km %r4,%r2
207
brc 1,.-4 # can this happen?
221
bras $ra,_s390x_AES_encrypt
231
.size AES_encrypt,.-AES_encrypt
233
.type _s390x_AES_encrypt,\@function
242
llill $mask,`0xff<<3`
256
srlg $i1,$s1,`16-3` # i0
265
l $s0,0($s0,$tbl) # Te0[s0>>24]
266
l $t1,1($t1,$tbl) # Te3[s0>>0]
267
l $t2,2($t2,$tbl) # Te2[s0>>8]
268
l $t3,3($t3,$tbl) # Te1[s0>>16]
270
x $s0,3($i1,$tbl) # Te1[s1>>16]
271
l $s1,0($s1,$tbl) # Te0[s1>>24]
272
x $t2,1($i2,$tbl) # Te3[s1>>0]
273
x $t3,2($i3,$tbl) # Te2[s1>>8]
275
srlg $i1,$s2,`8-3` # i0
276
srlg $i2,$s2,`16-3` # i1
285
srlg $ra,$s3,`8-3` # i1
286
sllg $t1,$s3,`0+3` # i0
291
x $s0,2($i1,$tbl) # Te2[s2>>8]
292
x $s1,3($i2,$tbl) # Te1[s2>>16]
293
l $s2,0($s2,$tbl) # Te0[s2>>24]
294
x $t3,1($i3,$tbl) # Te3[s2>>0]
296
srlg $i3,$s3,`16-3` # i2
307
x $s0,1($t1,$tbl) # Te3[s3>>0]
308
x $s1,2($ra,$tbl) # Te2[s3>>8]
309
x $s2,3($i3,$tbl) # Te1[s3>>16]
310
l $s3,0($s3,$tbl) # Te0[s3>>24]
313
brct $rounds,.Lenc_loop
325
srlg $i1,$s1,`16-3` # i0
334
llgc $s0,2($s0,$tbl) # Te4[s0>>24]
335
llgc $t1,2($t1,$tbl) # Te4[s0>>0]
337
llgc $t2,2($t2,$tbl) # Te4[s0>>8]
338
llgc $t3,2($t3,$tbl) # Te4[s0>>16]
342
llgc $i1,2($i1,$tbl) # Te4[s1>>16]
343
llgc $s1,2($s1,$tbl) # Te4[s1>>24]
344
llgc $i2,2($i2,$tbl) # Te4[s1>>0]
345
llgc $i3,2($i3,$tbl) # Te4[s1>>8]
354
srlg $i1,$s2,`8-3` # i0
355
srlg $i2,$s2,`16-3` # i1
363
sllg $t1,$s3,`0+3` # i0
364
srlg $ra,$s3,`8-3` # i1
367
llgc $i1,2($i1,$tbl) # Te4[s2>>8]
368
llgc $i2,2($i2,$tbl) # Te4[s2>>16]
370
llgc $s2,2($s2,$tbl) # Te4[s2>>24]
371
llgc $i3,2($i3,$tbl) # Te4[s2>>0]
380
srlg $i3,$s3,`16-3` # i2
388
llgc $i1,2($t1,$tbl) # Te4[s3>>0]
389
llgc $i2,2($ra,$tbl) # Te4[s3>>8]
390
llgc $i3,2($i3,$tbl) # Te4[s3>>16]
391
llgc $s3,2($s3,$tbl) # Te4[s3>>24]
407
.size _s390x_AES_encrypt,.-_s390x_AES_encrypt
411
.type AES_Td,\@object
416
0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
417
0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
418
0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
419
0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
420
0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
421
0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
422
0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
423
0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
424
0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
425
0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
426
0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
427
0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
428
0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
429
0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
430
0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
431
0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
432
0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
433
0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
434
0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
435
0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
436
0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
437
0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
438
0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
439
0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
440
0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
441
0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
442
0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
443
0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
444
0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
445
0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
446
0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
447
0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
448
0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
449
0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
450
0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
451
0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
452
0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
453
0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
454
0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
455
0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
456
0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
457
0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
458
0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
459
0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
460
0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
461
0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
462
0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
463
0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
464
0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
465
0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
466
0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
467
0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
468
0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
469
0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
470
0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
471
0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
472
0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
473
0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
474
0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
475
0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
476
0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
477
0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
478
0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
479
0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
482
.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
483
.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
484
.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
485
.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
486
.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
487
.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
488
.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
489
.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
490
.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
491
.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
492
.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
493
.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
494
.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
495
.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
496
.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
497
.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
498
.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
499
.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
500
.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
501
.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
502
.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
503
.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
504
.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
505
.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
506
.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
507
.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
508
.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
509
.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
510
.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
511
.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
512
.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
513
.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
514
.size AES_Td,.-AES_Td
516
# void AES_decrypt(const unsigned char *inp, unsigned char *out,
517
# const AES_KEY *key) {
519
.type AES_decrypt,\@function
522
$code.=<<___ if (!$softonly);
531
lghi %r3,16 # single block length
532
.long 0xb92e0042 # km %r4,%r2
533
brc 1,.-4 # can this happen?
547
bras $ra,_s390x_AES_decrypt
557
.size AES_decrypt,.-AES_decrypt
559
.type _s390x_AES_decrypt,\@function
568
llill $mask,`0xff<<3`
582
sllg $i1,$s1,`0+3` # i0
591
l $s0,0($s0,$tbl) # Td0[s0>>24]
592
l $t1,3($t1,$tbl) # Td1[s0>>16]
593
l $t2,2($t2,$tbl) # Td2[s0>>8]
594
l $t3,1($t3,$tbl) # Td3[s0>>0]
596
x $s0,1($i1,$tbl) # Td3[s1>>0]
597
l $s1,0($s1,$tbl) # Td0[s1>>24]
598
x $t2,3($i2,$tbl) # Td1[s1>>16]
599
x $t3,2($i3,$tbl) # Td2[s1>>8]
601
srlg $i1,$s2,`8-3` # i0
602
sllg $i2,$s2,`0+3` # i1
611
srlg $ra,$s3,`8-3` # i1
612
srlg $t1,$s3,`16-3` # i0
617
x $s0,2($i1,$tbl) # Td2[s2>>8]
618
x $s1,1($i2,$tbl) # Td3[s2>>0]
619
l $s2,0($s2,$tbl) # Td0[s2>>24]
620
x $t3,3($i3,$tbl) # Td1[s2>>16]
622
sllg $i3,$s3,`0+3` # i2
633
x $s0,3($t1,$tbl) # Td1[s3>>16]
634
x $s1,2($ra,$tbl) # Td2[s3>>8]
635
x $s2,1($i3,$tbl) # Td3[s3>>0]
636
l $s3,0($s3,$tbl) # Td0[s3>>24]
639
brct $rounds,.Ldec_loop
642
l $t1,`2048+0`($tbl) # prefetch Td4
643
l $t2,`2048+64`($tbl)
644
l $t3,`2048+128`($tbl)
645
l $i1,`2048+192`($tbl)
662
llgc $i3,2048($i3,$tbl) # Td4[s0>>24]
663
llgc $t1,2048($t1,$tbl) # Td4[s0>>16]
664
llgc $t2,2048($t2,$tbl) # Td4[s0>>8]
666
llgc $t3,2048($s0,$tbl) # Td4[s0>>0]
670
llgc $s1,2048($s1,$tbl) # Td4[s1>>0]
671
llgc $i1,2048($i1,$tbl) # Td4[s1>>24]
672
llgc $i2,2048($i2,$tbl) # Td4[s1>>16]
674
llgc $i3,2048($ra,$tbl) # Td4[s1>>8]
688
llgc $i1,2048($i1,$tbl) # Td4[s2>>8]
689
llgc $s1,2048($s2,$tbl) # Td4[s2>>0]
690
llgc $i2,2048($i2,$tbl) # Td4[s2>>24]
691
llgc $i3,2048($i3,$tbl) # Td4[s2>>16]
711
llgc $i1,2048($i1,$tbl) # Td4[s3>>16]
712
llgc $i2,2048($i2,$tbl) # Td4[s3>>8]
714
llgc $s2,2048($s3,$tbl) # Td4[s3>>0]
715
llgc $s3,2048($i3,$tbl) # Td4[s3>>24]
729
.size _s390x_AES_decrypt,.-_s390x_AES_decrypt
733
# void AES_set_encrypt_key(const unsigned char *in, int bits,
735
.globl AES_set_encrypt_key
736
.type AES_set_encrypt_key,\@function
760
$code.=<<___ if (!$softonly);
761
# convert bits to km code, [128,192,256]->[18,19,20]
768
lghi %r0,0 # query capability vector
770
.long 0xb92f0042 # kmc %r4,%r2
777
lmg %r0,%r1,0($inp) # just copy 128 bits...
787
1: st $bits,236($key) # save bits
788
st %r5,240($key) # save km code
795
stmg %r6,%r13,48($sp) # all non-volatile regs
797
larl $tbl,AES_Te+2048
816
llgfr $t2,$s3 # temp=rk[3]
830
icm $t2,2,0($t2) # Te4[rk[3]>>0]<<8
831
icm $t2,4,0($i1) # Te4[rk[3]>>8]<<16
832
icm $t2,8,0($i2) # Te4[rk[3]>>16]<<24
833
icm $t2,1,0($i3) # Te4[rk[3]>>24]
834
x $t2,256($t3,$tbl) # rcon[i]
835
xr $s0,$t2 # rk[4]=rk[0]^...
836
xr $s1,$s0 # rk[5]=rk[1]^rk[4]
837
xr $s2,$s1 # rk[6]=rk[2]^rk[5]
838
xr $s3,$s2 # rk[7]=rk[3]^rk[6]
840
llgfr $t2,$s3 # temp=rk[3]
852
la $key,16($key) # key+=4
854
brct $rounds,.L128_loop
888
icm $t1,2,0($t1) # Te4[rk[5]>>0]<<8
889
icm $t1,4,0($i1) # Te4[rk[5]>>8]<<16
890
icm $t1,8,0($i2) # Te4[rk[5]>>16]<<24
891
icm $t1,1,0($i3) # Te4[rk[5]>>24]
892
x $t1,256($t3,$tbl) # rcon[i]
893
xr $s0,$t1 # rk[6]=rk[0]^...
894
xr $s1,$s0 # rk[7]=rk[1]^rk[6]
895
xr $s2,$s1 # rk[8]=rk[2]^rk[7]
896
xr $s3,$s2 # rk[9]=rk[3]^rk[8]
902
brct $rounds,.L192_continue
910
x $t1,16($key) # rk[10]=rk[4]^rk[9]
912
x $t1,20($key) # rk[11]=rk[5]^rk[10]
922
la $key,24($key) # key+=6
951
icm $t1,2,0($t1) # Te4[rk[7]>>0]<<8
952
icm $t1,4,0($i1) # Te4[rk[7]>>8]<<16
953
icm $t1,8,0($i2) # Te4[rk[7]>>16]<<24
954
icm $t1,1,0($i3) # Te4[rk[7]>>24]
955
x $t1,256($t3,$tbl) # rcon[i]
956
xr $s0,$t1 # rk[8]=rk[0]^...
957
xr $s1,$s0 # rk[9]=rk[1]^rk[8]
958
xr $s2,$s1 # rk[10]=rk[2]^rk[9]
959
xr $s3,$s2 # rk[11]=rk[3]^rk[10]
964
brct $rounds,.L256_continue
971
lgr $t1,$s3 # temp=rk[11]
982
llgc $t1,0($t1) # Te4[rk[11]>>0]
983
icm $t1,2,0($i1) # Te4[rk[11]>>8]<<8
984
icm $t1,4,0($i2) # Te4[rk[11]>>16]<<16
985
icm $t1,8,0($i3) # Te4[rk[11]>>24]<<24
986
x $t1,16($key) # rk[12]=rk[4]^...
988
x $t1,20($key) # rk[13]=rk[5]^rk[12]
990
x $t1,24($key) # rk[14]=rk[6]^rk[13]
992
x $t1,28($key) # rk[15]=rk[7]^rk[14]
1002
la $key,32($key) # key+=8
1009
.size AES_set_encrypt_key,.-AES_set_encrypt_key
1011
# void AES_set_decrypt_key(const unsigned char *in, int bits,
1013
.globl AES_set_decrypt_key
1014
.type AES_set_decrypt_key,\@function
1016
AES_set_decrypt_key:
1017
stg $key,32($sp) # I rely on AES_set_encrypt_key to
1018
stg $ra,112($sp) # save non-volatile registers!
1019
bras $ra,AES_set_encrypt_key
1025
$code.=<<___ if (!$softonly);
1030
oill $t0,0x80 # set "decrypt" bit
1038
bras $ra,.Lekey_internal
1044
.Lgo: llgf $rounds,240($key)
1052
.Linv: lmg $s0,$s1,0($i1)
1064
llgf $rounds,240($key)
1066
sll $rounds,2 # (rounds-1)*4
1067
llilh $mask80,0x8080
1068
llilh $mask1b,0x1b1b
1069
llilh $maskfe,0xfefe
1075
.Lmix: l $s0,16($key) # tp1
1103
xr $s1,$s0 # tp2^tp1
1104
xr $s2,$s0 # tp4^tp1
1105
rll $s0,$s0,24 # = ROTATE(tp1,8)
1107
xr $s0,$s1 # ^=tp2^tp1
1108
xr $s1,$s3 # tp2^tp1^tp8
1109
xr $s0,$s2 # ^=tp4^tp1^tp8
1112
xr $s0,$s1 # ^= ROTATE(tp8^tp2^tp1,24)
1114
xr $s0,$s2 # ^= ROTATE(tp8^tp4^tp1,16)
1115
xr $s0,$s3 # ^= ROTATE(tp8,8)
1121
lmg %r6,%r13,48($sp)# as was saved by AES_set_encrypt_key!
1124
.size AES_set_decrypt_key,.-AES_set_decrypt_key
1127
#void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
1128
# size_t length, const AES_KEY *key,
1129
# unsigned char *ivec, const int enc)
1132
my $out="%r4"; # length and out are swapped
1138
.globl AES_cbc_encrypt
1139
.type AES_cbc_encrypt,\@function
1142
xgr %r3,%r4 # flip %r3 and %r4, out and len
1146
$code.=<<___ if (!$softonly);
1151
lg %r0,0($ivp) # copy ivec
1153
stmg %r0,%r1,16($sp)
1154
lmg %r0,%r1,0($key) # copy key, cover 256 bit
1155
stmg %r0,%r1,32($sp)
1156
lmg %r0,%r1,16($key)
1157
stmg %r0,%r1,48($sp)
1158
l %r0,240($key) # load kmc code
1159
lghi $key,15 # res=len%16, len-=res;
1162
la %r1,16($sp) # parameter block - ivec || key
1164
.long 0xb92f0042 # kmc %r4,%r2
1165
brc 1,.-4 # pay attention to "partial completion"
1169
lmg %r0,%r1,16($sp) # copy ivec to caller
1175
ahi $key,-1 # it's the way it's encoded in mvc
1177
jnz .Lkmc_truncated_dec
1182
mvc 128(1,$sp),0($inp)
1184
la %r1,16($sp) # restore parameter block
1187
.long 0xb92f0042 # kmc %r4,%r2
1190
.Lkmc_truncated_dec:
1194
.long 0xb92f0042 # kmc %r4,%r2
1197
mvc 0(1,$out),128($sp)
1204
stmg $key,$ra,40($sp)
1218
brc 4,.Lcbc_enc_tail # if borrow
1220
stmg $inp,$out,16($sp)
1227
bras $ra,_s390x_AES_encrypt
1229
lmg $inp,$key,16($sp)
1241
brc 4,.Lcbc_enc_tail # if borrow
1261
mvc 128(1,$sp),0($inp)
1273
stmg $t0,$t1,128($sp)
1276
stmg $inp,$out,16($sp)
1283
bras $ra,_s390x_AES_decrypt
1285
lmg $inp,$key,16($sp)
1297
brc 4,.Lcbc_dec_tail # if borrow
1298
brc 2,.Lcbc_dec_done # if zero
1301
stmg $t0,$t1,128($sp)
1311
lmg $ivp,$ra,48($sp)
1312
stmg $t0,$t1,0($ivp)
1322
mvc 0(1,$out),128($sp)
1325
.size AES_cbc_encrypt,.-AES_cbc_encrypt
1329
.string "AES for s390x, CRYPTOGAMS by <appro\@openssl.org>"
1332
# Expand every back-quoted expression embedded in the generated code:
# the text between a pair of backticks is eval'ed as Perl and replaced
# by its result (e.g. `0xff<<3` becomes 2040).
$code =~ s/\`([^\`]*)\`/eval $1/gem;