3
# ====================================================================
4
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5
# project. The module is, however, dual licensed under OpenSSL and
6
# CRYPTOGAMS licenses depending on where you obtain it. For further
7
# details see http://www.openssl.org/~appro/cryptogams/.
8
# ====================================================================
10
# Needs more work: key setup, page boundaries, CBC routine...
12
# ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
13
# 128-bit key, which is ~40% better than 64-bit code generated by gcc
14
# 4.0. But these are not the ones currently used! Their "compact"
15
# counterparts are, for security reasons. ppc_AES_encrypt_compact runs
16
# at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
17
# at 1/3 of ppc_AES_decrypt.
21
# Rescheduling instructions to favour Power6 pipeline gives 10%
22
# performance improvement on the platform in question (and marginal
23
# improvement even on others). It should be noted that Power6 fails
24
# to process byte in 18 cycles, only in 23, because it fails to issue
25
# 4 load instructions in two cycles, only in 3. As a result, non-compact
26
# block subroutines are 25% slower than one would expect. Compact
27
# functions scale better, because they have pure computational part,
28
# which scales perfectly with clock frequency. To be specific
29
# ppc_AES_encrypt_compact operates at 42 cycles per byte, while
30
# ppc_AES_decrypt_compact - at 55 (in 64-bit build).
34
if ($flavour =~ /64/) {
39
} elsif ($flavour =~ /32/) {
44
} else { die "nonsense $flavour"; }
46
# Locate the ppc-xlate.pl translator relative to this script.
#  1. Capture the directory part of $0 (everything up to and including the
#     last "/" or "\") into $dir.  Note that $dir is taken from $1 whether
#     or not the match succeeded.
#  2. Try "${dir}ppc-xlate.pl", then "${dir}../../perlasm/ppc-xlate.pl";
#     the first candidate that exists as a plain file (-f) is left in $xlate.
#  3. If neither candidate exists, abort.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
47
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
48
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
49
die "can't locate ppc-xlate.pl";
51
# Re-open STDOUT as a pipe into the translator: everything printed from here
# on is fed to "$^X $xlate $flavour <next argument>", where $^X is the perl
# binary running this script and the next argument is taken with shift
# (presumably the output file name -- confirm against the build rules).
#
# The failure check must use parentheses plus low-precedence "or".  Written
# as `open STDOUT,"...".shift || die ...`, the "||" applied to the command
# string (always true), so die could never fire and a failed open was
# silently ignored.
open(STDOUT,"| $^X $xlate $flavour ".shift) or die "can't call $xlate: $!";
57
# Consume values one at a time with shift until none are left, appending
# for each value one ".long" directive to $code that holds the value twice,
# each copy formatted as a zero-padded 8-digit hex word.
# NOTE(review): the enclosing definition is not visible here; shift
# presumably drains the argument list of a helper sub -- confirm.
while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; }
101
# stay away from TLS pointer: remap the one symbolic register that would
# otherwise land on it.
#  - 64-bit build ($SIZE_T==8): $t1 is expected to be r13 and is moved to r0.
#  - otherwise: $Tbl3 is expected to be r2; $Tbl3 takes over the register
#    that $t0 named, and $t0 is moved to r0.
# The bare "die" guards abort generation if the register assignments (made
# outside this excerpt) no longer match these expectations.
102
if ($SIZE_T==8) { die if ($t1 ne "r13"); $t1="r0"; }
103
else { die if ($Tbl3 ne "r2"); $Tbl3=$t0; $t0="r0"; }
115
mflr $Tbl0 ; vvvvv "distance" between . and 1st data entry
116
addi $Tbl0,$Tbl0,`128-8`
123
mflr $Tbl0 ; vvvvvvvv "distance" between . and 1st data entry
124
addi $Tbl0,$Tbl0,`128-8-32+2048+256`
130
0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
131
0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
132
0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
133
0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
134
0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
135
0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
136
0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
137
0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
138
0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
139
0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
140
0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
141
0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
142
0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
143
0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
144
0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
145
0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
146
0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
147
0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
148
0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
149
0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
150
0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
151
0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
152
0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
153
0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
154
0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
155
0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
156
0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
157
0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
158
0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
159
0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
160
0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
161
0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
162
0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
163
0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
164
0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
165
0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
166
0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
167
0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
168
0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
169
0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
170
0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
171
0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
172
0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
173
0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
174
0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
175
0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
176
0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
177
0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
178
0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
179
0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
180
0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
181
0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
182
0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
183
0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
184
0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
185
0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
186
0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
187
0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
188
0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
189
0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
190
0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
191
0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
192
0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
193
0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
195
.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
196
.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
197
.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
198
.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
199
.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
200
.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
201
.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
202
.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
203
.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
204
.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
205
.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
206
.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
207
.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
208
.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
209
.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
210
.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
211
.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
212
.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
213
.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
214
.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
215
.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
216
.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
217
.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
218
.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
219
.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
220
.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
221
.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
222
.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
223
.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
224
.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
225
.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
226
.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
229
0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
230
0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
231
0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
232
0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
233
0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
234
0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
235
0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
236
0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
237
0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
238
0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
239
0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
240
0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
241
0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
242
0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
243
0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
244
0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
245
0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
246
0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
247
0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
248
0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
249
0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
250
0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
251
0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
252
0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
253
0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
254
0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
255
0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
256
0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
257
0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
258
0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
259
0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
260
0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
261
0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
262
0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
263
0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
264
0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
265
0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
266
0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
267
0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
268
0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
269
0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
270
0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
271
0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
272
0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
273
0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
274
0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
275
0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
276
0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
277
0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
278
0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
279
0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
280
0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
281
0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
282
0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
283
0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
284
0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
285
0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
286
0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
287
0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
288
0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
289
0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
290
0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
291
0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
292
0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
294
.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
295
.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
296
.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
297
.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
298
.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
299
.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
300
.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
301
.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
302
.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
303
.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
304
.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
305
.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
306
.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
307
.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
308
.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
309
.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
310
.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
311
.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
312
.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
313
.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
314
.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
315
.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
316
.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
317
.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
318
.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
319
.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
320
.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
321
.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
322
.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
323
.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
324
.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
325
.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
332
$STU $sp,-$FRAME($sp)
334
$PUSH r0,`$FRAME-$SIZE_T*21`($sp)
335
$PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
336
$PUSH r13,`$FRAME-$SIZE_T*19`($sp)
337
$PUSH r14,`$FRAME-$SIZE_T*18`($sp)
338
$PUSH r15,`$FRAME-$SIZE_T*17`($sp)
339
$PUSH r16,`$FRAME-$SIZE_T*16`($sp)
340
$PUSH r17,`$FRAME-$SIZE_T*15`($sp)
341
$PUSH r18,`$FRAME-$SIZE_T*14`($sp)
342
$PUSH r19,`$FRAME-$SIZE_T*13`($sp)
343
$PUSH r20,`$FRAME-$SIZE_T*12`($sp)
344
$PUSH r21,`$FRAME-$SIZE_T*11`($sp)
345
$PUSH r22,`$FRAME-$SIZE_T*10`($sp)
346
$PUSH r23,`$FRAME-$SIZE_T*9`($sp)
347
$PUSH r24,`$FRAME-$SIZE_T*8`($sp)
348
$PUSH r25,`$FRAME-$SIZE_T*7`($sp)
349
$PUSH r26,`$FRAME-$SIZE_T*6`($sp)
350
$PUSH r27,`$FRAME-$SIZE_T*5`($sp)
351
$PUSH r28,`$FRAME-$SIZE_T*4`($sp)
352
$PUSH r29,`$FRAME-$SIZE_T*3`($sp)
353
$PUSH r30,`$FRAME-$SIZE_T*2`($sp)
354
$PUSH r31,`$FRAME-$SIZE_T*1`($sp)
361
bl Lppc_AES_encrypt_compact
367
$POP r0,`$FRAME-$SIZE_T*21`($sp)
368
$POP $toc,`$FRAME-$SIZE_T*20`($sp)
369
$POP r13,`$FRAME-$SIZE_T*19`($sp)
370
$POP r14,`$FRAME-$SIZE_T*18`($sp)
371
$POP r15,`$FRAME-$SIZE_T*17`($sp)
372
$POP r16,`$FRAME-$SIZE_T*16`($sp)
373
$POP r17,`$FRAME-$SIZE_T*15`($sp)
374
$POP r18,`$FRAME-$SIZE_T*14`($sp)
375
$POP r19,`$FRAME-$SIZE_T*13`($sp)
376
$POP r20,`$FRAME-$SIZE_T*12`($sp)
377
$POP r21,`$FRAME-$SIZE_T*11`($sp)
378
$POP r22,`$FRAME-$SIZE_T*10`($sp)
379
$POP r23,`$FRAME-$SIZE_T*9`($sp)
380
$POP r24,`$FRAME-$SIZE_T*8`($sp)
381
$POP r25,`$FRAME-$SIZE_T*7`($sp)
382
$POP r26,`$FRAME-$SIZE_T*6`($sp)
383
$POP r27,`$FRAME-$SIZE_T*5`($sp)
384
$POP r28,`$FRAME-$SIZE_T*4`($sp)
385
$POP r29,`$FRAME-$SIZE_T*3`($sp)
386
$POP r30,`$FRAME-$SIZE_T*2`($sp)
387
$POP r31,`$FRAME-$SIZE_T*1`($sp)
402
addi $acc00,$acc00,-1
411
rlwinm $acc00,$s0,`32-24+3`,21,28
412
rlwinm $acc01,$s1,`32-24+3`,21,28
413
rlwinm $acc02,$s2,`32-24+3`,21,28
414
rlwinm $acc03,$s3,`32-24+3`,21,28
417
rlwinm $acc04,$s1,`32-16+3`,21,28
418
rlwinm $acc05,$s2,`32-16+3`,21,28
421
rlwinm $acc06,$s3,`32-16+3`,21,28
422
rlwinm $acc07,$s0,`32-16+3`,21,28
423
lwzx $acc00,$Tbl0,$acc00
424
lwzx $acc01,$Tbl0,$acc01
425
rlwinm $acc08,$s2,`32-8+3`,21,28
426
rlwinm $acc09,$s3,`32-8+3`,21,28
427
lwzx $acc02,$Tbl0,$acc02
428
lwzx $acc03,$Tbl0,$acc03
429
rlwinm $acc10,$s0,`32-8+3`,21,28
430
rlwinm $acc11,$s1,`32-8+3`,21,28
431
lwzx $acc04,$Tbl1,$acc04
432
lwzx $acc05,$Tbl1,$acc05
433
rlwinm $acc12,$s3,`0+3`,21,28
434
rlwinm $acc13,$s0,`0+3`,21,28
435
lwzx $acc06,$Tbl1,$acc06
436
lwzx $acc07,$Tbl1,$acc07
437
rlwinm $acc14,$s1,`0+3`,21,28
438
rlwinm $acc15,$s2,`0+3`,21,28
439
lwzx $acc08,$Tbl2,$acc08
440
lwzx $acc09,$Tbl2,$acc09
443
lwzx $acc10,$Tbl2,$acc10
444
lwzx $acc11,$Tbl2,$acc11
447
lwzx $acc12,$Tbl3,$acc12
448
lwzx $acc13,$Tbl3,$acc13
451
lwzx $acc14,$Tbl3,$acc14
452
lwzx $acc15,$Tbl3,$acc15
466
addi $Tbl2,$Tbl0,2048
470
rlwinm $acc00,$s0,`32-24`,24,31
471
rlwinm $acc01,$s1,`32-24`,24,31
474
rlwinm $acc02,$s2,`32-24`,24,31
475
rlwinm $acc03,$s3,`32-24`,24,31
476
lwz $acc08,`2048+0`($Tbl0) ! prefetch Te4
477
lwz $acc09,`2048+32`($Tbl0)
478
rlwinm $acc04,$s1,`32-16`,24,31
479
rlwinm $acc05,$s2,`32-16`,24,31
480
lwz $acc10,`2048+64`($Tbl0)
481
lwz $acc11,`2048+96`($Tbl0)
482
rlwinm $acc06,$s3,`32-16`,24,31
483
rlwinm $acc07,$s0,`32-16`,24,31
484
lwz $acc12,`2048+128`($Tbl0)
485
lwz $acc13,`2048+160`($Tbl0)
486
rlwinm $acc08,$s2,`32-8`,24,31
487
rlwinm $acc09,$s3,`32-8`,24,31
488
lwz $acc14,`2048+192`($Tbl0)
489
lwz $acc15,`2048+224`($Tbl0)
490
rlwinm $acc10,$s0,`32-8`,24,31
491
rlwinm $acc11,$s1,`32-8`,24,31
492
lbzx $acc00,$Tbl2,$acc00
493
lbzx $acc01,$Tbl2,$acc01
494
rlwinm $acc12,$s3,`0`,24,31
495
rlwinm $acc13,$s0,`0`,24,31
496
lbzx $acc02,$Tbl2,$acc02
497
lbzx $acc03,$Tbl2,$acc03
498
rlwinm $acc14,$s1,`0`,24,31
499
rlwinm $acc15,$s2,`0`,24,31
500
lbzx $acc04,$Tbl2,$acc04
501
lbzx $acc05,$Tbl2,$acc05
502
rlwinm $s0,$acc00,24,0,7
503
rlwinm $s1,$acc01,24,0,7
504
lbzx $acc06,$Tbl2,$acc06
505
lbzx $acc07,$Tbl2,$acc07
506
rlwinm $s2,$acc02,24,0,7
507
rlwinm $s3,$acc03,24,0,7
508
lbzx $acc08,$Tbl2,$acc08
509
lbzx $acc09,$Tbl2,$acc09
510
rlwimi $s0,$acc04,16,8,15
511
rlwimi $s1,$acc05,16,8,15
512
lbzx $acc10,$Tbl2,$acc10
513
lbzx $acc11,$Tbl2,$acc11
514
rlwimi $s2,$acc06,16,8,15
515
rlwimi $s3,$acc07,16,8,15
516
lbzx $acc12,$Tbl2,$acc12
517
lbzx $acc13,$Tbl2,$acc13
518
rlwimi $s0,$acc08,8,16,23
519
rlwimi $s1,$acc09,8,16,23
520
lbzx $acc14,$Tbl2,$acc14
521
lbzx $acc15,$Tbl2,$acc15
522
rlwimi $s2,$acc10,8,16,23
523
rlwimi $s3,$acc11,8,16,23
535
Lppc_AES_encrypt_compact:
541
addi $Tbl1,$Tbl0,2048
545
ori $mask80,$mask80,0x8080
546
ori $mask1b,$mask1b,0x1b1b
554
rlwinm $acc00,$s0,`32-24`,24,31
555
rlwinm $acc01,$s1,`32-24`,24,31
556
rlwinm $acc02,$s2,`32-24`,24,31
557
rlwinm $acc03,$s3,`32-24`,24,31
558
rlwinm $acc04,$s1,`32-16`,24,31
559
rlwinm $acc05,$s2,`32-16`,24,31
560
rlwinm $acc06,$s3,`32-16`,24,31
561
rlwinm $acc07,$s0,`32-16`,24,31
562
lbzx $acc00,$Tbl1,$acc00
563
lbzx $acc01,$Tbl1,$acc01
564
rlwinm $acc08,$s2,`32-8`,24,31
565
rlwinm $acc09,$s3,`32-8`,24,31
566
lbzx $acc02,$Tbl1,$acc02
567
lbzx $acc03,$Tbl1,$acc03
568
rlwinm $acc10,$s0,`32-8`,24,31
569
rlwinm $acc11,$s1,`32-8`,24,31
570
lbzx $acc04,$Tbl1,$acc04
571
lbzx $acc05,$Tbl1,$acc05
572
rlwinm $acc12,$s3,`0`,24,31
573
rlwinm $acc13,$s0,`0`,24,31
574
lbzx $acc06,$Tbl1,$acc06
575
lbzx $acc07,$Tbl1,$acc07
576
rlwinm $acc14,$s1,`0`,24,31
577
rlwinm $acc15,$s2,`0`,24,31
578
lbzx $acc08,$Tbl1,$acc08
579
lbzx $acc09,$Tbl1,$acc09
580
rlwinm $s0,$acc00,24,0,7
581
rlwinm $s1,$acc01,24,0,7
582
lbzx $acc10,$Tbl1,$acc10
583
lbzx $acc11,$Tbl1,$acc11
584
rlwinm $s2,$acc02,24,0,7
585
rlwinm $s3,$acc03,24,0,7
586
lbzx $acc12,$Tbl1,$acc12
587
lbzx $acc13,$Tbl1,$acc13
588
rlwimi $s0,$acc04,16,8,15
589
rlwimi $s1,$acc05,16,8,15
590
lbzx $acc14,$Tbl1,$acc14
591
lbzx $acc15,$Tbl1,$acc15
592
rlwimi $s2,$acc06,16,8,15
593
rlwimi $s3,$acc07,16,8,15
594
rlwimi $s0,$acc08,8,16,23
595
rlwimi $s1,$acc09,8,16,23
596
rlwimi $s2,$acc10,8,16,23
597
rlwimi $s3,$acc11,8,16,23
608
bdz Lenc_compact_done
610
and $acc00,$s0,$mask80 # r1=r0&0x80808080
611
and $acc01,$s1,$mask80
612
and $acc02,$s2,$mask80
613
and $acc03,$s3,$mask80
614
srwi $acc04,$acc00,7 # r1>>7
618
andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
619
andc $acc09,$s1,$mask80
620
andc $acc10,$s2,$mask80
621
andc $acc11,$s3,$mask80
622
sub $acc00,$acc00,$acc04 # r1-(r1>>7)
623
sub $acc01,$acc01,$acc05
624
sub $acc02,$acc02,$acc06
625
sub $acc03,$acc03,$acc07
626
add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
627
add $acc09,$acc09,$acc09
628
add $acc10,$acc10,$acc10
629
add $acc11,$acc11,$acc11
630
and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
631
and $acc01,$acc01,$mask1b
632
and $acc02,$acc02,$mask1b
633
and $acc03,$acc03,$mask1b
634
xor $acc00,$acc00,$acc08 # r2
635
xor $acc01,$acc01,$acc09
636
xor $acc02,$acc02,$acc10
637
xor $acc03,$acc03,$acc11
639
rotlwi $acc12,$s0,16 # ROTATE(r0,16)
643
xor $s0,$s0,$acc00 # r0^r2
647
rotrwi $s0,$s0,24 # ROTATE(r2^r0,24)
651
xor $s0,$s0,$acc00 # ROTATE(r2^r0,24)^r2
655
rotlwi $acc08,$acc12,8 # ROTATE(r0,24)
656
rotlwi $acc09,$acc13,8
657
rotlwi $acc10,$acc14,8
658
rotlwi $acc11,$acc15,8
681
$STU $sp,-$FRAME($sp)
683
$PUSH r0,`$FRAME-$SIZE_T*21`($sp)
684
$PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
685
$PUSH r13,`$FRAME-$SIZE_T*19`($sp)
686
$PUSH r14,`$FRAME-$SIZE_T*18`($sp)
687
$PUSH r15,`$FRAME-$SIZE_T*17`($sp)
688
$PUSH r16,`$FRAME-$SIZE_T*16`($sp)
689
$PUSH r17,`$FRAME-$SIZE_T*15`($sp)
690
$PUSH r18,`$FRAME-$SIZE_T*14`($sp)
691
$PUSH r19,`$FRAME-$SIZE_T*13`($sp)
692
$PUSH r20,`$FRAME-$SIZE_T*12`($sp)
693
$PUSH r21,`$FRAME-$SIZE_T*11`($sp)
694
$PUSH r22,`$FRAME-$SIZE_T*10`($sp)
695
$PUSH r23,`$FRAME-$SIZE_T*9`($sp)
696
$PUSH r24,`$FRAME-$SIZE_T*8`($sp)
697
$PUSH r25,`$FRAME-$SIZE_T*7`($sp)
698
$PUSH r26,`$FRAME-$SIZE_T*6`($sp)
699
$PUSH r27,`$FRAME-$SIZE_T*5`($sp)
700
$PUSH r28,`$FRAME-$SIZE_T*4`($sp)
701
$PUSH r29,`$FRAME-$SIZE_T*3`($sp)
702
$PUSH r30,`$FRAME-$SIZE_T*2`($sp)
703
$PUSH r31,`$FRAME-$SIZE_T*1`($sp)
710
bl Lppc_AES_decrypt_compact
716
$POP r0,`$FRAME-$SIZE_T*21`($sp)
717
$POP $toc,`$FRAME-$SIZE_T*20`($sp)
718
$POP r13,`$FRAME-$SIZE_T*19`($sp)
719
$POP r14,`$FRAME-$SIZE_T*18`($sp)
720
$POP r15,`$FRAME-$SIZE_T*17`($sp)
721
$POP r16,`$FRAME-$SIZE_T*16`($sp)
722
$POP r17,`$FRAME-$SIZE_T*15`($sp)
723
$POP r18,`$FRAME-$SIZE_T*14`($sp)
724
$POP r19,`$FRAME-$SIZE_T*13`($sp)
725
$POP r20,`$FRAME-$SIZE_T*12`($sp)
726
$POP r21,`$FRAME-$SIZE_T*11`($sp)
727
$POP r22,`$FRAME-$SIZE_T*10`($sp)
728
$POP r23,`$FRAME-$SIZE_T*9`($sp)
729
$POP r24,`$FRAME-$SIZE_T*8`($sp)
730
$POP r25,`$FRAME-$SIZE_T*7`($sp)
731
$POP r26,`$FRAME-$SIZE_T*6`($sp)
732
$POP r27,`$FRAME-$SIZE_T*5`($sp)
733
$POP r28,`$FRAME-$SIZE_T*4`($sp)
734
$POP r29,`$FRAME-$SIZE_T*3`($sp)
735
$POP r30,`$FRAME-$SIZE_T*2`($sp)
736
$POP r31,`$FRAME-$SIZE_T*1`($sp)
751
addi $acc00,$acc00,-1
760
rlwinm $acc00,$s0,`32-24+3`,21,28
761
rlwinm $acc01,$s1,`32-24+3`,21,28
762
rlwinm $acc02,$s2,`32-24+3`,21,28
763
rlwinm $acc03,$s3,`32-24+3`,21,28
766
rlwinm $acc04,$s3,`32-16+3`,21,28
767
rlwinm $acc05,$s0,`32-16+3`,21,28
770
rlwinm $acc06,$s1,`32-16+3`,21,28
771
rlwinm $acc07,$s2,`32-16+3`,21,28
772
lwzx $acc00,$Tbl0,$acc00
773
lwzx $acc01,$Tbl0,$acc01
774
rlwinm $acc08,$s2,`32-8+3`,21,28
775
rlwinm $acc09,$s3,`32-8+3`,21,28
776
lwzx $acc02,$Tbl0,$acc02
777
lwzx $acc03,$Tbl0,$acc03
778
rlwinm $acc10,$s0,`32-8+3`,21,28
779
rlwinm $acc11,$s1,`32-8+3`,21,28
780
lwzx $acc04,$Tbl1,$acc04
781
lwzx $acc05,$Tbl1,$acc05
782
rlwinm $acc12,$s1,`0+3`,21,28
783
rlwinm $acc13,$s2,`0+3`,21,28
784
lwzx $acc06,$Tbl1,$acc06
785
lwzx $acc07,$Tbl1,$acc07
786
rlwinm $acc14,$s3,`0+3`,21,28
787
rlwinm $acc15,$s0,`0+3`,21,28
788
lwzx $acc08,$Tbl2,$acc08
789
lwzx $acc09,$Tbl2,$acc09
792
lwzx $acc10,$Tbl2,$acc10
793
lwzx $acc11,$Tbl2,$acc11
796
lwzx $acc12,$Tbl3,$acc12
797
lwzx $acc13,$Tbl3,$acc13
800
lwzx $acc14,$Tbl3,$acc14
801
lwzx $acc15,$Tbl3,$acc15
815
addi $Tbl2,$Tbl0,2048
819
rlwinm $acc00,$s0,`32-24`,24,31
820
rlwinm $acc01,$s1,`32-24`,24,31
823
rlwinm $acc02,$s2,`32-24`,24,31
824
rlwinm $acc03,$s3,`32-24`,24,31
825
lwz $acc08,`2048+0`($Tbl0) ! prefetch Td4
826
lwz $acc09,`2048+32`($Tbl0)
827
rlwinm $acc04,$s3,`32-16`,24,31
828
rlwinm $acc05,$s0,`32-16`,24,31
829
lwz $acc10,`2048+64`($Tbl0)
830
lwz $acc11,`2048+96`($Tbl0)
831
lbzx $acc00,$Tbl2,$acc00
832
lbzx $acc01,$Tbl2,$acc01
833
lwz $acc12,`2048+128`($Tbl0)
834
lwz $acc13,`2048+160`($Tbl0)
835
rlwinm $acc06,$s1,`32-16`,24,31
836
rlwinm $acc07,$s2,`32-16`,24,31
837
lwz $acc14,`2048+192`($Tbl0)
838
lwz $acc15,`2048+224`($Tbl0)
839
rlwinm $acc08,$s2,`32-8`,24,31
840
rlwinm $acc09,$s3,`32-8`,24,31
841
lbzx $acc02,$Tbl2,$acc02
842
lbzx $acc03,$Tbl2,$acc03
843
rlwinm $acc10,$s0,`32-8`,24,31
844
rlwinm $acc11,$s1,`32-8`,24,31
845
lbzx $acc04,$Tbl2,$acc04
846
lbzx $acc05,$Tbl2,$acc05
847
rlwinm $acc12,$s1,`0`,24,31
848
rlwinm $acc13,$s2,`0`,24,31
849
lbzx $acc06,$Tbl2,$acc06
850
lbzx $acc07,$Tbl2,$acc07
851
rlwinm $acc14,$s3,`0`,24,31
852
rlwinm $acc15,$s0,`0`,24,31
853
lbzx $acc08,$Tbl2,$acc08
854
lbzx $acc09,$Tbl2,$acc09
855
rlwinm $s0,$acc00,24,0,7
856
rlwinm $s1,$acc01,24,0,7
857
lbzx $acc10,$Tbl2,$acc10
858
lbzx $acc11,$Tbl2,$acc11
859
rlwinm $s2,$acc02,24,0,7
860
rlwinm $s3,$acc03,24,0,7
861
lbzx $acc12,$Tbl2,$acc12
862
lbzx $acc13,$Tbl2,$acc13
863
rlwimi $s0,$acc04,16,8,15
864
rlwimi $s1,$acc05,16,8,15
865
lbzx $acc14,$Tbl2,$acc14
866
lbzx $acc15,$Tbl2,$acc15
867
rlwimi $s2,$acc06,16,8,15
868
rlwimi $s3,$acc07,16,8,15
869
rlwimi $s0,$acc08,8,16,23
870
rlwimi $s1,$acc09,8,16,23
871
rlwimi $s2,$acc10,8,16,23
872
rlwimi $s3,$acc11,8,16,23
884
Lppc_AES_decrypt_compact:
890
addi $Tbl1,$Tbl0,2048
894
ori $mask80,$mask80,0x8080
895
ori $mask1b,$mask1b,0x1b1b
897
$code.=<<___ if ($SIZE_T==8);
898
insrdi $mask80,$mask80,32,0
899
insrdi $mask1b,$mask1b,32,0
909
rlwinm $acc00,$s0,`32-24`,24,31
910
rlwinm $acc01,$s1,`32-24`,24,31
911
rlwinm $acc02,$s2,`32-24`,24,31
912
rlwinm $acc03,$s3,`32-24`,24,31
913
rlwinm $acc04,$s3,`32-16`,24,31
914
rlwinm $acc05,$s0,`32-16`,24,31
915
rlwinm $acc06,$s1,`32-16`,24,31
916
rlwinm $acc07,$s2,`32-16`,24,31
917
lbzx $acc00,$Tbl1,$acc00
918
lbzx $acc01,$Tbl1,$acc01
919
rlwinm $acc08,$s2,`32-8`,24,31
920
rlwinm $acc09,$s3,`32-8`,24,31
921
lbzx $acc02,$Tbl1,$acc02
922
lbzx $acc03,$Tbl1,$acc03
923
rlwinm $acc10,$s0,`32-8`,24,31
924
rlwinm $acc11,$s1,`32-8`,24,31
925
lbzx $acc04,$Tbl1,$acc04
926
lbzx $acc05,$Tbl1,$acc05
927
rlwinm $acc12,$s1,`0`,24,31
928
rlwinm $acc13,$s2,`0`,24,31
929
lbzx $acc06,$Tbl1,$acc06
930
lbzx $acc07,$Tbl1,$acc07
931
rlwinm $acc14,$s3,`0`,24,31
932
rlwinm $acc15,$s0,`0`,24,31
933
lbzx $acc08,$Tbl1,$acc08
934
lbzx $acc09,$Tbl1,$acc09
935
rlwinm $s0,$acc00,24,0,7
936
rlwinm $s1,$acc01,24,0,7
937
lbzx $acc10,$Tbl1,$acc10
938
lbzx $acc11,$Tbl1,$acc11
939
rlwinm $s2,$acc02,24,0,7
940
rlwinm $s3,$acc03,24,0,7
941
lbzx $acc12,$Tbl1,$acc12
942
lbzx $acc13,$Tbl1,$acc13
943
rlwimi $s0,$acc04,16,8,15
944
rlwimi $s1,$acc05,16,8,15
945
lbzx $acc14,$Tbl1,$acc14
946
lbzx $acc15,$Tbl1,$acc15
947
rlwimi $s2,$acc06,16,8,15
948
rlwimi $s3,$acc07,16,8,15
949
rlwimi $s0,$acc08,8,16,23
950
rlwimi $s1,$acc09,8,16,23
951
rlwimi $s2,$acc10,8,16,23
952
rlwimi $s3,$acc11,8,16,23
963
bdz Ldec_compact_done
965
$code.=<<___ if ($SIZE_T==8);
966
# vectorized permutation improves decrypt performance by 10%
970
and $acc00,$s0,$mask80 # r1=r0&0x80808080
971
and $acc02,$s2,$mask80
972
srdi $acc04,$acc00,7 # r1>>7
974
andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
975
andc $acc10,$s2,$mask80
976
sub $acc00,$acc00,$acc04 # r1-(r1>>7)
977
sub $acc02,$acc02,$acc06
978
add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
979
add $acc10,$acc10,$acc10
980
and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
981
and $acc02,$acc02,$mask1b
982
xor $acc00,$acc00,$acc08 # r2
983
xor $acc02,$acc02,$acc10
985
and $acc04,$acc00,$mask80 # r1=r2&0x80808080
986
and $acc06,$acc02,$mask80
987
srdi $acc08,$acc04,7 # r1>>7
989
andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f
990
andc $acc14,$acc02,$mask80
991
sub $acc04,$acc04,$acc08 # r1-(r1>>7)
992
sub $acc06,$acc06,$acc10
993
add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1
994
add $acc14,$acc14,$acc14
995
and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
996
and $acc06,$acc06,$mask1b
997
xor $acc04,$acc04,$acc12 # r4
998
xor $acc06,$acc06,$acc14
1000
and $acc08,$acc04,$mask80 # r1=r4&0x80808080
1001
and $acc10,$acc06,$mask80
1002
srdi $acc12,$acc08,7 # r1>>7
1003
srdi $acc14,$acc10,7
1004
sub $acc08,$acc08,$acc12 # r1-(r1>>7)
1005
sub $acc10,$acc10,$acc14
1006
andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f
1007
andc $acc14,$acc06,$mask80
1008
add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1
1009
add $acc14,$acc14,$acc14
1010
and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1011
and $acc10,$acc10,$mask1b
1012
xor $acc08,$acc08,$acc12 # r8
1013
xor $acc10,$acc10,$acc14
1015
xor $acc00,$acc00,$s0 # r2^r0
1016
xor $acc02,$acc02,$s2
1017
xor $acc04,$acc04,$s0 # r4^r0
1018
xor $acc06,$acc06,$s2
1020
extrdi $acc01,$acc00,32,0
1021
extrdi $acc03,$acc02,32,0
1022
extrdi $acc05,$acc04,32,0
1023
extrdi $acc07,$acc06,32,0
1024
extrdi $acc09,$acc08,32,0
1025
extrdi $acc11,$acc10,32,0
1027
$code.=<<___ if ($SIZE_T==4);
1028
and $acc00,$s0,$mask80 # r1=r0&0x80808080
1029
and $acc01,$s1,$mask80
1030
and $acc02,$s2,$mask80
1031
and $acc03,$s3,$mask80
1032
srwi $acc04,$acc00,7 # r1>>7
1033
srwi $acc05,$acc01,7
1034
srwi $acc06,$acc02,7
1035
srwi $acc07,$acc03,7
1036
andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
1037
andc $acc09,$s1,$mask80
1038
andc $acc10,$s2,$mask80
1039
andc $acc11,$s3,$mask80
1040
sub $acc00,$acc00,$acc04 # r1-(r1>>7)
1041
sub $acc01,$acc01,$acc05
1042
sub $acc02,$acc02,$acc06
1043
sub $acc03,$acc03,$acc07
1044
add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
1045
add $acc09,$acc09,$acc09
1046
add $acc10,$acc10,$acc10
1047
add $acc11,$acc11,$acc11
1048
and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1049
and $acc01,$acc01,$mask1b
1050
and $acc02,$acc02,$mask1b
1051
and $acc03,$acc03,$mask1b
1052
xor $acc00,$acc00,$acc08 # r2
1053
xor $acc01,$acc01,$acc09
1054
xor $acc02,$acc02,$acc10
1055
xor $acc03,$acc03,$acc11
1057
and $acc04,$acc00,$mask80 # r1=r2&0x80808080
1058
and $acc05,$acc01,$mask80
1059
and $acc06,$acc02,$mask80
1060
and $acc07,$acc03,$mask80
1061
srwi $acc08,$acc04,7 # r1>>7
1062
srwi $acc09,$acc05,7
1063
srwi $acc10,$acc06,7
1064
srwi $acc11,$acc07,7
1065
andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f
1066
andc $acc13,$acc01,$mask80
1067
andc $acc14,$acc02,$mask80
1068
andc $acc15,$acc03,$mask80
1069
sub $acc04,$acc04,$acc08 # r1-(r1>>7)
1070
sub $acc05,$acc05,$acc09
1071
sub $acc06,$acc06,$acc10
1072
sub $acc07,$acc07,$acc11
1073
add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1
1074
add $acc13,$acc13,$acc13
1075
add $acc14,$acc14,$acc14
1076
add $acc15,$acc15,$acc15
1077
and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1078
and $acc05,$acc05,$mask1b
1079
and $acc06,$acc06,$mask1b
1080
and $acc07,$acc07,$mask1b
1081
xor $acc04,$acc04,$acc12 # r4
1082
xor $acc05,$acc05,$acc13
1083
xor $acc06,$acc06,$acc14
1084
xor $acc07,$acc07,$acc15
1086
and $acc08,$acc04,$mask80 # r1=r4&0x80808080
1087
and $acc09,$acc05,$mask80
1088
and $acc10,$acc06,$mask80
1089
and $acc11,$acc07,$mask80
1090
srwi $acc12,$acc08,7 # r1>>7
1091
srwi $acc13,$acc09,7
1092
srwi $acc14,$acc10,7
1093
srwi $acc15,$acc11,7
1094
sub $acc08,$acc08,$acc12 # r1-(r1>>7)
1095
sub $acc09,$acc09,$acc13
1096
sub $acc10,$acc10,$acc14
1097
sub $acc11,$acc11,$acc15
1098
andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f
1099
andc $acc13,$acc05,$mask80
1100
andc $acc14,$acc06,$mask80
1101
andc $acc15,$acc07,$mask80
1102
add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1
1103
add $acc13,$acc13,$acc13
1104
add $acc14,$acc14,$acc14
1105
add $acc15,$acc15,$acc15
1106
and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1107
and $acc09,$acc09,$mask1b
1108
and $acc10,$acc10,$mask1b
1109
and $acc11,$acc11,$mask1b
1110
xor $acc08,$acc08,$acc12 # r8
1111
xor $acc09,$acc09,$acc13
1112
xor $acc10,$acc10,$acc14
1113
xor $acc11,$acc11,$acc15
1115
xor $acc00,$acc00,$s0 # r2^r0
1116
xor $acc01,$acc01,$s1
1117
xor $acc02,$acc02,$s2
1118
xor $acc03,$acc03,$s3
1119
xor $acc04,$acc04,$s0 # r4^r0
1120
xor $acc05,$acc05,$s1
1121
xor $acc06,$acc06,$s2
1122
xor $acc07,$acc07,$s3
1125
rotrwi $s0,$s0,8 # = ROTATE(r0,8)
1129
xor $s0,$s0,$acc00 # ^= r2^r0
1133
xor $acc00,$acc00,$acc08
1134
xor $acc01,$acc01,$acc09
1135
xor $acc02,$acc02,$acc10
1136
xor $acc03,$acc03,$acc11
1137
xor $s0,$s0,$acc04 # ^= r4^r0
1141
rotrwi $acc00,$acc00,24
1142
rotrwi $acc01,$acc01,24
1143
rotrwi $acc02,$acc02,24
1144
rotrwi $acc03,$acc03,24
1145
xor $acc04,$acc04,$acc08
1146
xor $acc05,$acc05,$acc09
1147
xor $acc06,$acc06,$acc10
1148
xor $acc07,$acc07,$acc11
1149
xor $s0,$s0,$acc08 # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
1153
rotrwi $acc04,$acc04,16
1154
rotrwi $acc05,$acc05,16
1155
rotrwi $acc06,$acc06,16
1156
rotrwi $acc07,$acc07,16
1157
xor $s0,$s0,$acc00 # ^= ROTATE(r8^r2^r0,24)
1161
rotrwi $acc08,$acc08,8
1162
rotrwi $acc09,$acc09,8
1163
rotrwi $acc10,$acc10,8
1164
rotrwi $acc11,$acc11,8
1165
xor $s0,$s0,$acc04 # ^= ROTATE(r8^r4^r0,16)
1169
xor $s0,$s0,$acc08 # ^= ROTATE(r8,8)
1183
.asciz "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
1187
# Post-process the generated assembly: every `...` span in $code is replaced
# by the result of evaluating its contents as a Perl expression, so constant
# arithmetic written between backticks is folded before the text is emitted.
$code =~ s{\`([^\`]*)\`}{eval $1}gem;