~andersk/ubuntu/oneiric/openssl/spurious-reboot

« back to all changes in this revision

Viewing changes to crypto/aes/asm/aes-ppc.pl

  • Committer: Bazaar Package Importer
  • Author(s): Colin Watson
  • Date: 2011-05-01 23:51:53 UTC
  • mfrom: (11.1.20 sid)
  • Revision ID: james.westby@ubuntu.com-20110501235153-bjcxitndquaezb68
Tags: 1.0.0d-2ubuntu1
* Resynchronise with Debian (LP: #675566).  Remaining changes:
  - debian/libssl1.0.0.postinst:
    + Display a system restart required notification bubble on libssl1.0.0
      upgrade.
    + Use a different priority for libssl1.0.0/restart-services depending
      on whether a desktop, or server dist-upgrade is being performed.
  - debian/{libssl1.0.0-udeb.dirs, control, rules}: Create
    libssl1.0.0-udeb, for the benefit of wget-udeb (no wget-udeb package
    in Debian).
  - debian/{libcrypto1.0.0-udeb.dirs, libssl1.0.0.dirs, libssl1.0.0.files,
    rules}: Move runtime libraries to /lib, for the benefit of
    wpasupplicant.
  - debian/patches/aesni.patch: Backport Intel AES-NI support, now from
    http://rt.openssl.org/Ticket/Display.html?id=2065 rather than the
    0.9.8 variant.
  - debian/patches/Bsymbolic-functions.patch: Link using
    -Bsymbolic-functions.
  - debian/patches/perlpath-quilt.patch: Don't change perl #! paths under
    .pc.
  - debian/rules:
    + Don't run 'make test' when cross-building.
    + Use host compiler when cross-building.  Patch from Neil Williams.
    + Don't build for processors no longer supported: i486, i586 (on
      i386), v8 (on sparc).
    + Fix Makefile to properly clean up libs/ dirs in clean target.
    + Replace duplicate files in the doc directory with symlinks.
* Update architectures affected by Bsymbolic-functions.patch.
* Drop debian/patches/no-sslv2.patch; Debian now adds the 'no-ssl2'
  configure option, which compiles out SSLv2 support entirely, so this is
  no longer needed.
* Drop openssl-doc in favour of the libssl-doc package introduced by
  Debian.  Add Conflicts/Replaces until the next LTS release.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
#!/usr/bin/env perl
 
2
 
 
3
# ====================================================================
 
4
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
 
5
# project. The module is, however, dual licensed under OpenSSL and
 
6
# CRYPTOGAMS licenses depending on where you obtain it. For further
 
7
# details see http://www.openssl.org/~appro/cryptogams/.
 
8
# ====================================================================
 
9
 
 
10
# Needs more work: key setup, page boundaries, CBC routine...
 
11
#
 
12
# ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
 
13
# 128-bit key, which is ~40% better than 64-bit code generated by gcc
 
14
# 4.0. But these are not the ones currently used! Their "compact"
 
15
# counterparts are, for security reasons. ppc_AES_encrypt_compact runs
 
16
# at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
 
17
# at 1/3 of ppc_AES_decrypt.
 
18
 
 
19
# February 2010
 
20
#
 
21
# Rescheduling instructions to favour Power6 pipeline gives 10%
 
22
# performance improvement on the platform in question (and marginal
 
23
# improvement even on others). It should be noted that Power6 fails
 
24
# to process byte in 18 cycles, only in 23, because it fails to issue
 
25
# 4 load instructions in two cycles, only in 3. As a result, non-compact
 
26
# block subroutines are 25% slower than one would expect. Compact
 
27
# functions scale better, because they have pure computational part,
 
28
# which scales perfectly with clock frequency. To be specific
 
29
# ppc_AES_encrypt_compact operates at 42 cycles per byte, while
 
30
# ppc_AES_decrypt_compact - at 55 (in 64-bit build).
 
31
 
 
32
$flavour = shift;
 
33
 
 
34
if ($flavour =~ /64/) {
 
35
        $SIZE_T =8;
 
36
        $STU    ="stdu";
 
37
        $POP    ="ld";
 
38
        $PUSH   ="std";
 
39
} elsif ($flavour =~ /32/) {
 
40
        $SIZE_T =4;
 
41
        $STU    ="stwu";
 
42
        $POP    ="lwz";
 
43
        $PUSH   ="stw";
 
44
} else { die "nonsense $flavour"; }
 
45
 
 
46
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 
47
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
 
48
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
 
49
die "can't locate ppc-xlate.pl";
 
50
 
 
51
# Pipe everything printed to STDOUT through the ppc-xlate.pl translator,
# passing it the flavour and the next command-line argument (output file).
# Low-precedence "or" is required here: with "||" the die was bound to the
# "shift" operand instead of to open(), so a failed open went unreported
# (and a missing output argument was misreported as a translator failure).
open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";
 
52
 
 
53
$FRAME=32*$SIZE_T;
 
54
 
 
55
# _data_word(LIST)
#
# Appends one assembler ".long" directive per value in LIST to the global
# $code buffer. Each value is formatted as 8 hex digits and emitted twice
# on the same line, so every table entry occupies two consecutive words.
# Processing stops at the first undefined value in LIST.
#
# No prototype is declared: the sub consumes its argument list, so the
# former empty prototype "()" was misleading and only harmless because
# callers use the "&_data_word(...)" form, which bypasses prototypes.
sub _data_word
{
    # shift() consumes @_ one value at a time; undef ends the loop
    while (defined(my $word = shift)) {
        $code .= sprintf "\t.long\t0x%08x,0x%08x\n", $word, $word;
    }
}
 
59
 
 
60
$sp="r1";
 
61
$toc="r2";
 
62
$inp="r3";
 
63
$out="r4";
 
64
$key="r5";
 
65
 
 
66
$Tbl0="r3";
 
67
$Tbl1="r6";
 
68
$Tbl2="r7";
 
69
$Tbl3="r2";
 
70
 
 
71
$s0="r8";
 
72
$s1="r9";
 
73
$s2="r10";
 
74
$s3="r11";
 
75
 
 
76
$t0="r12";
 
77
$t1="r13";
 
78
$t2="r14";
 
79
$t3="r15";
 
80
 
 
81
$acc00="r16";
 
82
$acc01="r17";
 
83
$acc02="r18";
 
84
$acc03="r19";
 
85
 
 
86
$acc04="r20";
 
87
$acc05="r21";
 
88
$acc06="r22";
 
89
$acc07="r23";
 
90
 
 
91
$acc08="r24";
 
92
$acc09="r25";
 
93
$acc10="r26";
 
94
$acc11="r27";
 
95
 
 
96
$acc12="r28";
 
97
$acc13="r29";
 
98
$acc14="r30";
 
99
$acc15="r31";
 
100
 
 
101
# stay away from TLS pointer: r13 in 64-bit builds, r2 in 32-bit builds
 
102
if ($SIZE_T==8) { die if ($t1 ne "r13");  $t1="r0";             }
 
103
else            { die if ($Tbl3 ne "r2"); $Tbl3=$t0; $t0="r0";  }
 
104
$mask80=$Tbl2;
 
105
$mask1b=$Tbl3;
 
106
 
 
107
$code.=<<___;
 
108
.machine        "any"
 
109
.text
 
110
 
 
111
.align  7
 
112
LAES_Te:
 
113
        mflr    r0
 
114
        bcl     20,31,\$+4
 
115
        mflr    $Tbl0   ;    vvvvv "distance" between . and 1st data entry
 
116
        addi    $Tbl0,$Tbl0,`128-8`
 
117
        mtlr    r0
 
118
        blr
 
119
        .space  `32-24`
 
120
LAES_Td:
 
121
        mflr    r0
 
122
        bcl     20,31,\$+4
 
123
        mflr    $Tbl0   ;    vvvvvvvv "distance" between . and 1st data entry
 
124
        addi    $Tbl0,$Tbl0,`128-8-32+2048+256`
 
125
        mtlr    r0
 
126
        blr
 
127
        .space  `128-32-24`
 
128
___
 
129
&_data_word(
 
130
        0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
 
131
        0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
 
132
        0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
 
133
        0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
 
134
        0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
 
135
        0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
 
136
        0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
 
137
        0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
 
138
        0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
 
139
        0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
 
140
        0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
 
141
        0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
 
142
        0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
 
143
        0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
 
144
        0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
 
145
        0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
 
146
        0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
 
147
        0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
 
148
        0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
 
149
        0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
 
150
        0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
 
151
        0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
 
152
        0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
 
153
        0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
 
154
        0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
 
155
        0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
 
156
        0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
 
157
        0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
 
158
        0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
 
159
        0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
 
160
        0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
 
161
        0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
 
162
        0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
 
163
        0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
 
164
        0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
 
165
        0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
 
166
        0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
 
167
        0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
 
168
        0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
 
169
        0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
 
170
        0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
 
171
        0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
 
172
        0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
 
173
        0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
 
174
        0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
 
175
        0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
 
176
        0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
 
177
        0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
 
178
        0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
 
179
        0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
 
180
        0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
 
181
        0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
 
182
        0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
 
183
        0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
 
184
        0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
 
185
        0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
 
186
        0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
 
187
        0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
 
188
        0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
 
189
        0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
 
190
        0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
 
191
        0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
 
192
        0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
 
193
        0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
 
194
$code.=<<___;
 
195
.byte   0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
 
196
.byte   0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
 
197
.byte   0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
 
198
.byte   0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
 
199
.byte   0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
 
200
.byte   0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
 
201
.byte   0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
 
202
.byte   0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
 
203
.byte   0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
 
204
.byte   0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
 
205
.byte   0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
 
206
.byte   0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
 
207
.byte   0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
 
208
.byte   0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
 
209
.byte   0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
 
210
.byte   0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
 
211
.byte   0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
 
212
.byte   0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
 
213
.byte   0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
 
214
.byte   0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
 
215
.byte   0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
 
216
.byte   0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
 
217
.byte   0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
 
218
.byte   0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
 
219
.byte   0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
 
220
.byte   0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
 
221
.byte   0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
 
222
.byte   0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
 
223
.byte   0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
 
224
.byte   0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
 
225
.byte   0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
 
226
.byte   0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
 
227
___
 
228
&_data_word(
 
229
        0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
 
230
        0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
 
231
        0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
 
232
        0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
 
233
        0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
 
234
        0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
 
235
        0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
 
236
        0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
 
237
        0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
 
238
        0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
 
239
        0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
 
240
        0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
 
241
        0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
 
242
        0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
 
243
        0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
 
244
        0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
 
245
        0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
 
246
        0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
 
247
        0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
 
248
        0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
 
249
        0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
 
250
        0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
 
251
        0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
 
252
        0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
 
253
        0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
 
254
        0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
 
255
        0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
 
256
        0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
 
257
        0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
 
258
        0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
 
259
        0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
 
260
        0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
 
261
        0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
 
262
        0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
 
263
        0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
 
264
        0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
 
265
        0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
 
266
        0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
 
267
        0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
 
268
        0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
 
269
        0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
 
270
        0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
 
271
        0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
 
272
        0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
 
273
        0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
 
274
        0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
 
275
        0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
 
276
        0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
 
277
        0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
 
278
        0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
 
279
        0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
 
280
        0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
 
281
        0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
 
282
        0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
 
283
        0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
 
284
        0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
 
285
        0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
 
286
        0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
 
287
        0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
 
288
        0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
 
289
        0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
 
290
        0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
 
291
        0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
 
292
        0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
 
293
$code.=<<___;
 
294
.byte   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
 
295
.byte   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
 
296
.byte   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
 
297
.byte   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
 
298
.byte   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
 
299
.byte   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
 
300
.byte   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
 
301
.byte   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
 
302
.byte   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
 
303
.byte   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
 
304
.byte   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
 
305
.byte   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
 
306
.byte   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
 
307
.byte   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
 
308
.byte   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
 
309
.byte   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
 
310
.byte   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
 
311
.byte   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
 
312
.byte   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
 
313
.byte   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
 
314
.byte   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
 
315
.byte   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
 
316
.byte   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
 
317
.byte   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
 
318
.byte   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
 
319
.byte   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
 
320
.byte   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
 
321
.byte   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
 
322
.byte   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
 
323
.byte   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
 
324
.byte   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
 
325
.byte   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
 
326
 
 
327
 
 
328
.globl  .AES_encrypt
 
329
.align  7
 
330
.AES_encrypt:
 
331
        mflr    r0
 
332
        $STU    $sp,-$FRAME($sp)
 
333
 
 
334
        $PUSH   r0,`$FRAME-$SIZE_T*21`($sp)
 
335
        $PUSH   $toc,`$FRAME-$SIZE_T*20`($sp)
 
336
        $PUSH   r13,`$FRAME-$SIZE_T*19`($sp)
 
337
        $PUSH   r14,`$FRAME-$SIZE_T*18`($sp)
 
338
        $PUSH   r15,`$FRAME-$SIZE_T*17`($sp)
 
339
        $PUSH   r16,`$FRAME-$SIZE_T*16`($sp)
 
340
        $PUSH   r17,`$FRAME-$SIZE_T*15`($sp)
 
341
        $PUSH   r18,`$FRAME-$SIZE_T*14`($sp)
 
342
        $PUSH   r19,`$FRAME-$SIZE_T*13`($sp)
 
343
        $PUSH   r20,`$FRAME-$SIZE_T*12`($sp)
 
344
        $PUSH   r21,`$FRAME-$SIZE_T*11`($sp)
 
345
        $PUSH   r22,`$FRAME-$SIZE_T*10`($sp)
 
346
        $PUSH   r23,`$FRAME-$SIZE_T*9`($sp)
 
347
        $PUSH   r24,`$FRAME-$SIZE_T*8`($sp)
 
348
        $PUSH   r25,`$FRAME-$SIZE_T*7`($sp)
 
349
        $PUSH   r26,`$FRAME-$SIZE_T*6`($sp)
 
350
        $PUSH   r27,`$FRAME-$SIZE_T*5`($sp)
 
351
        $PUSH   r28,`$FRAME-$SIZE_T*4`($sp)
 
352
        $PUSH   r29,`$FRAME-$SIZE_T*3`($sp)
 
353
        $PUSH   r30,`$FRAME-$SIZE_T*2`($sp)
 
354
        $PUSH   r31,`$FRAME-$SIZE_T*1`($sp)
 
355
 
 
356
        lwz     $s0,0($inp)
 
357
        lwz     $s1,4($inp)
 
358
        lwz     $s2,8($inp)
 
359
        lwz     $s3,12($inp)
 
360
        bl      LAES_Te
 
361
        bl      Lppc_AES_encrypt_compact
 
362
        stw     $s0,0($out)
 
363
        stw     $s1,4($out)
 
364
        stw     $s2,8($out)
 
365
        stw     $s3,12($out)
 
366
 
 
367
        $POP    r0,`$FRAME-$SIZE_T*21`($sp)
 
368
        $POP    $toc,`$FRAME-$SIZE_T*20`($sp)
 
369
        $POP    r13,`$FRAME-$SIZE_T*19`($sp)
 
370
        $POP    r14,`$FRAME-$SIZE_T*18`($sp)
 
371
        $POP    r15,`$FRAME-$SIZE_T*17`($sp)
 
372
        $POP    r16,`$FRAME-$SIZE_T*16`($sp)
 
373
        $POP    r17,`$FRAME-$SIZE_T*15`($sp)
 
374
        $POP    r18,`$FRAME-$SIZE_T*14`($sp)
 
375
        $POP    r19,`$FRAME-$SIZE_T*13`($sp)
 
376
        $POP    r20,`$FRAME-$SIZE_T*12`($sp)
 
377
        $POP    r21,`$FRAME-$SIZE_T*11`($sp)
 
378
        $POP    r22,`$FRAME-$SIZE_T*10`($sp)
 
379
        $POP    r23,`$FRAME-$SIZE_T*9`($sp)
 
380
        $POP    r24,`$FRAME-$SIZE_T*8`($sp)
 
381
        $POP    r25,`$FRAME-$SIZE_T*7`($sp)
 
382
        $POP    r26,`$FRAME-$SIZE_T*6`($sp)
 
383
        $POP    r27,`$FRAME-$SIZE_T*5`($sp)
 
384
        $POP    r28,`$FRAME-$SIZE_T*4`($sp)
 
385
        $POP    r29,`$FRAME-$SIZE_T*3`($sp)
 
386
        $POP    r30,`$FRAME-$SIZE_T*2`($sp)
 
387
        $POP    r31,`$FRAME-$SIZE_T*1`($sp)
 
388
        mtlr    r0
 
389
        addi    $sp,$sp,$FRAME
 
390
        blr
 
391
 
 
392
.align  5
 
393
Lppc_AES_encrypt:
 
394
        lwz     $acc00,240($key)
 
395
        lwz     $t0,0($key)
 
396
        lwz     $t1,4($key)
 
397
        lwz     $t2,8($key)
 
398
        lwz     $t3,12($key)
 
399
        addi    $Tbl1,$Tbl0,3
 
400
        addi    $Tbl2,$Tbl0,2
 
401
        addi    $Tbl3,$Tbl0,1
 
402
        addi    $acc00,$acc00,-1
 
403
        addi    $key,$key,16
 
404
        xor     $s0,$s0,$t0
 
405
        xor     $s1,$s1,$t1
 
406
        xor     $s2,$s2,$t2
 
407
        xor     $s3,$s3,$t3
 
408
        mtctr   $acc00
 
409
.align  4
 
410
Lenc_loop:
 
411
        rlwinm  $acc00,$s0,`32-24+3`,21,28
 
412
        rlwinm  $acc01,$s1,`32-24+3`,21,28
 
413
        rlwinm  $acc02,$s2,`32-24+3`,21,28
 
414
        rlwinm  $acc03,$s3,`32-24+3`,21,28
 
415
        lwz     $t0,0($key)
 
416
        lwz     $t1,4($key)
 
417
        rlwinm  $acc04,$s1,`32-16+3`,21,28
 
418
        rlwinm  $acc05,$s2,`32-16+3`,21,28
 
419
        lwz     $t2,8($key)
 
420
        lwz     $t3,12($key)
 
421
        rlwinm  $acc06,$s3,`32-16+3`,21,28
 
422
        rlwinm  $acc07,$s0,`32-16+3`,21,28
 
423
        lwzx    $acc00,$Tbl0,$acc00
 
424
        lwzx    $acc01,$Tbl0,$acc01
 
425
        rlwinm  $acc08,$s2,`32-8+3`,21,28
 
426
        rlwinm  $acc09,$s3,`32-8+3`,21,28
 
427
        lwzx    $acc02,$Tbl0,$acc02
 
428
        lwzx    $acc03,$Tbl0,$acc03
 
429
        rlwinm  $acc10,$s0,`32-8+3`,21,28
 
430
        rlwinm  $acc11,$s1,`32-8+3`,21,28
 
431
        lwzx    $acc04,$Tbl1,$acc04
 
432
        lwzx    $acc05,$Tbl1,$acc05
 
433
        rlwinm  $acc12,$s3,`0+3`,21,28
 
434
        rlwinm  $acc13,$s0,`0+3`,21,28
 
435
        lwzx    $acc06,$Tbl1,$acc06
 
436
        lwzx    $acc07,$Tbl1,$acc07
 
437
        rlwinm  $acc14,$s1,`0+3`,21,28
 
438
        rlwinm  $acc15,$s2,`0+3`,21,28
 
439
        lwzx    $acc08,$Tbl2,$acc08
 
440
        lwzx    $acc09,$Tbl2,$acc09
 
441
        xor     $t0,$t0,$acc00
 
442
        xor     $t1,$t1,$acc01
 
443
        lwzx    $acc10,$Tbl2,$acc10
 
444
        lwzx    $acc11,$Tbl2,$acc11
 
445
        xor     $t2,$t2,$acc02
 
446
        xor     $t3,$t3,$acc03
 
447
        lwzx    $acc12,$Tbl3,$acc12
 
448
        lwzx    $acc13,$Tbl3,$acc13
 
449
        xor     $t0,$t0,$acc04
 
450
        xor     $t1,$t1,$acc05
 
451
        lwzx    $acc14,$Tbl3,$acc14
 
452
        lwzx    $acc15,$Tbl3,$acc15
 
453
        xor     $t2,$t2,$acc06
 
454
        xor     $t3,$t3,$acc07
 
455
        xor     $t0,$t0,$acc08
 
456
        xor     $t1,$t1,$acc09
 
457
        xor     $t2,$t2,$acc10
 
458
        xor     $t3,$t3,$acc11
 
459
        xor     $s0,$t0,$acc12
 
460
        xor     $s1,$t1,$acc13
 
461
        xor     $s2,$t2,$acc14
 
462
        xor     $s3,$t3,$acc15
 
463
        addi    $key,$key,16
 
464
        bdnz-   Lenc_loop
 
465
 
 
466
        addi    $Tbl2,$Tbl0,2048
 
467
        nop
 
468
        lwz     $t0,0($key)
 
469
        lwz     $t1,4($key)
 
470
        rlwinm  $acc00,$s0,`32-24`,24,31
 
471
        rlwinm  $acc01,$s1,`32-24`,24,31
 
472
        lwz     $t2,8($key)
 
473
        lwz     $t3,12($key)
 
474
        rlwinm  $acc02,$s2,`32-24`,24,31
 
475
        rlwinm  $acc03,$s3,`32-24`,24,31
 
476
        lwz     $acc08,`2048+0`($Tbl0)  ! prefetch Te4
 
477
        lwz     $acc09,`2048+32`($Tbl0)
 
478
        rlwinm  $acc04,$s1,`32-16`,24,31
 
479
        rlwinm  $acc05,$s2,`32-16`,24,31
 
480
        lwz     $acc10,`2048+64`($Tbl0)
 
481
        lwz     $acc11,`2048+96`($Tbl0)
 
482
        rlwinm  $acc06,$s3,`32-16`,24,31
 
483
        rlwinm  $acc07,$s0,`32-16`,24,31
 
484
        lwz     $acc12,`2048+128`($Tbl0)
 
485
        lwz     $acc13,`2048+160`($Tbl0)
 
486
        rlwinm  $acc08,$s2,`32-8`,24,31
 
487
        rlwinm  $acc09,$s3,`32-8`,24,31
 
488
        lwz     $acc14,`2048+192`($Tbl0)
 
489
        lwz     $acc15,`2048+224`($Tbl0)
 
490
        rlwinm  $acc10,$s0,`32-8`,24,31
 
491
        rlwinm  $acc11,$s1,`32-8`,24,31
 
492
        lbzx    $acc00,$Tbl2,$acc00
 
493
        lbzx    $acc01,$Tbl2,$acc01
 
494
        rlwinm  $acc12,$s3,`0`,24,31
 
495
        rlwinm  $acc13,$s0,`0`,24,31
 
496
        lbzx    $acc02,$Tbl2,$acc02
 
497
        lbzx    $acc03,$Tbl2,$acc03
 
498
        rlwinm  $acc14,$s1,`0`,24,31
 
499
        rlwinm  $acc15,$s2,`0`,24,31
 
500
        lbzx    $acc04,$Tbl2,$acc04
 
501
        lbzx    $acc05,$Tbl2,$acc05
 
502
        rlwinm  $s0,$acc00,24,0,7
 
503
        rlwinm  $s1,$acc01,24,0,7
 
504
        lbzx    $acc06,$Tbl2,$acc06
 
505
        lbzx    $acc07,$Tbl2,$acc07
 
506
        rlwinm  $s2,$acc02,24,0,7
 
507
        rlwinm  $s3,$acc03,24,0,7
 
508
        lbzx    $acc08,$Tbl2,$acc08
 
509
        lbzx    $acc09,$Tbl2,$acc09
 
510
        rlwimi  $s0,$acc04,16,8,15
 
511
        rlwimi  $s1,$acc05,16,8,15
 
512
        lbzx    $acc10,$Tbl2,$acc10
 
513
        lbzx    $acc11,$Tbl2,$acc11
 
514
        rlwimi  $s2,$acc06,16,8,15
 
515
        rlwimi  $s3,$acc07,16,8,15
 
516
        lbzx    $acc12,$Tbl2,$acc12
 
517
        lbzx    $acc13,$Tbl2,$acc13
 
518
        rlwimi  $s0,$acc08,8,16,23
 
519
        rlwimi  $s1,$acc09,8,16,23
 
520
        lbzx    $acc14,$Tbl2,$acc14
 
521
        lbzx    $acc15,$Tbl2,$acc15
 
522
        rlwimi  $s2,$acc10,8,16,23
 
523
        rlwimi  $s3,$acc11,8,16,23
 
524
        or      $s0,$s0,$acc12
 
525
        or      $s1,$s1,$acc13
 
526
        or      $s2,$s2,$acc14
 
527
        or      $s3,$s3,$acc15
 
528
        xor     $s0,$s0,$t0
 
529
        xor     $s1,$s1,$t1
 
530
        xor     $s2,$s2,$t2
 
531
        xor     $s3,$s3,$t3
 
532
        blr
 
533
 
 
534
.align  4
 
535
Lppc_AES_encrypt_compact:
 
536
        lwz     $acc00,240($key)
 
537
        lwz     $t0,0($key)
 
538
        lwz     $t1,4($key)
 
539
        lwz     $t2,8($key)
 
540
        lwz     $t3,12($key)
 
541
        addi    $Tbl1,$Tbl0,2048
 
542
        lis     $mask80,0x8080
 
543
        lis     $mask1b,0x1b1b
 
544
        addi    $key,$key,16
 
545
        ori     $mask80,$mask80,0x8080
 
546
        ori     $mask1b,$mask1b,0x1b1b
 
547
        mtctr   $acc00
 
548
.align  4
 
549
Lenc_compact_loop:
 
550
        xor     $s0,$s0,$t0
 
551
        xor     $s1,$s1,$t1
 
552
        xor     $s2,$s2,$t2
 
553
        xor     $s3,$s3,$t3
 
554
        rlwinm  $acc00,$s0,`32-24`,24,31
 
555
        rlwinm  $acc01,$s1,`32-24`,24,31
 
556
        rlwinm  $acc02,$s2,`32-24`,24,31
 
557
        rlwinm  $acc03,$s3,`32-24`,24,31
 
558
        rlwinm  $acc04,$s1,`32-16`,24,31
 
559
        rlwinm  $acc05,$s2,`32-16`,24,31
 
560
        rlwinm  $acc06,$s3,`32-16`,24,31
 
561
        rlwinm  $acc07,$s0,`32-16`,24,31
 
562
        lbzx    $acc00,$Tbl1,$acc00
 
563
        lbzx    $acc01,$Tbl1,$acc01
 
564
        rlwinm  $acc08,$s2,`32-8`,24,31
 
565
        rlwinm  $acc09,$s3,`32-8`,24,31
 
566
        lbzx    $acc02,$Tbl1,$acc02
 
567
        lbzx    $acc03,$Tbl1,$acc03
 
568
        rlwinm  $acc10,$s0,`32-8`,24,31
 
569
        rlwinm  $acc11,$s1,`32-8`,24,31
 
570
        lbzx    $acc04,$Tbl1,$acc04
 
571
        lbzx    $acc05,$Tbl1,$acc05
 
572
        rlwinm  $acc12,$s3,`0`,24,31
 
573
        rlwinm  $acc13,$s0,`0`,24,31
 
574
        lbzx    $acc06,$Tbl1,$acc06
 
575
        lbzx    $acc07,$Tbl1,$acc07
 
576
        rlwinm  $acc14,$s1,`0`,24,31
 
577
        rlwinm  $acc15,$s2,`0`,24,31
 
578
        lbzx    $acc08,$Tbl1,$acc08
 
579
        lbzx    $acc09,$Tbl1,$acc09
 
580
        rlwinm  $s0,$acc00,24,0,7
 
581
        rlwinm  $s1,$acc01,24,0,7
 
582
        lbzx    $acc10,$Tbl1,$acc10
 
583
        lbzx    $acc11,$Tbl1,$acc11
 
584
        rlwinm  $s2,$acc02,24,0,7
 
585
        rlwinm  $s3,$acc03,24,0,7
 
586
        lbzx    $acc12,$Tbl1,$acc12
 
587
        lbzx    $acc13,$Tbl1,$acc13
 
588
        rlwimi  $s0,$acc04,16,8,15
 
589
        rlwimi  $s1,$acc05,16,8,15
 
590
        lbzx    $acc14,$Tbl1,$acc14
 
591
        lbzx    $acc15,$Tbl1,$acc15
 
592
        rlwimi  $s2,$acc06,16,8,15
 
593
        rlwimi  $s3,$acc07,16,8,15
 
594
        rlwimi  $s0,$acc08,8,16,23
 
595
        rlwimi  $s1,$acc09,8,16,23
 
596
        rlwimi  $s2,$acc10,8,16,23
 
597
        rlwimi  $s3,$acc11,8,16,23
 
598
        lwz     $t0,0($key)
 
599
        lwz     $t1,4($key)
 
600
        or      $s0,$s0,$acc12
 
601
        or      $s1,$s1,$acc13
 
602
        lwz     $t2,8($key)
 
603
        lwz     $t3,12($key)
 
604
        or      $s2,$s2,$acc14
 
605
        or      $s3,$s3,$acc15
 
606
 
 
607
        addi    $key,$key,16
 
608
        bdz     Lenc_compact_done
 
609
 
 
610
        and     $acc00,$s0,$mask80      # r1=r0&0x80808080
 
611
        and     $acc01,$s1,$mask80
 
612
        and     $acc02,$s2,$mask80
 
613
        and     $acc03,$s3,$mask80
 
614
        srwi    $acc04,$acc00,7         # r1>>7
 
615
        srwi    $acc05,$acc01,7
 
616
        srwi    $acc06,$acc02,7
 
617
        srwi    $acc07,$acc03,7
 
618
        andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
 
619
        andc    $acc09,$s1,$mask80
 
620
        andc    $acc10,$s2,$mask80
 
621
        andc    $acc11,$s3,$mask80
 
622
        sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
 
623
        sub     $acc01,$acc01,$acc05
 
624
        sub     $acc02,$acc02,$acc06
 
625
        sub     $acc03,$acc03,$acc07
 
626
        add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
 
627
        add     $acc09,$acc09,$acc09
 
628
        add     $acc10,$acc10,$acc10
 
629
        add     $acc11,$acc11,$acc11
 
630
        and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
 
631
        and     $acc01,$acc01,$mask1b
 
632
        and     $acc02,$acc02,$mask1b
 
633
        and     $acc03,$acc03,$mask1b
 
634
        xor     $acc00,$acc00,$acc08    # r2
 
635
        xor     $acc01,$acc01,$acc09
 
636
        xor     $acc02,$acc02,$acc10
 
637
        xor     $acc03,$acc03,$acc11
 
638
 
 
639
        rotlwi  $acc12,$s0,16           # ROTATE(r0,16)
 
640
        rotlwi  $acc13,$s1,16
 
641
        rotlwi  $acc14,$s2,16
 
642
        rotlwi  $acc15,$s3,16
 
643
        xor     $s0,$s0,$acc00          # r0^r2
 
644
        xor     $s1,$s1,$acc01
 
645
        xor     $s2,$s2,$acc02
 
646
        xor     $s3,$s3,$acc03
 
647
        rotrwi  $s0,$s0,24              # ROTATE(r2^r0,24)
 
648
        rotrwi  $s1,$s1,24
 
649
        rotrwi  $s2,$s2,24
 
650
        rotrwi  $s3,$s3,24
 
651
        xor     $s0,$s0,$acc00          # ROTATE(r2^r0,24)^r2
 
652
        xor     $s1,$s1,$acc01
 
653
        xor     $s2,$s2,$acc02
 
654
        xor     $s3,$s3,$acc03
 
655
        rotlwi  $acc08,$acc12,8         # ROTATE(r0,24)
 
656
        rotlwi  $acc09,$acc13,8
 
657
        rotlwi  $acc10,$acc14,8
 
658
        rotlwi  $acc11,$acc15,8
 
659
        xor     $s0,$s0,$acc12          #
 
660
        xor     $s1,$s1,$acc13
 
661
        xor     $s2,$s2,$acc14
 
662
        xor     $s3,$s3,$acc15
 
663
        xor     $s0,$s0,$acc08          #
 
664
        xor     $s1,$s1,$acc09
 
665
        xor     $s2,$s2,$acc10
 
666
        xor     $s3,$s3,$acc11
 
667
 
 
668
        b       Lenc_compact_loop
 
669
.align  4
 
670
Lenc_compact_done:
 
671
        xor     $s0,$s0,$t0
 
672
        xor     $s1,$s1,$t1
 
673
        xor     $s2,$s2,$t2
 
674
        xor     $s3,$s3,$t3
 
675
        blr
 
676
 
 
677
# AES_decrypt: public entry point that decrypts a single 16-byte block.
# The block is read as four 32-bit words through the inp register, run through
# Lppc_AES_decrypt_compact, and written back as four words through out.
# NOTE(review): inp/out/key, s0-s3 and the STU/PUSH/POP mnemonics are Perl
# variables bound earlier in the script, outside this excerpt - confirm there.
.globl  .AES_decrypt
 
678
.align  7
 
679
.AES_decrypt:
 
680
        # r0 carries the link register until it is spilled below.
        mflr    r0
 
681
        # Open a FRAME-byte stack frame (STU is presumably the store-with-update
        # form matching SIZE_T - TODO confirm).
        $STU    $sp,-$FRAME($sp)
 
682
 
 
683
        # Spill LR (held in r0), the toc register and r13-r31 into the top 21
        # SIZE_T-sized slots of the new frame.
        $PUSH   r0,`$FRAME-$SIZE_T*21`($sp)
 
684
        $PUSH   $toc,`$FRAME-$SIZE_T*20`($sp)
 
685
        $PUSH   r13,`$FRAME-$SIZE_T*19`($sp)
 
686
        $PUSH   r14,`$FRAME-$SIZE_T*18`($sp)
 
687
        $PUSH   r15,`$FRAME-$SIZE_T*17`($sp)
 
688
        $PUSH   r16,`$FRAME-$SIZE_T*16`($sp)
 
689
        $PUSH   r17,`$FRAME-$SIZE_T*15`($sp)
 
690
        $PUSH   r18,`$FRAME-$SIZE_T*14`($sp)
 
691
        $PUSH   r19,`$FRAME-$SIZE_T*13`($sp)
 
692
        $PUSH   r20,`$FRAME-$SIZE_T*12`($sp)
 
693
        $PUSH   r21,`$FRAME-$SIZE_T*11`($sp)
 
694
        $PUSH   r22,`$FRAME-$SIZE_T*10`($sp)
 
695
        $PUSH   r23,`$FRAME-$SIZE_T*9`($sp)
 
696
        $PUSH   r24,`$FRAME-$SIZE_T*8`($sp)
 
697
        $PUSH   r25,`$FRAME-$SIZE_T*7`($sp)
 
698
        $PUSH   r26,`$FRAME-$SIZE_T*6`($sp)
 
699
        $PUSH   r27,`$FRAME-$SIZE_T*5`($sp)
 
700
        $PUSH   r28,`$FRAME-$SIZE_T*4`($sp)
 
701
        $PUSH   r29,`$FRAME-$SIZE_T*3`($sp)
 
702
        $PUSH   r30,`$FRAME-$SIZE_T*2`($sp)
 
703
        $PUSH   r31,`$FRAME-$SIZE_T*1`($sp)
 
704
 
 
705
        # Load the 16-byte input block as four words into the state registers.
        lwz     $s0,0($inp)
 
706
        lwz     $s1,4($inp)
 
707
        lwz     $s2,8($inp)
 
708
        lwz     $s3,12($inp)
 
709
        # LAES_Td is defined outside this excerpt; presumably it leaves the
        # address of the decryption tables in Tbl0, which the callee reads
        # - TODO confirm.
        bl      LAES_Td
 
710
        bl      Lppc_AES_decrypt_compact
 
711
        # Store the decrypted state as the 16-byte output block.
        stw     $s0,0($out)
 
712
        stw     $s1,4($out)
 
713
        stw     $s2,8($out)
 
714
        stw     $s3,12($out)
 
715
 
 
716
        # Reload everything spilled in the prologue, from the same slots.
        $POP    r0,`$FRAME-$SIZE_T*21`($sp)
 
717
        $POP    $toc,`$FRAME-$SIZE_T*20`($sp)
 
718
        $POP    r13,`$FRAME-$SIZE_T*19`($sp)
 
719
        $POP    r14,`$FRAME-$SIZE_T*18`($sp)
 
720
        $POP    r15,`$FRAME-$SIZE_T*17`($sp)
 
721
        $POP    r16,`$FRAME-$SIZE_T*16`($sp)
 
722
        $POP    r17,`$FRAME-$SIZE_T*15`($sp)
 
723
        $POP    r18,`$FRAME-$SIZE_T*14`($sp)
 
724
        $POP    r19,`$FRAME-$SIZE_T*13`($sp)
 
725
        $POP    r20,`$FRAME-$SIZE_T*12`($sp)
 
726
        $POP    r21,`$FRAME-$SIZE_T*11`($sp)
 
727
        $POP    r22,`$FRAME-$SIZE_T*10`($sp)
 
728
        $POP    r23,`$FRAME-$SIZE_T*9`($sp)
 
729
        $POP    r24,`$FRAME-$SIZE_T*8`($sp)
 
730
        $POP    r25,`$FRAME-$SIZE_T*7`($sp)
 
731
        $POP    r26,`$FRAME-$SIZE_T*6`($sp)
 
732
        $POP    r27,`$FRAME-$SIZE_T*5`($sp)
 
733
        $POP    r28,`$FRAME-$SIZE_T*4`($sp)
 
734
        $POP    r29,`$FRAME-$SIZE_T*3`($sp)
 
735
        $POP    r30,`$FRAME-$SIZE_T*2`($sp)
 
736
        $POP    r31,`$FRAME-$SIZE_T*1`($sp)
 
737
        # Restore LR, release the frame and return.
        mtlr    r0
 
738
        addi    $sp,$sp,$FRAME
 
739
        blr
 
740
 
 
741
# Lppc_AES_decrypt: table-driven AES decryption of the state held in s0-s3.
# On entry the key register points at the key schedule and Tbl0 at the lookup
# tables; on return s0-s3 hold the result.  acc00-acc15, t0-t3, Tbl1-Tbl3 and
# CTR are clobbered, and key is left pointing at the final round key.
# The word at key+240, minus one, is the number of iterations of Ldec_loop;
# the last round is done separately with the byte table at Tbl0+2048.
# NOTE(review): the visible AES_decrypt entry point calls the _compact variant,
# so this routine has no caller within this excerpt.
.align  5
 
742
Lppc_AES_decrypt:
 
743
        lwz     $acc00,240($key)
 
744
        lwz     $t0,0($key)
 
745
        lwz     $t1,4($key)
 
746
        lwz     $t2,8($key)
 
747
        lwz     $t3,12($key)
 
748
        # Tbl1-Tbl3 alias Tbl0 at byte offsets 3, 2 and 1, so an lwzx with the
        # same index returns a differently aligned word from the same slot.
        # NOTE(review): presumably each 8-byte slot stores its word twice, making
        # these reads byte rotations - the table is outside this excerpt.
        addi    $Tbl1,$Tbl0,3
 
749
        addi    $Tbl2,$Tbl0,2
 
750
        addi    $Tbl3,$Tbl0,1
 
751
        addi    $acc00,$acc00,-1
 
752
        addi    $key,$key,16
 
753
        # Initial round-key addition.
        xor     $s0,$s0,$t0
 
754
        xor     $s1,$s1,$t1
 
755
        xor     $s2,$s2,$t2
 
756
        xor     $s3,$s3,$t3
 
757
        mtctr   $acc00
 
758
.align  4
 
759
Ldec_loop:
 
760
        # One round per iteration.  Each rlwinm ...,21,28 isolates one state
        # byte already multiplied by 8 (the table slot size); the sixteen
        # looked-up words are XORed together with the next round key.
        rlwinm  $acc00,$s0,`32-24+3`,21,28
 
761
        rlwinm  $acc01,$s1,`32-24+3`,21,28
 
762
        rlwinm  $acc02,$s2,`32-24+3`,21,28
 
763
        rlwinm  $acc03,$s3,`32-24+3`,21,28
 
764
        lwz     $t0,0($key)
 
765
        lwz     $t1,4($key)
 
766
        rlwinm  $acc04,$s3,`32-16+3`,21,28
 
767
        rlwinm  $acc05,$s0,`32-16+3`,21,28
 
768
        lwz     $t2,8($key)
 
769
        lwz     $t3,12($key)
 
770
        rlwinm  $acc06,$s1,`32-16+3`,21,28
 
771
        rlwinm  $acc07,$s2,`32-16+3`,21,28
 
772
        lwzx    $acc00,$Tbl0,$acc00
 
773
        lwzx    $acc01,$Tbl0,$acc01
 
774
        rlwinm  $acc08,$s2,`32-8+3`,21,28
 
775
        rlwinm  $acc09,$s3,`32-8+3`,21,28
 
776
        lwzx    $acc02,$Tbl0,$acc02
 
777
        lwzx    $acc03,$Tbl0,$acc03
 
778
        rlwinm  $acc10,$s0,`32-8+3`,21,28
 
779
        rlwinm  $acc11,$s1,`32-8+3`,21,28
 
780
        lwzx    $acc04,$Tbl1,$acc04
 
781
        lwzx    $acc05,$Tbl1,$acc05
 
782
        rlwinm  $acc12,$s1,`0+3`,21,28
 
783
        rlwinm  $acc13,$s2,`0+3`,21,28
 
784
        lwzx    $acc06,$Tbl1,$acc06
 
785
        lwzx    $acc07,$Tbl1,$acc07
 
786
        rlwinm  $acc14,$s3,`0+3`,21,28
 
787
        rlwinm  $acc15,$s0,`0+3`,21,28
 
788
        lwzx    $acc08,$Tbl2,$acc08
 
789
        lwzx    $acc09,$Tbl2,$acc09
 
790
        xor     $t0,$t0,$acc00
 
791
        xor     $t1,$t1,$acc01
 
792
        lwzx    $acc10,$Tbl2,$acc10
 
793
        lwzx    $acc11,$Tbl2,$acc11
 
794
        xor     $t2,$t2,$acc02
 
795
        xor     $t3,$t3,$acc03
 
796
        lwzx    $acc12,$Tbl3,$acc12
 
797
        lwzx    $acc13,$Tbl3,$acc13
 
798
        xor     $t0,$t0,$acc04
 
799
        xor     $t1,$t1,$acc05
 
800
        lwzx    $acc14,$Tbl3,$acc14
 
801
        lwzx    $acc15,$Tbl3,$acc15
 
802
        xor     $t2,$t2,$acc06
 
803
        xor     $t3,$t3,$acc07
 
804
        xor     $t0,$t0,$acc08
 
805
        xor     $t1,$t1,$acc09
 
806
        xor     $t2,$t2,$acc10
 
807
        xor     $t3,$t3,$acc11
 
808
        xor     $s0,$t0,$acc12
 
809
        xor     $s1,$t1,$acc13
 
810
        xor     $s2,$t2,$acc14
 
811
        xor     $s3,$t3,$acc15
 
812
        addi    $key,$key,16
 
813
        bdnz-   Ldec_loop
 
814
 
 
815
        # Final round: plain byte substitution through the 256-byte table at
        # Tbl0+2048, bytes repacked with rlwinm/rlwimi/or, then the last
        # round key is XORed in.
        addi    $Tbl2,$Tbl0,2048
 
816
        nop
 
817
        lwz     $t0,0($key)
 
818
        lwz     $t1,4($key)
 
819
        rlwinm  $acc00,$s0,`32-24`,24,31
 
820
        rlwinm  $acc01,$s1,`32-24`,24,31
 
821
        lwz     $t2,8($key)
 
822
        lwz     $t3,12($key)
 
823
        rlwinm  $acc02,$s2,`32-24`,24,31
 
824
        rlwinm  $acc03,$s3,`32-24`,24,31
 
825
        # The eight lwz below touch the byte table at 32-byte strides; the
        # loaded values are dead (acc08-acc15 are overwritten before any use).
        lwz     $acc08,`2048+0`($Tbl0)  # prefetch Td4
 
826
        lwz     $acc09,`2048+32`($Tbl0)
 
827
        rlwinm  $acc04,$s3,`32-16`,24,31
 
828
        rlwinm  $acc05,$s0,`32-16`,24,31
 
829
        lwz     $acc10,`2048+64`($Tbl0)
 
830
        lwz     $acc11,`2048+96`($Tbl0)
 
831
        lbzx    $acc00,$Tbl2,$acc00
 
832
        lbzx    $acc01,$Tbl2,$acc01
 
833
        lwz     $acc12,`2048+128`($Tbl0)
 
834
        lwz     $acc13,`2048+160`($Tbl0)
 
835
        rlwinm  $acc06,$s1,`32-16`,24,31
 
836
        rlwinm  $acc07,$s2,`32-16`,24,31
 
837
        lwz     $acc14,`2048+192`($Tbl0)
 
838
        lwz     $acc15,`2048+224`($Tbl0)
 
839
        rlwinm  $acc08,$s2,`32-8`,24,31
 
840
        rlwinm  $acc09,$s3,`32-8`,24,31
 
841
        lbzx    $acc02,$Tbl2,$acc02
 
842
        lbzx    $acc03,$Tbl2,$acc03
 
843
        rlwinm  $acc10,$s0,`32-8`,24,31
 
844
        rlwinm  $acc11,$s1,`32-8`,24,31
 
845
        lbzx    $acc04,$Tbl2,$acc04
 
846
        lbzx    $acc05,$Tbl2,$acc05
 
847
        rlwinm  $acc12,$s1,`0`,24,31
 
848
        rlwinm  $acc13,$s2,`0`,24,31
 
849
        lbzx    $acc06,$Tbl2,$acc06
 
850
        lbzx    $acc07,$Tbl2,$acc07
 
851
        rlwinm  $acc14,$s3,`0`,24,31
 
852
        rlwinm  $acc15,$s0,`0`,24,31
 
853
        lbzx    $acc08,$Tbl2,$acc08
 
854
        lbzx    $acc09,$Tbl2,$acc09
 
855
        rlwinm  $s0,$acc00,24,0,7
 
856
        rlwinm  $s1,$acc01,24,0,7
 
857
        lbzx    $acc10,$Tbl2,$acc10
 
858
        lbzx    $acc11,$Tbl2,$acc11
 
859
        rlwinm  $s2,$acc02,24,0,7
 
860
        rlwinm  $s3,$acc03,24,0,7
 
861
        lbzx    $acc12,$Tbl2,$acc12
 
862
        lbzx    $acc13,$Tbl2,$acc13
 
863
        rlwimi  $s0,$acc04,16,8,15
 
864
        rlwimi  $s1,$acc05,16,8,15
 
865
        lbzx    $acc14,$Tbl2,$acc14
 
866
        lbzx    $acc15,$Tbl2,$acc15
 
867
        rlwimi  $s2,$acc06,16,8,15
 
868
        rlwimi  $s3,$acc07,16,8,15
 
869
        rlwimi  $s0,$acc08,8,16,23
 
870
        rlwimi  $s1,$acc09,8,16,23
 
871
        rlwimi  $s2,$acc10,8,16,23
 
872
        rlwimi  $s3,$acc11,8,16,23
 
873
        or      $s0,$s0,$acc12
 
874
        or      $s1,$s1,$acc13
 
875
        or      $s2,$s2,$acc14
 
876
        or      $s3,$s3,$acc15
 
877
        xor     $s0,$s0,$t0
 
878
        xor     $s1,$s1,$t1
 
879
        xor     $s2,$s2,$t2
 
880
        xor     $s3,$s3,$t3
 
881
        blr
 
882
 
 
883
# Lppc_AES_decrypt_compact: decryption of the state in s0-s3 using only the
# 256-byte table at Tbl0+2048 plus arithmetic for the column mixing.
# This prologue reads the loop count from key+240, preloads the first round
# key into t0-t3, points Tbl1 at the byte table, builds the 0x80808080 and
# 0x1b1b1b1b byte masks and advances key to the next round key.
# The routine continues in the heredocs that follow this one.
.align  4
 
884
Lppc_AES_decrypt_compact:
 
885
        lwz     $acc00,240($key)
 
886
        lwz     $t0,0($key)
 
887
        lwz     $t1,4($key)
 
888
        lwz     $t2,8($key)
 
889
        lwz     $t3,12($key)
 
890
        addi    $Tbl1,$Tbl0,2048
 
891
        # Each mask is assembled from two 16-bit halves (lis, then ori).
        lis     $mask80,0x8080
 
892
        lis     $mask1b,0x1b1b
 
893
        addi    $key,$key,16
 
894
        ori     $mask80,$mask80,0x8080
 
895
        ori     $mask1b,$mask1b,0x1b1b
 
896
___
 
897
# Emitted for SIZE_T==8 only: insrdi ...,32,0 copies the low 32 bits of each
# byte mask into its own upper 32 bits, so the masks cover all eight bytes of
# the packed 64-bit values used by the SIZE_T==8 round code further down.
$code.=<<___ if ($SIZE_T==8);
 
898
        insrdi  $mask80,$mask80,32,0
 
899
        insrdi  $mask1b,$mask1b,32,0
 
900
___
 
901
# Head of the compact round loop.  CTR is loaded with the count read from
# key+240 in the prologue.  Each pass of Ldec_compact_loop:
#   1. XORs the pending round key (t0-t3) into the state (s0-s3);
#   2. extracts the sixteen state bytes, taking byte k (0 = most significant)
#      of output word i from state word (i-k) mod 4;
#   3. substitutes every byte through the 256-byte table at Tbl1 (lbzx);
#   4. repacks the bytes into s0-s3 while fetching the next round key and
#      advancing key by 16;
#   5. leaves via bdz to Ldec_compact_done once CTR reaches zero, so the last
#      pass skips the column mixing emitted by the heredocs that follow.
$code.=<<___;
 
902
        mtctr   $acc00
 
903
.align  4
 
904
Ldec_compact_loop:
 
905
        xor     $s0,$s0,$t0
 
906
        xor     $s1,$s1,$t1
 
907
        xor     $s2,$s2,$t2
 
908
        xor     $s3,$s3,$t3
 
909
        rlwinm  $acc00,$s0,`32-24`,24,31
 
910
        rlwinm  $acc01,$s1,`32-24`,24,31
 
911
        rlwinm  $acc02,$s2,`32-24`,24,31
 
912
        rlwinm  $acc03,$s3,`32-24`,24,31
 
913
        rlwinm  $acc04,$s3,`32-16`,24,31
 
914
        rlwinm  $acc05,$s0,`32-16`,24,31
 
915
        rlwinm  $acc06,$s1,`32-16`,24,31
 
916
        rlwinm  $acc07,$s2,`32-16`,24,31
 
917
        lbzx    $acc00,$Tbl1,$acc00
 
918
        lbzx    $acc01,$Tbl1,$acc01
 
919
        rlwinm  $acc08,$s2,`32-8`,24,31
 
920
        rlwinm  $acc09,$s3,`32-8`,24,31
 
921
        lbzx    $acc02,$Tbl1,$acc02
 
922
        lbzx    $acc03,$Tbl1,$acc03
 
923
        rlwinm  $acc10,$s0,`32-8`,24,31
 
924
        rlwinm  $acc11,$s1,`32-8`,24,31
 
925
        lbzx    $acc04,$Tbl1,$acc04
 
926
        lbzx    $acc05,$Tbl1,$acc05
 
927
        rlwinm  $acc12,$s1,`0`,24,31
 
928
        rlwinm  $acc13,$s2,`0`,24,31
 
929
        lbzx    $acc06,$Tbl1,$acc06
 
930
        lbzx    $acc07,$Tbl1,$acc07
 
931
        rlwinm  $acc14,$s3,`0`,24,31
 
932
        rlwinm  $acc15,$s0,`0`,24,31
 
933
        lbzx    $acc08,$Tbl1,$acc08
 
934
        lbzx    $acc09,$Tbl1,$acc09
 
935
        rlwinm  $s0,$acc00,24,0,7
 
936
        rlwinm  $s1,$acc01,24,0,7
 
937
        lbzx    $acc10,$Tbl1,$acc10
 
938
        lbzx    $acc11,$Tbl1,$acc11
 
939
        rlwinm  $s2,$acc02,24,0,7
 
940
        rlwinm  $s3,$acc03,24,0,7
 
941
        lbzx    $acc12,$Tbl1,$acc12
 
942
        lbzx    $acc13,$Tbl1,$acc13
 
943
        rlwimi  $s0,$acc04,16,8,15
 
944
        rlwimi  $s1,$acc05,16,8,15
 
945
        lbzx    $acc14,$Tbl1,$acc14
 
946
        lbzx    $acc15,$Tbl1,$acc15
 
947
        rlwimi  $s2,$acc06,16,8,15
 
948
        rlwimi  $s3,$acc07,16,8,15
 
949
        rlwimi  $s0,$acc08,8,16,23
 
950
        rlwimi  $s1,$acc09,8,16,23
 
951
        rlwimi  $s2,$acc10,8,16,23
 
952
        rlwimi  $s3,$acc11,8,16,23
 
953
        lwz     $t0,0($key)
 
954
        lwz     $t1,4($key)
 
955
        or      $s0,$s0,$acc12
 
956
        or      $s1,$s1,$acc13
 
957
        lwz     $t2,8($key)
 
958
        lwz     $t3,12($key)
 
959
        or      $s2,$s2,$acc14
 
960
        or      $s3,$s3,$acc15
 
961
 
 
962
        addi    $key,$key,16
 
963
        bdz     Ldec_compact_done
 
964
___
 
965
# SIZE_T==8 flavour of the column-mixing preparation.  s1 is packed into the
# upper half of s0 and s3 into the upper half of s2, so every step below
# handles two state words per instruction.  Three successive byte-wise
# doublings (high bit masked off, remainder shifted left, 0x1b folded in for
# bytes whose high bit was set) yield r2, r4 and r8 from r0.  The extrdi group
# then copies the upper halves into the odd-numbered acc registers, so the
# shared tail finds r2^r0 in acc00-acc03, r4^r0 in acc04-acc07 and r8 in
# acc08-acc11 - the same registers the SIZE_T==4 flavour uses.
$code.=<<___ if ($SIZE_T==8);
 
966
        # vectorized permutation improves decrypt performance by 10%
 
967
        insrdi  $s0,$s1,32,0
 
968
        insrdi  $s2,$s3,32,0
 
969
 
 
970
        and     $acc00,$s0,$mask80      # r1=r0&0x80808080
 
971
        and     $acc02,$s2,$mask80
 
972
        srdi    $acc04,$acc00,7         # r1>>7
 
973
        srdi    $acc06,$acc02,7
 
974
        andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
 
975
        andc    $acc10,$s2,$mask80
 
976
        sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
 
977
        sub     $acc02,$acc02,$acc06
 
978
        add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
 
979
        add     $acc10,$acc10,$acc10
 
980
        and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
 
981
        and     $acc02,$acc02,$mask1b
 
982
        xor     $acc00,$acc00,$acc08    # r2
 
983
        xor     $acc02,$acc02,$acc10
 
984
 
 
985
        and     $acc04,$acc00,$mask80   # r1=r2&0x80808080
 
986
        and     $acc06,$acc02,$mask80
 
987
        srdi    $acc08,$acc04,7         # r1>>7
 
988
        srdi    $acc10,$acc06,7
 
989
        andc    $acc12,$acc00,$mask80   # r2&0x7f7f7f7f
 
990
        andc    $acc14,$acc02,$mask80
 
991
        sub     $acc04,$acc04,$acc08    # r1-(r1>>7)
 
992
        sub     $acc06,$acc06,$acc10
 
993
        add     $acc12,$acc12,$acc12    # (r2&0x7f7f7f7f)<<1
 
994
        add     $acc14,$acc14,$acc14
 
995
        and     $acc04,$acc04,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
 
996
        and     $acc06,$acc06,$mask1b
 
997
        xor     $acc04,$acc04,$acc12    # r4
 
998
        xor     $acc06,$acc06,$acc14
 
999
 
 
1000
        and     $acc08,$acc04,$mask80   # r1=r4&0x80808080
 
1001
        and     $acc10,$acc06,$mask80
 
1002
        srdi    $acc12,$acc08,7         # r1>>7
 
1003
        srdi    $acc14,$acc10,7
 
1004
        sub     $acc08,$acc08,$acc12    # r1-(r1>>7)
 
1005
        sub     $acc10,$acc10,$acc14
 
1006
        andc    $acc12,$acc04,$mask80   # r4&0x7f7f7f7f
 
1007
        andc    $acc14,$acc06,$mask80
 
1008
        add     $acc12,$acc12,$acc12    # (r4&0x7f7f7f7f)<<1
 
1009
        add     $acc14,$acc14,$acc14
 
1010
        and     $acc08,$acc08,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
 
1011
        and     $acc10,$acc10,$mask1b
 
1012
        xor     $acc08,$acc08,$acc12    # r8
 
1013
        xor     $acc10,$acc10,$acc14
 
1014
 
 
1015
        xor     $acc00,$acc00,$s0       # r2^r0
 
1016
        xor     $acc02,$acc02,$s2
 
1017
        xor     $acc04,$acc04,$s0       # r4^r0
 
1018
        xor     $acc06,$acc06,$s2
 
1019
 
 
1020
        extrdi  $acc01,$acc00,32,0
 
1021
        extrdi  $acc03,$acc02,32,0
 
1022
        extrdi  $acc05,$acc04,32,0
 
1023
        extrdi  $acc07,$acc06,32,0
 
1024
        extrdi  $acc09,$acc08,32,0
 
1025
        extrdi  $acc11,$acc10,32,0
 
1026
___
 
1027
# SIZE_T==4 flavour of the column-mixing preparation: the same three byte-wise
# doublings as the SIZE_T==8 flavour, carried out on one state word per
# register.  For s0-s3 respectively it leaves r2^r0 in acc00-acc03, r4^r0 in
# acc04-acc07 and r8 in acc08-acc11; acc12-acc15 are scratch.
$code.=<<___ if ($SIZE_T==4);
 
1028
        and     $acc00,$s0,$mask80      # r1=r0&0x80808080
 
1029
        and     $acc01,$s1,$mask80
 
1030
        and     $acc02,$s2,$mask80
 
1031
        and     $acc03,$s3,$mask80
 
1032
        srwi    $acc04,$acc00,7         # r1>>7
 
1033
        srwi    $acc05,$acc01,7
 
1034
        srwi    $acc06,$acc02,7
 
1035
        srwi    $acc07,$acc03,7
 
1036
        andc    $acc08,$s0,$mask80      # r0&0x7f7f7f7f
 
1037
        andc    $acc09,$s1,$mask80
 
1038
        andc    $acc10,$s2,$mask80
 
1039
        andc    $acc11,$s3,$mask80
 
1040
        sub     $acc00,$acc00,$acc04    # r1-(r1>>7)
 
1041
        sub     $acc01,$acc01,$acc05
 
1042
        sub     $acc02,$acc02,$acc06
 
1043
        sub     $acc03,$acc03,$acc07
 
1044
        add     $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
 
1045
        add     $acc09,$acc09,$acc09
 
1046
        add     $acc10,$acc10,$acc10
 
1047
        add     $acc11,$acc11,$acc11
 
1048
        and     $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
 
1049
        and     $acc01,$acc01,$mask1b
 
1050
        and     $acc02,$acc02,$mask1b
 
1051
        and     $acc03,$acc03,$mask1b
 
1052
        xor     $acc00,$acc00,$acc08    # r2
 
1053
        xor     $acc01,$acc01,$acc09
 
1054
        xor     $acc02,$acc02,$acc10
 
1055
        xor     $acc03,$acc03,$acc11
 
1056
 
 
1057
        and     $acc04,$acc00,$mask80   # r1=r2&0x80808080
 
1058
        and     $acc05,$acc01,$mask80
 
1059
        and     $acc06,$acc02,$mask80
 
1060
        and     $acc07,$acc03,$mask80
 
1061
        srwi    $acc08,$acc04,7         # r1>>7
 
1062
        srwi    $acc09,$acc05,7
 
1063
        srwi    $acc10,$acc06,7
 
1064
        srwi    $acc11,$acc07,7
 
1065
        andc    $acc12,$acc00,$mask80   # r2&0x7f7f7f7f
 
1066
        andc    $acc13,$acc01,$mask80
 
1067
        andc    $acc14,$acc02,$mask80
 
1068
        andc    $acc15,$acc03,$mask80
 
1069
        sub     $acc04,$acc04,$acc08    # r1-(r1>>7)
 
1070
        sub     $acc05,$acc05,$acc09
 
1071
        sub     $acc06,$acc06,$acc10
 
1072
        sub     $acc07,$acc07,$acc11
 
1073
        add     $acc12,$acc12,$acc12    # (r2&0x7f7f7f7f)<<1
 
1074
        add     $acc13,$acc13,$acc13
 
1075
        add     $acc14,$acc14,$acc14
 
1076
        add     $acc15,$acc15,$acc15
 
1077
        and     $acc04,$acc04,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
 
1078
        and     $acc05,$acc05,$mask1b
 
1079
        and     $acc06,$acc06,$mask1b
 
1080
        and     $acc07,$acc07,$mask1b
 
1081
        xor     $acc04,$acc04,$acc12    # r4
 
1082
        xor     $acc05,$acc05,$acc13
 
1083
        xor     $acc06,$acc06,$acc14
 
1084
        xor     $acc07,$acc07,$acc15
 
1085
 
 
1086
        and     $acc08,$acc04,$mask80   # r1=r4&0x80808080
 
1087
        and     $acc09,$acc05,$mask80
 
1088
        and     $acc10,$acc06,$mask80
 
1089
        and     $acc11,$acc07,$mask80
 
1090
        srwi    $acc12,$acc08,7         # r1>>7
 
1091
        srwi    $acc13,$acc09,7
 
1092
        srwi    $acc14,$acc10,7
 
1093
        srwi    $acc15,$acc11,7
 
1094
        sub     $acc08,$acc08,$acc12    # r1-(r1>>7)
 
1095
        sub     $acc09,$acc09,$acc13
 
1096
        sub     $acc10,$acc10,$acc14
 
1097
        sub     $acc11,$acc11,$acc15
 
1098
        andc    $acc12,$acc04,$mask80   # r4&0x7f7f7f7f
 
1099
        andc    $acc13,$acc05,$mask80
 
1100
        andc    $acc14,$acc06,$mask80
 
1101
        andc    $acc15,$acc07,$mask80
 
1102
        add     $acc12,$acc12,$acc12    # (r4&0x7f7f7f7f)<<1
 
1103
        add     $acc13,$acc13,$acc13
 
1104
        add     $acc14,$acc14,$acc14
 
1105
        add     $acc15,$acc15,$acc15
 
1106
        and     $acc08,$acc08,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
 
1107
        and     $acc09,$acc09,$mask1b
 
1108
        and     $acc10,$acc10,$mask1b
 
1109
        and     $acc11,$acc11,$mask1b
 
1110
        xor     $acc08,$acc08,$acc12    # r8
 
1111
        xor     $acc09,$acc09,$acc13
 
1112
        xor     $acc10,$acc10,$acc14
 
1113
        xor     $acc11,$acc11,$acc15
 
1114
 
 
1115
        xor     $acc00,$acc00,$s0       # r2^r0
 
1116
        xor     $acc01,$acc01,$s1
 
1117
        xor     $acc02,$acc02,$s2
 
1118
        xor     $acc03,$acc03,$s3
 
1119
        xor     $acc04,$acc04,$s0       # r4^r0
 
1120
        xor     $acc05,$acc05,$s1
 
1121
        xor     $acc06,$acc06,$s2
 
1122
        xor     $acc07,$acc07,$s3
 
1123
___
 
1124
# Shared tail of the compact round, emitted for both word sizes.  Using the
# values left in acc00-acc11 by whichever preparation heredoc was emitted, it
# rebuilds each state word as
#   ROTATE(r0,8) ^ (r2^r0) ^ (r4^r0) ^ r8
#                ^ ROTATE(r8^r2^r0,24) ^ ROTATE(r8^r4^r0,16) ^ ROTATE(r8,8)
# (ROTATE as written in the inline comments, implemented with rotrwi) and
# branches back to Ldec_compact_loop.
# Ldec_compact_done, reached from the bdz in the loop head, XORs in the last
# round key and returns.  The trailing directives emit a zero word, the
# identification string and a final .align.
$code.=<<___;
 
1125
        rotrwi  $s0,$s0,8               # = ROTATE(r0,8)
 
1126
        rotrwi  $s1,$s1,8
 
1127
        rotrwi  $s2,$s2,8
 
1128
        rotrwi  $s3,$s3,8
 
1129
        xor     $s0,$s0,$acc00          # ^= r2^r0
 
1130
        xor     $s1,$s1,$acc01
 
1131
        xor     $s2,$s2,$acc02
 
1132
        xor     $s3,$s3,$acc03
 
1133
        xor     $acc00,$acc00,$acc08
 
1134
        xor     $acc01,$acc01,$acc09
 
1135
        xor     $acc02,$acc02,$acc10
 
1136
        xor     $acc03,$acc03,$acc11
 
1137
        xor     $s0,$s0,$acc04          # ^= r4^r0
 
1138
        xor     $s1,$s1,$acc05
 
1139
        xor     $s2,$s2,$acc06
 
1140
        xor     $s3,$s3,$acc07
 
1141
        rotrwi  $acc00,$acc00,24
 
1142
        rotrwi  $acc01,$acc01,24
 
1143
        rotrwi  $acc02,$acc02,24
 
1144
        rotrwi  $acc03,$acc03,24
 
1145
        xor     $acc04,$acc04,$acc08
 
1146
        xor     $acc05,$acc05,$acc09
 
1147
        xor     $acc06,$acc06,$acc10
 
1148
        xor     $acc07,$acc07,$acc11
 
1149
        xor     $s0,$s0,$acc08          # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
 
1150
        xor     $s1,$s1,$acc09
 
1151
        xor     $s2,$s2,$acc10
 
1152
        xor     $s3,$s3,$acc11
 
1153
        rotrwi  $acc04,$acc04,16
 
1154
        rotrwi  $acc05,$acc05,16
 
1155
        rotrwi  $acc06,$acc06,16
 
1156
        rotrwi  $acc07,$acc07,16
 
1157
        xor     $s0,$s0,$acc00          # ^= ROTATE(r8^r2^r0,24)
 
1158
        xor     $s1,$s1,$acc01
 
1159
        xor     $s2,$s2,$acc02
 
1160
        xor     $s3,$s3,$acc03
 
1161
        rotrwi  $acc08,$acc08,8
 
1162
        rotrwi  $acc09,$acc09,8
 
1163
        rotrwi  $acc10,$acc10,8
 
1164
        rotrwi  $acc11,$acc11,8
 
1165
        xor     $s0,$s0,$acc04          # ^= ROTATE(r8^r4^r0,16)
 
1166
        xor     $s1,$s1,$acc05
 
1167
        xor     $s2,$s2,$acc06
 
1168
        xor     $s3,$s3,$acc07
 
1169
        xor     $s0,$s0,$acc08          # ^= ROTATE(r8,8)       
 
1170
        xor     $s1,$s1,$acc09  
 
1171
        xor     $s2,$s2,$acc10  
 
1172
        xor     $s3,$s3,$acc11  
 
1173
 
 
1174
        b       Ldec_compact_loop
 
1175
.align  4
 
1176
Ldec_compact_done:
 
1177
        xor     $s0,$s0,$t0
 
1178
        xor     $s1,$s1,$t1
 
1179
        xor     $s2,$s2,$t2
 
1180
        xor     $s3,$s3,$t3
 
1181
        blr
 
1182
.long   0
 
1183
.asciz  "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
 
1184
.align  7
 
1185
___
 
1186
 
 
1187
# Replace every backtick-quoted expression embedded in the generated assembly
# (frame offsets, shift counts, table displacements) with its evaluated value.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
 
1188
print $code;
 
1189
# Buffered output is only flushed at close, and when STDOUT has been redirected
# into a filter process a failure there also surfaces here; abort instead of
# silently leaving truncated assembly behind.
# NOTE(review): how STDOUT was opened is outside this excerpt - confirm.
close STDOUT or die "error closing STDOUT: $!";