2
---------------------------------------------------------------------------
3
Copyright (c) 2003, Dr Brian Gladman < >, Worcester, UK.
8
The free distribution and use of this software in both source and binary
9
form is allowed (with or without changes) provided that:
11
1. distributions of this source code include the above copyright
12
notice, this list of conditions and the following disclaimer;
14
2. distributions in binary form include the above copyright
15
notice, this list of conditions and the following disclaimer
16
in the documentation and/or other associated materials;
18
3. the copyright holder's name is not used to endorse products
19
built using this software without specific written permission.
21
ALTERNATIVELY, provided that this notice is retained in full, this product
22
may be distributed under the terms of the GNU General Public License (GPL),
23
in which case the provisions of the GPL apply INSTEAD OF those given above.
27
This software is provided 'as is' with no explicit or implied warranties
28
in respect of its properties, including, but not limited to, correctness
29
and/or fitness for purpose.
30
---------------------------------------------------------------------------
31
Issue Date: 26/08/2003
33
This file contains the code for implementing encryption and decryption
34
for AES (Rijndael) for block and key sizes of 16, 24 and 32 bytes. It
35
can optionally be replaced by code written in assembler using NASM. For
36
further details see the file aesopt.h
41
/* Read word `col` of the input `src` via word_in(), XOR it with round-key
   word key[col], and store the result in column `col` of state `dst`. */
#define si(dst, src, key, col) \
    (s(dst, col) = word_in(src, col) ^ (key)[col])
42
/* Hand column `col` of state `st` to word_out() as word `col` of the
   output `dst`. */
#define so(dst, st, col) \
    word_out(dst, col, s(st, col))
45
/* Array form of the state declarators: expands to two four-element array
   declarators, second argument first.
   NOTE(review): another definition of locals() follows in this file; the
   conditional directives choosing between the two are not visible here. */
#define locals(second, first) first[4], second[4]
47
/* Scalar form of the state declarators: pastes the digits 0..3 onto each
   argument, giving eight separate variable names (second argument's four
   names first, then the first argument's). */
#define locals(second, first) \
    first##0, first##1, first##2, first##3, \
    second##0, second##1, second##2, second##3
50
/* Copy all four columns of state x into state y.
   NOTE: the expansion already ends with ';', so an invocation does not need
   a trailing semicolon of its own. */
#define l_copy(y, x)    s(y,0) = s(x,0); s(y,1) = s(x,1); \
                        s(y,2) = s(x,2); s(y,3) = s(x,3);
52
/* Apply si() to columns 0..3 in order, loading state `st` from `blk`
   combined with `key`. The expansion is four statements with no trailing
   ';' after the last one. */
#define state_in(st, blk, key) \
    si(st, blk, key, 0); si(st, blk, key, 1); \
    si(st, blk, key, 2); si(st, blk, key, 3)
53
/* Apply so() to columns 0..3 in order, writing state `st` out through
   `blk`. The expansion is four statements with no trailing ';' after the
   last one. */
#define state_out(blk, st) \
    so(blk, st, 0); so(blk, st, 1); \
    so(blk, st, 2); so(blk, st, 3)
54
/* Invoke the per-column macro `step` once for each of columns 0..3, with
   `dst` as destination state, `src` as source state and `key` as the
   round-key pointer. Four statements, no trailing ';' after the last. */
#define round(step, dst, src, key) \
    step(dst, src, key, 0); step(dst, src, key, 1); \
    step(dst, src, key, 2); step(dst, src, key, 3)
56
#if defined(ENCRYPTION) && !defined(AES_ASM)
58
/* Visual C++ .Net v7.1 provides the fastest encryption code when using
   Pentium optimization with small code but this is poor for decryption
   so we need to control this with the following VC++ pragmas
*/
64
#pragma optimize( "s", on )
67
/* Given the column (c) of the output state variable, the following
   macros give the input state variables which are needed in its
   computation for each row (r) of the state. All the alternative
   macros give the same end values but expand into different ways
   of calculating these values. In particular the complex macro
   used for dynamically variable block sizes is designed to expand
   to a compile time constant whenever possible but will expand to
   conditional clauses on some branches (I am grateful to Frank
   Yellin for this construction)
*/
78
/* Select the input state column needed for row r of output column c in the
   forward direction: the result is s(x, (c + r) mod 4).  Written as nested
   conditionals on literal column numbers so that it reduces to a single
   s(x,N) when r and c are compile-time constants. */
#define fwd_var(x,r,c)\
 ( (r) == 0 ? ( (c) == 0 ? s(x,0) : (c) == 1 ? s(x,1) : (c) == 2 ? s(x,2) : s(x,3))\
 : (r) == 1 ? ( (c) == 0 ? s(x,1) : (c) == 1 ? s(x,2) : (c) == 2 ? s(x,3) : s(x,0))\
 : (r) == 2 ? ( (c) == 0 ? s(x,2) : (c) == 1 ? s(x,3) : (c) == 2 ? s(x,0) : s(x,1))\
 :            ( (c) == 0 ? s(x,3) : (c) == 1 ? s(x,0) : (c) == 2 ? s(x,1) : s(x,2)))
86
/* fwd_rnd, four_tables() form: column `col` of state `dst` is set to
   round-key word rk[col] XORed with four_tables() evaluated over state
   `src` with t_use(f,n), fwd_var and rf1. */
#define fwd_rnd(dst, src, rk, col) \
    (s(dst, col) = (rk)[col] ^ four_tables(src, t_use(f,n), fwd_var, rf1, col))
87
#elif defined(FT1_SET)
89
/* fwd_rnd, one_table() form (the definition that follows
   `#elif defined(FT1_SET)`): column `col` of state `dst` is set to
   round-key word rk[col] XORed with one_table() evaluated over state `src`
   with upr, t_use(f,n), fwd_var and rf1. */
#define fwd_rnd(dst, src, rk, col) \
    (s(dst, col) = (rk)[col] ^ one_table(src, upr, t_use(f,n), fwd_var, rf1, col))
91
/* fwd_rnd, no_table() form: column `col` of state `dst` is set to round-key
   word rk[col] XORed with fwd_mcol() applied to the no_table() result over
   state `src` (arguments t_use(s,box), fwd_var, rf1). */
#define fwd_rnd(dst, src, rk, col) \
    (s(dst, col) = (rk)[col] ^ fwd_mcol(no_table(src, t_use(s,box), fwd_var, rf1, col)))
95
/* fwd_lrnd, four_tables() form: same shape as the four_tables() fwd_rnd but
   with t_use(f,l) in place of t_use(f,n). Column `col` of `dst` becomes
   rk[col] XOR four_tables() over `src`. */
#define fwd_lrnd(dst, src, rk, col) \
    (s(dst, col) = (rk)[col] ^ four_tables(src, t_use(f,l), fwd_var, rf1, col))
96
#elif defined(FL1_SET)
97
/* fwd_lrnd, one_table() form (the definition that follows
   `#elif defined(FL1_SET)`): column `col` of `dst` becomes rk[col] XOR
   one_table() over `src` with ups, t_use(f,l), fwd_var and rf1. */
#define fwd_lrnd(dst, src, rk, col) \
    (s(dst, col) = (rk)[col] ^ one_table(src, ups, t_use(f,l), fwd_var, rf1, col))
99
/* fwd_lrnd, no_table() form: column `col` of `dst` becomes rk[col] XOR
   no_table() over `src` (arguments t_use(s,box), fwd_var, rf1). Unlike the
   no_table() fwd_rnd, the result is not passed through fwd_mcol(). */
#define fwd_lrnd(dst, src, rk, col) \
    (s(dst, col) = (rk)[col] ^ no_table(src, t_use(s,box), fwd_var, rf1, col))
102
/* Encrypt one block: the input is read through in_blk, the result is written
   through out_blk, and the key schedule is taken from cx->ks.
   NOTE(review): this listing appears to have lost lines during extraction.
   The bare numbers look like residue of the original line numbers, and no
   closing brace, return statement, #else or #endif is visible. The comments
   below describe only the statements that are actually shown. */
aes_rval aes_encrypt(const void *in_blk, void *out_blk, const aes_encrypt_ctx cx[1])
103
{ aes_32t locals(b0, b1);
104
const aes_32t *kp = cx->ks;
106
dec_fmvars; /* declare variables for fwd_mcol() if needed */
109
/* nr is kp[52] when kp[45] ^ kp[52] ^ kp[53] is non-zero, otherwise 14.
   Presumably the number of rounds recovered from the key schedule --
   TODO confirm against the key-setup code. */
aes_32t nr = (kp[45] ^ kp[52] ^ kp[53] ? kp[52] : 14);
112
/* Check of nr (10, 12 or 14) against words of the key schedule. The
   statement this condition guards is not visible here (presumably an error
   return -- TODO confirm); as listed, it would govern the state_in() call. */
if( (nr != 10 || !(kp[0] | kp[3] | kp[4]))
113
&& (nr != 12 || !(kp[0] | kp[5] | kp[6]))
114
&& (nr != 14 || !(kp[0] | kp[7] | kp[8])) )
118
/* Load state b0 from in_blk combined with the round-key words at kp. */
state_in(b0, in_blk, kp);
120
/* Unrolled variant: each round() applies the named per-column macro to all
   four columns, with b0 and b1 alternating as source and destination.
   NOTE(review): the repeated "1 * N_COLS" / "2 * N_COLS" pairs suggest that
   intervening lines (e.g. case labels and kp adjustments) are missing from
   this listing -- TODO confirm against the original file. */
#if (ENC_UNROLL == FULL)
125
round(fwd_rnd, b1, b0, kp + 1 * N_COLS);
126
round(fwd_rnd, b0, b1, kp + 2 * N_COLS);
129
round(fwd_rnd, b1, b0, kp + 1 * N_COLS);
130
round(fwd_rnd, b0, b1, kp + 2 * N_COLS);
133
round(fwd_rnd, b1, b0, kp + 1 * N_COLS);
134
round(fwd_rnd, b0, b1, kp + 2 * N_COLS);
135
round(fwd_rnd, b1, b0, kp + 3 * N_COLS);
136
round(fwd_rnd, b0, b1, kp + 4 * N_COLS);
137
round(fwd_rnd, b1, b0, kp + 5 * N_COLS);
138
round(fwd_rnd, b0, b1, kp + 6 * N_COLS);
139
round(fwd_rnd, b1, b0, kp + 7 * N_COLS);
140
round(fwd_rnd, b0, b1, kp + 8 * N_COLS);
141
round(fwd_rnd, b1, b0, kp + 9 * N_COLS);
/* The last call in this run uses fwd_lrnd instead of fwd_rnd. */
142
round(fwd_lrnd, b0, b1, kp +10 * N_COLS);
147
/* Looped variants. rnd is not declared in the visible lines, and no update
   of kp between rounds is shown -- presumably both are on lines missing from
   this listing (TODO confirm). */
#if (ENC_UNROLL == PARTIAL)
149
/* Loop bound (nr >> 1) - 1, with two round() calls shown after it. */
for(rnd = 0; rnd < (nr >> 1) - 1; ++rnd)
152
round(fwd_rnd, b1, b0, kp);
154
round(fwd_rnd, b0, b1, kp);
157
round(fwd_rnd, b1, b0, kp);
160
/* Loop bound nr - 1, with one round() call shown after it. */
for(rnd = 0; rnd < nr - 1; ++rnd)
163
round(fwd_rnd, b1, b0, kp);
168
/* A fwd_lrnd round from b1 into b0, then b0 is written out through out_blk. */
round(fwd_lrnd, b0, b1, kp);
172
state_out(out_blk, b0);
180
#if defined(DECRYPTION) && !defined(AES_ASM)
182
/* Visual C++ .Net v7.1 provides the fastest encryption code when using
   Pentium optimization with small code but this is poor for decryption
   so we need to control this with the following VC++ pragmas
*/
187
#if defined(_MSC_VER)
188
#pragma optimize( "t", on )
191
/* Given the column (c) of the output state variable, the following
   macros give the input state variables which are needed in its
   computation for each row (r) of the state. All the alternative
   macros give the same end values but expand into different ways
   of calculating these values. In particular the complex macro
   used for dynamically variable block sizes is designed to expand
   to a compile time constant whenever possible but will expand to
   conditional clauses on some branches (I am grateful to Frank
   Yellin for this construction)
*/
202
/* Select the input state column needed for row r of output column c in the
   inverse direction: the result is s(x, (c - r) mod 4).  Written as nested
   conditionals on literal column numbers so that it reduces to a single
   s(x,N) when r and c are compile-time constants. */
#define inv_var(x,r,c)\
 ( (r) == 0 ? ( (c) == 0 ? s(x,0) : (c) == 1 ? s(x,1) : (c) == 2 ? s(x,2) : s(x,3))\
 : (r) == 1 ? ( (c) == 0 ? s(x,3) : (c) == 1 ? s(x,0) : (c) == 2 ? s(x,1) : s(x,2))\
 : (r) == 2 ? ( (c) == 0 ? s(x,2) : (c) == 1 ? s(x,3) : (c) == 2 ? s(x,0) : s(x,1))\
 :            ( (c) == 0 ? s(x,1) : (c) == 1 ? s(x,2) : (c) == 2 ? s(x,3) : s(x,0)))
210
/* inv_rnd, four_tables() form: column `col` of state `dst` is set to
   round-key word rk[col] XORed with four_tables() evaluated over state
   `src` with t_use(i,n), inv_var and rf1. */
#define inv_rnd(dst, src, rk, col) \
    (s(dst, col) = (rk)[col] ^ four_tables(src, t_use(i,n), inv_var, rf1, col))
211
#elif defined(IT1_SET)
213
/* inv_rnd, one_table() form (the definition that follows
   `#elif defined(IT1_SET)`): column `col` of `dst` becomes rk[col] XOR
   one_table() over `src` with upr, t_use(i,n), inv_var and rf1. */
#define inv_rnd(dst, src, rk, col) \
    (s(dst, col) = (rk)[col] ^ one_table(src, upr, t_use(i,n), inv_var, rf1, col))
215
/* inv_rnd, no_table() form: rk[col] is XORed with the no_table() result over
   state `src` (arguments t_use(i,box), inv_var, rf1), and inv_mcol() is
   applied to that XOR before it is stored in column `col` of `dst`. */
#define inv_rnd(dst, src, rk, col) \
    (s(dst, col) = inv_mcol((rk)[col] ^ no_table(src, t_use(i,box), inv_var, rf1, col)))
219
/* inv_lrnd, four_tables() form: same shape as the four_tables() inv_rnd but
   with t_use(i,l) in place of t_use(i,n). Column `col` of `dst` becomes
   rk[col] XOR four_tables() over `src`. */
#define inv_lrnd(dst, src, rk, col) \
    (s(dst, col) = (rk)[col] ^ four_tables(src, t_use(i,l), inv_var, rf1, col))
220
#elif defined(IL1_SET)
221
/* inv_lrnd, one_table() form (the definition that follows
   `#elif defined(IL1_SET)`): column `col` of `dst` becomes rk[col] XOR
   one_table() over `src` with ups, t_use(i,l), inv_var and rf1. */
#define inv_lrnd(dst, src, rk, col) \
    (s(dst, col) = (rk)[col] ^ one_table(src, ups, t_use(i,l), inv_var, rf1, col))
223
/* inv_lrnd, no_table() form: column `col` of `dst` becomes rk[col] XOR
   no_table() over `src` (arguments t_use(i,box), inv_var, rf1). No
   inv_mcol() is applied in this form. */
#define inv_lrnd(dst, src, rk, col) \
    (s(dst, col) = (rk)[col] ^ no_table(src, t_use(i,box), inv_var, rf1, col))
226
aes_rval aes_decrypt(const void *in_blk, void *out_blk, const aes_decrypt_ctx cx[1])
227
{ aes_32t locals(b0, b1);
229
dec_imvars; /* declare variables for inv_mcol() if needed */
232
aes_32t nr = (cx->ks[45] ^ cx->ks[52] ^ cx->ks[53] ? cx->ks[52] : 14);
233
const aes_32t *kp = cx->ks + nr * N_COLS;
236
if( (nr != 10 || !(cx->ks[0] | cx->ks[3] | cx->ks[4]))
237
&& (nr != 12 || !(cx->ks[0] | cx->ks[5] | cx->ks[6]))
238
&& (nr != 14 || !(cx->ks[0] | cx->ks[7] | cx->ks[8])) )
242
state_in(b0, in_blk, kp);
244
#if (DEC_UNROLL == FULL)
249
round(inv_rnd, b1, b0, kp - 1 * N_COLS);
250
round(inv_rnd, b0, b1, kp - 2 * N_COLS);
253
round(inv_rnd, b1, b0, kp - 1 * N_COLS);
254
round(inv_rnd, b0, b1, kp - 2 * N_COLS);
257
round(inv_rnd, b1, b0, kp - 1 * N_COLS);
258
round(inv_rnd, b0, b1, kp - 2 * N_COLS);
259
round(inv_rnd, b1, b0, kp - 3 * N_COLS);
260
round(inv_rnd, b0, b1, kp - 4 * N_COLS);
261
round(inv_rnd, b1, b0, kp - 5 * N_COLS);
262
round(inv_rnd, b0, b1, kp - 6 * N_COLS);
263
round(inv_rnd, b1, b0, kp - 7 * N_COLS);
264
round(inv_rnd, b0, b1, kp - 8 * N_COLS);
265
round(inv_rnd, b1, b0, kp - 9 * N_COLS);
266
round(inv_lrnd, b0, b1, kp - 10 * N_COLS);
271
#if (DEC_UNROLL == PARTIAL)
273
for(rnd = 0; rnd < (nr >> 1) - 1; ++rnd)
276
round(inv_rnd, b1, b0, kp);
278
round(inv_rnd, b0, b1, kp);
281
round(inv_rnd, b1, b0, kp);
284
for(rnd = 0; rnd < nr - 1; ++rnd)
287
round(inv_rnd, b1, b0, kp);
292
round(inv_lrnd, b0, b1, kp);
296
state_out(out_blk, b0);