3
* $Id: desCode.h,v 1.3 2003/08/17 15:31:51 nisse Exp $ */
5
/* des - fast & portable DES encryption & decryption.
6
* Copyright (C) 1992 Dana L. How
7
* Please see the file `descore.README' for the complete copyright notice.
12
/* optional customization:
13
* the idea here is to alter the code so it will still run correctly
14
* on any machine, but run quickest on the specific machine in mind.
15
* note that these silly tweaks can give you a 15%-20% speed improvement
16
* on the sparc -- it's probably even more significant on the 68000. */
18
/* take care of machines with incredibly few registers */
20
/* REGISTER variant that expands to nothing: declarations written with
 * REGISTER (e.g. the parameters of the functions generated by ENCRYPT and
 * DECRYPT) get no storage-class hint, leaving only x, y, z declared
 * `register`.  The preprocessor condition that selects this variant is not
 * visible in this part of the file. */
#define REGISTER /* only x, y, z will be declared register */
22
/* REGISTER variant that expands to the `register` keyword; it is applied to
 * the D, r and s parameters of the functions generated by ENCRYPT and
 * DECRYPT. */
#define REGISTER register
25
/* is auto inc/dec faster than 7bit unsigned indexing? */
26
#if defined(vax) || defined(mc68000)
29
/* PREV (auto-decrement form): step pointer v back one element and yield that
 * element as an lvalue.  The offset argument o is ignored in this form.
 * The expansion is fully parenthesised so it cannot re-associate with
 * operators written next to the macro call.  v must be a modifiable pointer
 * lvalue and is changed by every use. */
#define PREV(v,o) (*--(v))
30
/* NEXT (auto-increment form): yield the element v points at as an lvalue,
 * then advance v by one element.  The offset argument o is ignored in this
 * form.  The expansion is fully parenthesised so it cannot re-associate with
 * operators written next to the macro call.  v must be a modifiable pointer
 * lvalue and is changed by every use. */
#define NEXT(v,o) (*(v)++)
34
/* PREV (indexing form): element o of v, as an lvalue; v is not modified.
 * Both arguments and the result are parenthesised so that a compound base
 * expression such as `buf + 1` indexes correctly. */
#define PREV(v,o) ((v)[(o)])
35
/* NEXT (indexing form): element o of v, as an lvalue; v is not modified.
 * Both arguments and the result are parenthesised so that a compound base
 * expression such as `buf + 2` indexes correctly. */
#define NEXT(v,o) ((v)[(o)])
38
/* if no machine type, default is indexing, 6 registers and cheap literals */
39
#if !defined(i386) && !defined(vax) && !defined(mc68000) && !defined(sparc)
44
/* handle a compiler which can't reallocate registers */
45
/* The BYTE type is used as parameter for the encrypt/decrypt functions.
46
* It's pretty bad to have the function prototypes depend on
47
* a macro definition that the users of the function don't
48
* know about. /Niels */
49
#if 0 /* didn't feel like deleting */
50
/* SREGFREE: the expansion starts with ';' (ending whatever statement precedes
 * the use) and then assigns D, cast to uint8_t *, to s.
 * NOTE(review): this follows an `#if 0` line, so it is presumably compiled
 * out; the matching #else/#endif is not visible here -- confirm. */
#define SREGFREE ; s = (uint8_t *) D
61
/* handle constants in the optimal way for 386 & vax */
62
/* 386: we declare 3 register variables (see above) and use 3 more variables;
63
* vax: we use 6 variables, all declared register;
64
* we assume address literals are cheap & unrestricted;
65
* we assume immediate constants are cheap & unrestricted. */
66
#if defined(i386) || defined(vax)
67
/* MQ0 (i386/vax): the start of des_bigmap.  S7S1 passes it to KEYMAP as the
 * mq0 table base. */
#define MQ0 des_bigmap
68
/* MQ1 (i386/vax): address of element 64 of des_bigmap.  S7S1 passes it to
 * KEYMAP as the mq1 table base. */
#define MQ1 (&des_bigmap[64])
69
/* MQ2 (i386/vax): address of element 128 of des_bigmap.  S6S0 passes it to
 * KEYMAP as the mq0 table base. */
#define MQ2 (&des_bigmap[128])
70
/* MQ3 (i386/vax): address of element 192 of des_bigmap.  S6S0 passes it to
 * KEYMAP as the mq1 table base. */
#define MQ3 (&des_bigmap[192])
71
/* HQ0 (i386/vax): expands to nothing -- the body is only a comment recording
 * the OR that this variant omits.  S7S1 passes HQ0 to KEYMAP as the hq
 * argument. */
#define HQ0(z) /* z |= 0x01000000L; */
72
/* HQ2 (i386/vax): expands to nothing -- the body is only a comment recording
 * the OR that this variant omits.  S6S0 passes HQ2 to KEYMAP as the hq
 * argument. */
#define HQ2(z) /* z |= 0x03000200L; */
73
/* LQ0 (i386/vax): z masked with 0xFCFC.  S7S1 passes it as lq0, and
 * KEYMAPQUICK hands the result to ADD as an offset.  Parenthesised so the
 * mask cannot be captured by neighbouring operators: without the parentheses
 * `LQ0(a) == b` would parse as `0xFCFC & (a == b)`. */
#define LQ0(z) (0xFCFC & (z))
74
/* LQ1 (i386/vax): z masked with 0xFCFC.  S7S1 passes it as lq1, and
 * KEYMAPQUICK hands the result to ADD as an offset.  Parenthesised so the
 * mask cannot be captured by neighbouring operators: without the parentheses
 * `LQ1(a) == b` would parse as `0xFCFC & (a == b)`. */
#define LQ1(z) (0xFCFC & (z))
75
/* LQ2 (i386/vax): z masked with 0xFCFC.  S6S0 passes it as lq0, and
 * KEYMAPQUICK hands the result to ADD as an offset.  Parenthesised so the
 * mask cannot be captured by neighbouring operators: without the parentheses
 * `LQ2(a) == b` would parse as `0xFCFC & (a == b)`. */
#define LQ2(z) (0xFCFC & (z))
76
/* LQ3 (i386/vax): z masked with 0xFCFC.  S6S0 passes it as lq1, and
 * KEYMAPQUICK hands the result to ADD as an offset.  Parenthesised so the
 * mask cannot be captured by neighbouring operators: without the parentheses
 * `LQ3(a) == b` would parse as `0xFCFC & (a == b)`. */
#define LQ3(z) (0xFCFC & (z))
78
/* MS0 (i386/vax): the start of des_keymap.  S7S1 passes it to KEYMAP as the
 * ms0 table base. */
#define MS0 des_keymap
79
/* MS1 (i386/vax): address of element 64 of des_keymap.  S7S1 passes it to
 * KEYMAP as the ms1 table base. */
#define MS1 (&des_keymap[64])
80
/* MS2 (i386/vax): address of element 128 of des_keymap.  S7S1 passes it to
 * KEYMAP as the ms2 table base. */
#define MS2 (&des_keymap[128])
81
/* MS3 (i386/vax): address of element 192 of des_keymap.  S7S1 passes it to
 * KEYMAP as the ms3 table base. */
#define MS3 (&des_keymap[192])
82
/* MS4 (i386/vax): address of element 256 of des_keymap.  S6S0 passes it to
 * KEYMAP as the ms0 table base. */
#define MS4 (&des_keymap[256])
83
/* MS5 (i386/vax): address of element 320 of des_keymap.  S6S0 passes it to
 * KEYMAP as the ms1 table base. */
#define MS5 (&des_keymap[320])
84
/* MS6 (i386/vax): address of element 384 of des_keymap.  S6S0 passes it to
 * KEYMAP as the ms2 table base. */
#define MS6 (&des_keymap[384])
85
/* MS7 (i386/vax): address of element 448 of des_keymap.  S6S0 passes it to
 * KEYMAP as the ms3 table base. */
#define MS7 (&des_keymap[448])
87
/* LS0 (i386/vax): z masked with 0xFC.  S7S1 and S6S0 pass it as ls0, and
 * KEYMAPSMALL hands the result to ADD as an offset.  Parenthesised so the
 * mask cannot be captured by neighbouring operators: without the parentheses
 * `LS0(a) == b` would parse as `0xFC & (a == b)`. */
#define LS0(z) (0xFC & (z))
88
/* LS1 (i386/vax): z masked with 0xFC.  S7S1 and S6S0 pass it as ls1, and
 * KEYMAPSMALL hands the result to ADD as an offset.  Parenthesised so the
 * mask cannot be captured by neighbouring operators: without the parentheses
 * `LS1(a) == b` would parse as `0xFC & (a == b)`. */
#define LS1(z) (0xFC & (z))
89
/* LS2 (i386/vax): z masked with 0xFC.  S7S1 and S6S0 pass it as ls2, and
 * KEYMAPSMALL hands the result to ADD as an offset.  Parenthesised so the
 * mask cannot be captured by neighbouring operators: without the parentheses
 * `LS2(a) == b` would parse as `0xFC & (a == b)`. */
#define LS2(z) (0xFC & (z))
90
/* LS3 (i386/vax): z masked with 0xFC.  S7S1 and S6S0 pass it as ls3, and
 * KEYMAPSMALL hands the result to ADD as an offset.  Parenthesised so the
 * mask cannot be captured by neighbouring operators: without the parentheses
 * `LS3(a) == b` would parse as `0xFC & (a == b)`. */
#define LS3(z) (0xFC & (z))
95
#endif /* defined(i386) || defined(vax) */
97
/* handle constants in the optimal way for mc68000 */
98
/* in addition to the core 6 variables, we declare 3 registers holding constants
99
* and 4 registers holding address literals.
100
* at most 6 data values and 5 address values are actively used at once.
101
* we assume address literals are so expensive we never use them;
102
* we assume constant index offsets > 127 are expensive, so they are not used.
103
* we assume all constants are expensive and put them in registers,
104
* including shift counts greater than 8. */
112
/* LQ0 (mc68000): z masked with k0, which must be a variable in scope where
 * the macro expands (its value is set outside this view).  S7S1 passes LQ0
 * as lq0.  Parenthesised so the mask cannot be captured by neighbouring
 * operators such as == or |. */
#define LQ0(z) (k0 & (z))
113
/* LQ1 (mc68000): z masked with k0, which must be a variable in scope where
 * the macro expands (its value is set outside this view).  S7S1 passes LQ1
 * as lq1.  Parenthesised so the mask cannot be captured by neighbouring
 * operators such as == or |. */
#define LQ1(z) (k0 & (z))
114
/* LQ2 (mc68000): z masked with k0, which must be a variable in scope where
 * the macro expands (its value is set outside this view).  S6S0 passes LQ2
 * as lq0.  Parenthesised so the mask cannot be captured by neighbouring
 * operators such as == or |. */
#define LQ2(z) (k0 & (z))
115
/* LQ3 (mc68000): z masked with k0, which must be a variable in scope where
 * the macro expands (its value is set outside this view).  S6S0 passes LQ3
 * as lq1.  Parenthesised so the mask cannot be captured by neighbouring
 * operators such as == or |. */
#define LQ3(z) (k0 & (z))
125
/* HS (mc68000): ORs k0 into z.  The trailing ';' is part of the expansion,
 * so HS(z) is a complete statement on its own.  S7S1 and S6S0 pass HS to
 * KEYMAP as the hs argument.
 * NOTE(review): presumably the use site writes hs(z) with no semicolon of its
 * own -- that line is not visible here; confirm before changing the ';'. */
#define HS(z) z |= k0;
126
/* LS0 (mc68000): z masked with k1, which must be a variable in scope where
 * the macro expands (its value is set outside this view).  S7S1 and S6S0
 * pass LS0 as ls0.  Parenthesised so the mask cannot be captured by
 * neighbouring operators such as == or |. */
#define LS0(z) (k1 & (z))
127
/* LS1 (mc68000): z masked with k2, which must be a variable in scope where
 * the macro expands (its value is set outside this view).  S7S1 and S6S0
 * pass LS1 as ls1.  Parenthesised so the mask cannot be captured by
 * neighbouring operators such as == or |. */
#define LS1(z) (k2 & (z))
128
/* LS2 (mc68000): z masked with k1, which must be a variable in scope where
 * the macro expands (its value is set outside this view).  S7S1 and S6S0
 * pass LS2 as ls2.  Parenthesised so the mask cannot be captured by
 * neighbouring operators such as == or |. */
#define LS2(z) (k1 & (z))
129
/* LS3 (mc68000): z masked with k2, which must be a variable in scope where
 * the macro expands (its value is set outside this view).  S7S1 and S6S0
 * pass LS3 as ls3.  Parenthesised so the mask cannot be captured by
 * neighbouring operators such as == or |. */
#define LS3(z) (k2 & (z))
131
register uint32_t k0, k1; \
132
register uint32_t *m0, *m1, *m2, *m3;
136
/*k2 = 28 to speed up ROL */ \
142
register uint32_t k0, k1, k2; \
143
register uint32_t *m0, *m1, *m2, *m3;
152
#endif /* defined(mc68000) */
154
/* handle constants in the optimal way for sparc */
155
/* in addition to the core 6 variables, we either declare:
156
* 4 registers holding address literals and 1 register holding a constant, or
157
* 8 registers holding address literals.
158
* up to 14 register variables are declared (sparc has %i0-%i5, %l0-%l7).
159
* we assume address literals are so expensive we never use them;
160
* we assume any constant with >10 bits is expensive and put it in a register,
161
* and any other is cheap and is coded in-line. */
169
/* LQ0 (sparc): z masked with k0, which must be a variable in scope where the
 * macro expands (its value is set outside this view).  S7S1 passes LQ0 as
 * lq0.  Parenthesised so the mask cannot be captured by neighbouring
 * operators such as == or |. */
#define LQ0(z) (k0 & (z))
170
/* LQ1 (sparc): z masked with k0, which must be a variable in scope where the
 * macro expands (its value is set outside this view).  S7S1 passes LQ1 as
 * lq1.  Parenthesised so the mask cannot be captured by neighbouring
 * operators such as == or |. */
#define LQ1(z) (k0 & (z))
171
/* LQ2 (sparc): z masked with k0, which must be a variable in scope where the
 * macro expands (its value is set outside this view).  S6S0 passes LQ2 as
 * lq0.  Parenthesised so the mask cannot be captured by neighbouring
 * operators such as == or |. */
#define LQ2(z) (k0 & (z))
172
/* LQ3 (sparc): z masked with k0, which must be a variable in scope where the
 * macro expands (its value is set outside this view).  S6S0 passes LQ3 as
 * lq1.  Parenthesised so the mask cannot be captured by neighbouring
 * operators such as == or |. */
#define LQ3(z) (k0 & (z))
183
/* LS0 (sparc): z masked with 0xFC.  S7S1 and S6S0 pass it as ls0, and
 * KEYMAPSMALL hands the result to ADD as an offset.  Parenthesised so the
 * mask cannot be captured by neighbouring operators: without the parentheses
 * `LS0(a) == b` would parse as `0xFC & (a == b)`. */
#define LS0(z) (0xFC & (z))
184
/* LS1 (sparc): z masked with 0xFC.  S7S1 and S6S0 pass it as ls1, and
 * KEYMAPSMALL hands the result to ADD as an offset.  Parenthesised so the
 * mask cannot be captured by neighbouring operators: without the parentheses
 * `LS1(a) == b` would parse as `0xFC & (a == b)`. */
#define LS1(z) (0xFC & (z))
185
/* LS2 (sparc): z masked with 0xFC.  S7S1 and S6S0 pass it as ls2, and
 * KEYMAPSMALL hands the result to ADD as an offset.  Parenthesised so the
 * mask cannot be captured by neighbouring operators: without the parentheses
 * `LS2(a) == b` would parse as `0xFC & (a == b)`. */
#define LS2(z) (0xFC & (z))
186
/* LS3 (sparc): z masked with 0xFC.  S7S1 and S6S0 pass it as ls3, and
 * KEYMAPSMALL hands the result to ADD as an offset.  Parenthesised so the
 * mask cannot be captured by neighbouring operators: without the parentheses
 * `LS3(a) == b` would parse as `0xFC & (a == b)`. */
#define LS3(z) (0xFC & (z))
188
register uint32_t k0; \
189
register uint32_t *m0, *m1, *m2, *m3;
197
register uint32_t *m0, *m1, *m2, *m3, *m4, *m5, *m6, *m7;
207
#endif /* defined(sparc) */
210
/* some basic stuff */
212
/* generate addresses from a base and an index */
213
/* FIXME: This is used only as *ADD(msi,lsi(z)) or *ADD(mqi,lqi(z)).
214
* Why not use plain indexing instead? /Niels */
215
/* ADD(b, x): a uint32_t * located x BYTES (not elements) past b.
 * b and the whole expansion are parenthesised so that a compound base such
 * as `tab + 1` is offset from the right address, and so that postfix
 * operators applied to the result (e.g. ADD(p, 4)[1]) act on the uint32_t *
 * rather than on the inner uint8_t * sum.
 * x must keep the result suitably aligned for uint32_t; the 0xFC and 0xFCFC
 * masks used by the LS and LQ macros have their low two bits clear.
 * Note: the cast discards any const qualifier on b. */
#define ADD(b,x) ((uint32_t *) ((uint8_t *)(b) + (x)))
217
/* low level rotate operations */
219
/* ROL(d, c, o): rotate d left by c bits, in place.  o is the complementary
 * right-shift count supplied by the caller (ROL1 passes 1 and 31).
 * d is evaluated three times and must be a modifiable lvalue without side
 * effects; it is expected to be an unsigned type, with c and o both smaller
 * than its width.
 * Every operand is parenthesised so that counts written as expressions
 * (e.g. `n & 31`) are used whole as the shift count. */
#define ROL(d,c,o) ((d) = ((d) << (c)) | ((d) >> (o)))
220
/* ROR(d, c, o): rotate d right by c bits, in place.  o is the complementary
 * left-shift count supplied by the caller (ROR1 passes 1 and 31).
 * d is evaluated three times and must be a modifiable lvalue without side
 * effects; it is expected to be an unsigned type, with c and o both smaller
 * than its width.
 * Every operand is parenthesised so that counts written as expressions
 * (e.g. `n & 31`) are used whole as the shift count. */
#define ROR(d,c,o) ((d) = ((d) >> (c)) | ((d) << (o)))
221
/* ROL1(d): rotate d left by one bit in place, via ROL with the count pair
 * 1 and 31. */
#define ROL1(d) ROL(d, 1, 31)
222
/* ROR1(d): rotate d right by one bit in place, via ROR with the count pair
 * 1 and 31. */
#define ROR1(d) ROR(d, 1, 31)
224
/* elementary swap for doing IP/FP */
225
#define SWAP(x,y,m,b) \
226
z = ((x >> b) ^ y) & m; \
231
/* the following macros contain all the important code fragments */
233
/* load input data, then setup special registers holding constants */
234
#define TEMPQUICK(LOAD) \
238
#define TEMPSMALL(LOAD) \
244
#define LOADDATA(x,y) \
246
y = PREV(s, 7); y<<= 8; \
247
y |= PREV(s, 6); y<<= 8; \
248
y |= PREV(s, 5); y<<= 8; \
250
x = PREV(s, 3); x<<= 8; \
251
x |= PREV(s, 2); x<<= 8; \
252
x |= PREV(s, 1); x<<= 8; \
255
/* load data without initial permutation and put into efficient position */
260
/* load data, do the initial permutation and put into efficient position */
263
SWAP(x, y, 0x0F0F0F0FL, 004); \
264
SWAP(y, x, 0x0000FFFFL, 020); \
265
SWAP(x, y, 0x33333333L, 002); \
266
SWAP(y, x, 0x00FF00FFL, 010); \
268
z = (x ^ y) & 0x55555555L; \
274
/* core encryption/decryption operations */
275
/* S box mapping and P perm */
276
#define KEYMAPSMALL(x,z,mq0,mq1,hq,lq0,lq1,sq,ms0,ms1,ms2,ms3,hs,ls0,ls1,ls2,ls3)\
278
x ^= *ADD(ms3, ls3(z)); \
280
x ^= *ADD(ms2, ls2(z)); \
282
x ^= *ADD(ms1, ls1(z)); \
284
x ^= *ADD(ms0, ls0(z))
285
/* alternate version: use 64k of tables */
286
#define KEYMAPQUICK(x,z,mq0,mq1,hq,lq0,lq1,sq,ms0,ms1,ms2,ms3,hs,ls0,ls1,ls2,ls3)\
288
x ^= *ADD(mq0, lq0(z)); \
290
x ^= *ADD(mq1, lq1(z))
291
/* apply 24 key bits and do the odd s boxes */
292
#define S7S1(x,y,z,r,m,KEYMAP,LOAD) \
295
KEYMAP(x,z,MQ0,MQ1,HQ0,LQ0,LQ1,SQ,MS0,MS1,MS2,MS3,HS,LS0,LS1,LS2,LS3)
296
/* apply 24 key bits and do the even s boxes */
297
#define S6S0(x,y,z,r,m,KEYMAP,LOAD) \
301
KEYMAP(x,z,MQ2,MQ3,HQ2,LQ2,LQ3,SQ,MS4,MS5,MS6,MS7,HS,LS0,LS1,LS2,LS3)
302
/* actual iterations. equivalent except for UPDATE & swapping m and n */
303
#define ENCR(x,y,z,r,m,n,KEYMAP) \
304
S7S1(x,y,z,r,m,KEYMAP,NEXT); \
305
S6S0(x,y,z,r,n,KEYMAP,NEXT)
306
#define DECR(x,y,z,r,m,n,KEYMAP) \
307
S6S0(x,y,z,r,m,KEYMAP,PREV); \
308
S7S1(x,y,z,r,n,KEYMAP,PREV)
310
/* write out result in correct byte order */
311
#define SAVEDATA(x,y) \
312
NEXT(DEST, 0) = x; x>>= 8; \
313
NEXT(DEST, 1) = x; x>>= 8; \
314
NEXT(DEST, 2) = x; x>>= 8; \
316
NEXT(DEST, 4) = y; y>>= 8; \
317
NEXT(DEST, 5) = y; y>>= 8; \
318
NEXT(DEST, 6) = y; y>>= 8; \
320
/* write out result */
325
/* do final permutation and write out result */
328
z = (x ^ y) & 0x55555555L; \
332
SWAP(x, y, 0x00FF00FFL, 010); \
333
SWAP(y, x, 0x33333333L, 002); \
334
SWAP(x, y, 0x0000FFFFL, 020); \
335
SWAP(y, x, 0x0F0F0F0FL, 004); \
339
/* the following macros contain the encryption/decryption skeletons */
341
#define ENCRYPT(NAME, TEMP, LOAD, KEYMAP, SAVE) \
344
NAME(REGISTER BYTE *D, \
345
REGISTER const uint32_t *r, \
346
REGISTER const uint8_t *s) \
348
register uint32_t x, y, z; \
350
/* declare temps & load data */ \
353
/* do the 16 iterations */ \
354
ENCR(x,y,z,r, 0, 1,KEYMAP); \
355
ENCR(y,x,z,r, 2, 3,KEYMAP); \
356
ENCR(x,y,z,r, 4, 5,KEYMAP); \
357
ENCR(y,x,z,r, 6, 7,KEYMAP); \
358
ENCR(x,y,z,r, 8, 9,KEYMAP); \
359
ENCR(y,x,z,r,10,11,KEYMAP); \
360
ENCR(x,y,z,r,12,13,KEYMAP); \
361
ENCR(y,x,z,r,14,15,KEYMAP); \
362
ENCR(x,y,z,r,16,17,KEYMAP); \
363
ENCR(y,x,z,r,18,19,KEYMAP); \
364
ENCR(x,y,z,r,20,21,KEYMAP); \
365
ENCR(y,x,z,r,22,23,KEYMAP); \
366
ENCR(x,y,z,r,24,25,KEYMAP); \
367
ENCR(y,x,z,r,26,27,KEYMAP); \
368
ENCR(x,y,z,r,28,29,KEYMAP); \
369
ENCR(y,x,z,r,30,31,KEYMAP); \
377
#define DECRYPT(NAME, TEMP, LOAD, KEYMAP, SAVE) \
380
NAME(REGISTER BYTE *D, \
381
REGISTER const uint32_t *r, \
382
REGISTER const uint8_t *s) \
384
register uint32_t x, y, z; \
386
/* declare temps & load data */ \
389
/* do the 16 iterations */ \
391
DECR(x,y,z,r,31,30,KEYMAP); \
392
DECR(y,x,z,r,29,28,KEYMAP); \
393
DECR(x,y,z,r,27,26,KEYMAP); \
394
DECR(y,x,z,r,25,24,KEYMAP); \
395
DECR(x,y,z,r,23,22,KEYMAP); \
396
DECR(y,x,z,r,21,20,KEYMAP); \
397
DECR(x,y,z,r,19,18,KEYMAP); \
398
DECR(y,x,z,r,17,16,KEYMAP); \
399
DECR(x,y,z,r,15,14,KEYMAP); \
400
DECR(y,x,z,r,13,12,KEYMAP); \
401
DECR(x,y,z,r,11,10,KEYMAP); \
402
DECR(y,x,z,r, 9, 8,KEYMAP); \
403
DECR(x,y,z,r, 7, 6,KEYMAP); \
404
DECR(y,x,z,r, 5, 4,KEYMAP); \
405
DECR(x,y,z,r, 3, 2,KEYMAP); \
406
DECR(y,x,z,r, 1, 0,KEYMAP); \