/*
 * Scatter the low 24 bits of "src" into four 6-bit groups, one per byte
 * of the scratch union "cvt" (assumed declared by the enclosing
 * function -- not visible in this chunk), then mask the packed word and
 * shift it into "rslt".  "src" is consumed (shifted right 18 bits in
 * total).  "src" is evaluated several times, so it must be a
 * side-effect-free lvalue expression.
 */
#define	TO_SIX_BIT(rslt, src) {				\
		cvt.b[0] = (unsigned char)(src); (src) >>= 6; \
		cvt.b[1] = (unsigned char)(src); (src) >>= 6; \
		cvt.b[2] = (unsigned char)(src); (src) >>= 6; \
		cvt.b[3] = (unsigned char)(src);	\
		(rslt) = (cvt.b32.i0 & 0x3f3f3f3fL) << 2; \
	}
/*
 * 64-bit blocks are handled as a pair of 32-bit halves (d0 = i0, d1 = i1
 * of a block's b32 member).  The first macro argument "d" is an unused
 * placeholder: "These macros may someday permit efficient use of 64-bit
 * integers", at which point d would name the whole block.
 */
#define	ZERO(d,d0,d1)			((d0) = 0, (d1) = 0)
#define	LOAD(d,d0,d1,bl)		((d0) = (bl).b32.i0, (d1) = (bl).b32.i1)
#define	LOADREG(d,d0,d1,s,s0,s1)	((d0) = (s0), (d1) = (s1))
#define	OR(d,d0,d1,bl)			((d0) |= (bl).b32.i0, (d1) |= (bl).b32.i1)
#define	STORE(s,s0,s1,bl)		((bl).b32.i0 = (s0), (bl).b32.i1 = (s1))

/* declare the two halves of a (conceptual) 64-bit block; "d" unused */
#define	DCL_BLOCK(d,d0,d1)		long d0, d1
#if defined(LARGEDATA)
/* "large data": bigger tables, permutations done in line */
#define	LGCHUNKBITS	3
#define	CHUNKBITS	(1<<LGCHUNKBITS)

/*
 * PERM6464: permute a 64-bit block by 8 table lookups (one per source
 * byte at (cpp)[0..7]), OR-ing the partial results into d0/d1.
 * PERM3264: same for a 32-bit source (4 lookups).
 * NOTE(review): these expand to multiple statements -- callers must not
 * use them as the single body of an unbraced if/else.
 */
#define	PERM6464(d,d0,d1,cpp,p)				\
	LOAD((d),(d0),(d1),(p)[(0<<CHUNKBITS)+(cpp)[0]]);	\
	OR ((d),(d0),(d1),(p)[(1<<CHUNKBITS)+(cpp)[1]]);	\
	OR ((d),(d0),(d1),(p)[(2<<CHUNKBITS)+(cpp)[2]]);	\
	OR ((d),(d0),(d1),(p)[(3<<CHUNKBITS)+(cpp)[3]]);	\
	OR ((d),(d0),(d1),(p)[(4<<CHUNKBITS)+(cpp)[4]]);	\
	OR ((d),(d0),(d1),(p)[(5<<CHUNKBITS)+(cpp)[5]]);	\
	OR ((d),(d0),(d1),(p)[(6<<CHUNKBITS)+(cpp)[6]]);	\
	OR ((d),(d0),(d1),(p)[(7<<CHUNKBITS)+(cpp)[7]]);
#define	PERM3264(d,d0,d1,cpp,p)				\
	LOAD((d),(d0),(d1),(p)[(0<<CHUNKBITS)+(cpp)[0]]);	\
	OR ((d),(d0),(d1),(p)[(1<<CHUNKBITS)+(cpp)[1]]);	\
	OR ((d),(d0),(d1),(p)[(2<<CHUNKBITS)+(cpp)[2]]);	\
	OR ((d),(d0),(d1),(p)[(3<<CHUNKBITS)+(cpp)[3]]);
#else						/* "small data" */
#define	LGCHUNKBITS	2
#define	CHUNKBITS	(1<<LGCHUNKBITS)

/* small-data variants: delegate to permute(), 8 or 4 chars at a time */
#define	PERM6464(d,d0,d1,cpp,p)				\
	{ C_block tblk; permute((cpp),&tblk,(p),8); LOAD ((d),(d0),(d1),tblk); }
#define	PERM3264(d,d0,d1,cpp,p)				\
	{ C_block tblk; permute((cpp),&tblk,(p),4); LOAD ((d),(d0),(d1),tblk); }
#endif
305
306
permute(cp, out, p, chars_in)
/*
 * Sparse-table lookup: fetch the long stored at byte offset
 * (i)*(sizeof(long)/4) into table "t".  The index is pre-scaled for a
 * 4-byte long; the sizeof factor adjusts it for wider longs.
 */
#define	SPTAB(t, i)	(*(long *)((unsigned char *)(t) + (i)*(sizeof(long)/4)))
/*
 * DOXOR(x,y,i): XOR table entry i (selected by byte B.b[i]) from the
 * SPE tables into the two result halves x and y.  Three machine-tuned
 * variants; "j" and "k" are scratch variables assumed declared by the
 * caller (not visible in this chunk).
 */
#if defined(gould)
/* use this if B.b[i] is evaluated just once ... */
#define	DOXOR(x,y,i)	(x)^=SPTAB(SPE[0][(i)],B.b[(i)]); (y)^=SPTAB(SPE[1][(i)],B.b[(i)]);
#else
#if defined(pdp11)
/* use this if your "long" int indexing is slow */
#define	DOXOR(x,y,i)	j=B.b[(i)]; (x)^=SPTAB(SPE[0][(i)],j); (y)^=SPTAB(SPE[1][(i)],j);
#else
/* use this if "k" is allocated to a register ... */
#define	DOXOR(x,y,i)	k=B.b[(i)]; (x)^=SPTAB(SPE[0][(i)],k); (y)^=SPTAB(SPE[1][(i)],k);
#endif
#endif
/*
 * CRUNCH: one round of the cipher.  Mix the salt into (q0,q1), XOR in
 * the current key-schedule block *kp, advance kp by ks_inc, then fold
 * the eight SPE table lookups (DOXOR) into (p0,p1).  Relies on
 * file-scope/caller variables k, B, SALT, kp, ks_inc -- declared
 * elsewhere in this file.  Expands to multiple statements; existing
 * callers use it as a full statement sequence.
 */
#define	CRUNCH(p0, p1, q0, q1)	\
	k = ((q0) ^ (q1)) & SALT;	\
	B.b32.i0 = k ^ (q0) ^ kp->b32.i0;	\
	B.b32.i1 = k ^ (q1) ^ kp->b32.i1;	\
	kp = (C_block *)((char *)kp+ks_inc);	\
				\
	DOXOR((p0), (p1), 0);	\
	DOXOR((p0), (p1), 1);	\
	DOXOR((p0), (p1), 2);	\
	DOXOR((p0), (p1), 3);	\
	DOXOR((p0), (p1), 4);	\
	DOXOR((p0), (p1), 5);	\
	DOXOR((p0), (p1), 6);	\
	DOXOR((p0), (p1), 7);
725
726
CRUNCH(L0, L1, R0, R1);
726
727
CRUNCH(R0, R1, L0, L1);