~zooko/cryptopp/trunk

« back to all changes in this revision

Viewing changes to rijndael.cpp

Committer: weidai
Date: 2006-12-09 17:18:13 UTC
Revision ID: svn-v4:57ff6487-cd31-0410-9ec3-f628ee90f5f0:trunk/c5:247

add Salsa20 cipher

files added:
crypto++.mcp

haval.cpp

haval.h

havalcer.dat

md5mac.cpp

md5mac.h

xormac.h

files removed:
TestVectors/sosemanuk.txt

TestVectors/vmac.txt

cpu.cpp

cpu.h

cryptest_bds.bdsgroup

cryptest_bds.bdsproj

cryptest_bds.bpf

cryptlib_bds.bdsproj

cryptlib_bds.cpp

serpentp.h

sosemanuk.cpp

sosemanuk.h

vmac.cpp

vmac.h

x64masm.asm

files modified:
3way.cpp

3way.h

GNUmakefile

License.txt

Readme.txt

TestVectors/aes.txt

TestVectors/all.txt

TestVectors/camellia.txt

TestVectors/panama.txt

TestVectors/salsa.txt

TestVectors/seal.txt

TestVectors/shacal2.txt

TestVectors/tea.txt

adler32.h

algebra.cpp

algparam.h

arc4.cpp

arc4.h

argnames.h

asn.cpp

asn.h

bench.cpp

bench.h

bench2.cpp

blowfish.cpp

blowfish.h

blumshub.cpp

blumshub.h

camellia.cpp

camellia.h

cast.cpp

cast.h

cbcmac.cpp

cbcmac.h

channels.h

config.h

crc.h

cryptdll.dsp

cryptdll.vcproj

cryptest.dsp

cryptest.vcproj

cryptlib.cpp

cryptlib.dsp

cryptlib.h

cryptlib.vcproj

cryptopp.rc

datatest.cpp

des.cpp

des.h

dh2.cpp

dh2.h

dll.cpp

dlltest.cpp

dlltest.dsp

dlltest.vcproj

dmac.h

eccrypto.cpp

eccrypto.h

elgamal.h

eprecomp.cpp

esign.cpp

esign.h

factory.h

files.cpp

filters.cpp

filters.h

fipsalgt.cpp

fipstest.cpp

gf2_32.cpp

gfpcrypt.cpp

gfpcrypt.h

gost.cpp

gost.h

hmac.cpp

hmac.h

hrtimer.cpp

hrtimer.h

ida.cpp

idea.cpp

idea.h

integer.cpp

integer.h

iterhash.cpp

iterhash.h

lubyrack.h

mars.cpp

mars.h

md2.cpp

md2.h

md4.cpp

md4.h

md5.cpp

md5.h

mdc.h

misc.cpp

misc.h

modarith.h

modes.cpp

modes.h

nbtheory.cpp

network.cpp

network.h

osrng.cpp

osrng.h

panama.cpp

panama.h

pkcspad.cpp

pkcspad.h

pssr.cpp

pubkey.h

randpool.cpp

randpool.h

rc2.cpp

rc2.h

rc5.cpp

rc5.h

rc6.cpp

rc6.h

rdtables.cpp

regtest.cpp

rijndael.cpp

rijndael.h

rng.cpp

rng.h

rsa.cpp

rsa.h

rw.cpp

rw.h

safer.cpp

safer.h

salsa.cpp

salsa.h

seal.cpp

seal.h

secblock.h

seckey.h

serpent.cpp

serpent.h

sha.cpp

sha.h

shacal2.cpp

shacal2.h

shark.cpp

shark.h

simple.h

skipjack.cpp

skipjack.h

smartptr.h

socketft.h

square.cpp

square.h

strciphr.cpp

strciphr.h

tea.cpp

tea.h

test.cpp

tiger.cpp

tiger.h

tigertab.cpp

trunhash.h

ttmac.cpp

ttmac.h

twofish.cpp

twofish.h

usage.dat

validat1.cpp

validat2.cpp

validat3.cpp

validate.h

wait.cpp

wake.cpp

wake.h

whrlpool.cpp

whrlpool.h

words.h

zdeflate.cpp

Show diffs side-by-side

added added

removed removed

rijndael.cpp

#include "rijndael.h"

#include "misc.h"

#include "cpu.h"

NAMESPACE_BEGIN(CryptoPP)

void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keylen, const NameValuePairs &)

void Rijndael::Base::UncheckedSetKey(CipherDir dir, const byte *userKey, unsigned int keylen)

{

AssertValidKeyLength(keylen);

word32 temp, *rk = m_key;

const word32 *rc = rcon;

unsigned int i=0;

GetUserKey(BIG_ENDIAN_ORDER, rk, keylen/4, userKey, keylen);

103

rk += keylen/4;

104

}

105

106

if (!IsForwardTransformation())

106

if (dir == DECRYPTION)

107

{

108

unsigned int i, j;

109

rk = m_key;

119

for (i = 1; i < m_rounds; i++) {

120

rk += 4;

121

rk[0] =

122

Td[0*256+Se[GETBYTE(rk[0], 3)]] ^

123

Td[1*256+Se[GETBYTE(rk[0], 2)]] ^

124

Td[2*256+Se[GETBYTE(rk[0], 1)]] ^

125

Td[3*256+Se[GETBYTE(rk[0], 0)]];

122

Td0[Se[GETBYTE(rk[0], 3)]] ^

123

Td1[Se[GETBYTE(rk[0], 2)]] ^

124

Td2[Se[GETBYTE(rk[0], 1)]] ^

125

Td3[Se[GETBYTE(rk[0], 0)]];

126

rk[1] =

127

Td[0*256+Se[GETBYTE(rk[1], 3)]] ^

128

Td[1*256+Se[GETBYTE(rk[1], 2)]] ^

129

Td[2*256+Se[GETBYTE(rk[1], 1)]] ^

130

Td[3*256+Se[GETBYTE(rk[1], 0)]];

127

Td0[Se[GETBYTE(rk[1], 3)]] ^

128

Td1[Se[GETBYTE(rk[1], 2)]] ^

129

Td2[Se[GETBYTE(rk[1], 1)]] ^

130

Td3[Se[GETBYTE(rk[1], 0)]];

131

rk[2] =

132

Td[0*256+Se[GETBYTE(rk[2], 3)]] ^

133

Td[1*256+Se[GETBYTE(rk[2], 2)]] ^

134

Td[2*256+Se[GETBYTE(rk[2], 1)]] ^

135

Td[3*256+Se[GETBYTE(rk[2], 0)]];

132

Td0[Se[GETBYTE(rk[2], 3)]] ^

133

Td1[Se[GETBYTE(rk[2], 2)]] ^

134

Td2[Se[GETBYTE(rk[2], 1)]] ^

135

Td3[Se[GETBYTE(rk[2], 0)]];

136

rk[3] =

137

Td[0*256+Se[GETBYTE(rk[3], 3)]] ^

138

Td[1*256+Se[GETBYTE(rk[3], 2)]] ^

139

Td[2*256+Se[GETBYTE(rk[3], 1)]] ^

140

Td[3*256+Se[GETBYTE(rk[3], 0)]];

137

Td0[Se[GETBYTE(rk[3], 3)]] ^

138

Td1[Se[GETBYTE(rk[3], 2)]] ^

139

Td2[Se[GETBYTE(rk[3], 1)]] ^

140

Td3[Se[GETBYTE(rk[3], 0)]];

141

}

142

}

143

145

ConditionalByteReverse(BIG_ENDIAN_ORDER, m_key + m_rounds*4, m_key + m_rounds*4, 16);

146

}

147

148

#pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code

148

const static unsigned int s_lineSizeDiv4 = CRYPTOPP_L1_CACHE_LINE_SIZE/4;

149

#ifdef IS_BIG_ENDIAN

150

const static unsigned int s_i3=3, s_i2=2, s_i1=1, s_i0=0;

151

#else

152

const static unsigned int s_i3=0, s_i2=1, s_i1=2, s_i0=3;

153

#endif

149

154

150

155

void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const

151

156

{

152

#if defined(CRYPTOPP_X86_ASM_AVAILABLE)

153

if (HasMMX())

154

{

155

const word32 *k = m_key;

156

const word32 *kLoopEnd = k + m_rounds*4;

157

#if CRYPTOPP_BOOL_X64

158

#define K_REG r8

159

#define K_END_REG r9

160

#define SAVE_K

161

#define RESTORE_K

162

#define RESTORE_K_END

163

#define SAVE_0(x) AS2(mov r10d, x)

164

#define SAVE_1(x) AS2(mov r11d, x)

165

#define SAVE_2(x) AS2(mov r12d, x)

166

#define RESTORE_0(x) AS2(mov x, r10d)

167

#define RESTORE_1(x) AS2(mov x, r11d)

168

#define RESTORE_2(x) AS2(mov x, r12d)

169

#else

170

#define K_REG esi

171

#define K_END_REG edi

172

#define SAVE_K AS2(movd mm4, esi)

173

#define RESTORE_K AS2(movd esi, mm4)

174

#define RESTORE_K_END AS2(movd edi, mm5)

175

#define SAVE_0(x) AS2(movd mm0, x)

176

#define SAVE_1(x) AS2(movd mm1, x)

177

#define SAVE_2(x) AS2(movd mm2, x)

178

#define RESTORE_0(x) AS2(movd x, mm0)

179

#define RESTORE_1(x) AS2(movd x, mm1)

180

#define RESTORE_2(x) AS2(movd x, mm2)

181

#endif

182

#ifdef __GNUC__

183

word32 t0, t1, t2, t3;

184

__asm__ __volatile__

185

(

186

".intel_syntax noprefix;"

187

AS_PUSH( bx)

188

AS_PUSH( bp)

189

AS2( mov WORD_REG(bp), WORD_REG(ax))

190

#if CRYPTOPP_BOOL_X64

191

// save these manually. clobber list doesn't seem to work as of GCC 4.1.0

192

AS1( pushq K_REG)

193

AS1( pushq K_END_REG)

194

AS1( pushq r10)

195

AS1( pushq r11)

196

AS1( pushq r12)

197

AS2( mov K_REG, rsi)

198

AS2( mov K_END_REG, rcx)

199

#else

200

AS2( movd mm5, ecx)

201

#endif

202

#else

203

#if _MSC_VER < 1300

204

const word32 *t = Te;

205

AS2( mov eax, t)

206

#endif

207

AS2( mov edx, g_cacheLineSize)

208

AS2( mov WORD_REG(di), inBlock)

209

AS2( mov K_REG, k)

210

AS2( movd mm5, kLoopEnd)

211

#if _MSC_VER < 1300

212

AS_PUSH( bx)

213

AS_PUSH( bp)

214

AS2( mov ebp, eax)

215

#else

216

AS_PUSH( bp)

217

AS2( lea ebp, Te)

218

#endif

219

#endif

220

AS2( mov eax, [K_REG+0*4]) // s0

221

AS2( xor eax, [WORD_REG(di)+0*4])

222

SAVE_0(eax)

223

AS2( mov ebx, [K_REG+1*4])

224

AS2( xor ebx, [WORD_REG(di)+1*4])

225

SAVE_1(ebx)

226

AS2( and ebx, eax)

227

AS2( mov eax, [K_REG+2*4])

228

AS2( xor eax, [WORD_REG(di)+2*4])

229

SAVE_2(eax)

230

AS2( and ebx, eax)

231

AS2( mov ecx, [K_REG+3*4])

232

AS2( xor ecx, [WORD_REG(di)+3*4])

233

AS2( and ebx, ecx)

234

235

// read Te0 into L1 cache. this code could be simplified by using lfence, but that is an SSE2 instruction

236

AS2( and ebx, 0)

237

AS2( mov edi, ebx) // make index depend on previous loads to simulate lfence

238

ASL(2)

239

AS2( and ebx, [WORD_REG(bp)+WORD_REG(di)])

240

AS2( add edi, edx)

241

AS2( and ebx, [WORD_REG(bp)+WORD_REG(di)])

242

AS2( add edi, edx)

243

AS2( and ebx, [WORD_REG(bp)+WORD_REG(di)])

244

AS2( add edi, edx)

245

AS2( and ebx, [WORD_REG(bp)+WORD_REG(di)])

246

AS2( add edi, edx)

247

AS2( cmp edi, 1024)

248

ASJ( jl, 2, b)

249

AS2( and ebx, [WORD_REG(bp)+1020])

250

#if CRYPTOPP_BOOL_X64

251

AS2( xor r10d, ebx)

252

AS2( xor r11d, ebx)

253

AS2( xor r12d, ebx)

254

#else

255

AS2( movd mm6, ebx)

256

AS2( pxor mm2, mm6)

257

AS2( pxor mm1, mm6)

258

AS2( pxor mm0, mm6)

259

#endif

260

AS2( xor ecx, ebx)

261

262

AS2( mov edi, [K_REG+4*4]) // t0

263

AS2( mov eax, [K_REG+5*4])

264

AS2( mov ebx, [K_REG+6*4])

265

AS2( mov edx, [K_REG+7*4])

266

AS2( add K_REG, 8*4)

267

SAVE_K

268

269

#define QUARTER_ROUND(t, a, b, c, d) \

270

AS2(movzx esi, t##l)\

271

AS2(d, [WORD_REG(bp)+0*1024+4*WORD_REG(si)])\

272

AS2(movzx esi, t##h)\

273

AS2(c, [WORD_REG(bp)+1*1024+4*WORD_REG(si)])\

274

AS2(shr e##t##x, 16)\

275

AS2(movzx esi, t##l)\

276

AS2(b, [WORD_REG(bp)+2*1024+4*WORD_REG(si)])\

277

AS2(movzx esi, t##h)\

278

AS2(a, [WORD_REG(bp)+3*1024+4*WORD_REG(si)])

279

280

#define s0 xor edi

281

#define s1 xor eax

282

#define s2 xor ebx

283

#define s3 xor ecx

284

#define t0 xor edi

285

#define t1 xor eax

286

#define t2 xor ebx

287

#define t3 xor edx

288

289

QUARTER_ROUND(c, t0, t1, t2, t3)

290

RESTORE_2(ecx)

291

QUARTER_ROUND(c, t3, t0, t1, t2)

292

RESTORE_1(ecx)

293

QUARTER_ROUND(c, t2, t3, t0, t1)

294

RESTORE_0(ecx)

295

QUARTER_ROUND(c, t1, t2, t3, t0)

296

SAVE_2(ebx)

297

SAVE_1(eax)

298

SAVE_0(edi)

299

#undef QUARTER_ROUND

300

301

RESTORE_K

302

303

ASL(0)

304

AS2( mov edi, [K_REG+0*4])

305

AS2( mov eax, [K_REG+1*4])

306

AS2( mov ebx, [K_REG+2*4])

307

AS2( mov ecx, [K_REG+3*4])

308

309

#define QUARTER_ROUND(t, a, b, c, d) \

310

AS2(movzx esi, t##l)\

311

AS2(a, [WORD_REG(bp)+3*1024+4*WORD_REG(si)])\

312

AS2(movzx esi, t##h)\

313

AS2(b, [WORD_REG(bp)+2*1024+4*WORD_REG(si)])\

314

AS2(shr e##t##x, 16)\

315

AS2(movzx esi, t##l)\

316

AS2(c, [WORD_REG(bp)+1*1024+4*WORD_REG(si)])\

317

AS2(movzx esi, t##h)\

318

AS2(d, [WORD_REG(bp)+0*1024+4*WORD_REG(si)])

319

320

QUARTER_ROUND(d, s0, s1, s2, s3)

321

RESTORE_2(edx)

322

QUARTER_ROUND(d, s3, s0, s1, s2)

323

RESTORE_1(edx)

324

QUARTER_ROUND(d, s2, s3, s0, s1)

325

RESTORE_0(edx)

326

QUARTER_ROUND(d, s1, s2, s3, s0)

327

RESTORE_K

328

SAVE_2(ebx)

329

SAVE_1(eax)

330

SAVE_0(edi)

331

332

AS2( mov edi, [K_REG+4*4])

333

AS2( mov eax, [K_REG+5*4])

334

AS2( mov ebx, [K_REG+6*4])

335

AS2( mov edx, [K_REG+7*4])

336

337

QUARTER_ROUND(c, t0, t1, t2, t3)

338

RESTORE_2(ecx)

339

QUARTER_ROUND(c, t3, t0, t1, t2)

340

RESTORE_1(ecx)

341

QUARTER_ROUND(c, t2, t3, t0, t1)

342

RESTORE_0(ecx)

343

QUARTER_ROUND(c, t1, t2, t3, t0)

344

SAVE_2(ebx)

345

SAVE_1(eax)

346

SAVE_0(edi)

347

348

RESTORE_K

349

RESTORE_K_END

350

AS2( add K_REG, 8*4)

351

SAVE_K

352

AS2( cmp K_END_REG, K_REG)

353

ASJ( jne, 0, b)

354

355

#undef QUARTER_ROUND

356

#undef s0

357

#undef s1

358

#undef s2

359

#undef s3

360

#undef t0

361

#undef t1

362

#undef t2

363

#undef t3

364

365

AS2( mov eax, [K_END_REG+0*4])

366

AS2( mov ecx, [K_END_REG+1*4])

367

AS2( mov esi, [K_END_REG+2*4])

368

AS2( mov edi, [K_END_REG+3*4])

369

370

#define QUARTER_ROUND(a, b, c, d) \

371

AS2( movzx ebx, dl)\

372

AS2( movzx ebx, BYTE PTR [WORD_REG(bp)+1+4*WORD_REG(bx)])\

373

AS2( shl ebx, 3*8)\

374

AS2( xor a, ebx)\

375

AS2( movzx ebx, dh)\

376

AS2( movzx ebx, BYTE PTR [WORD_REG(bp)+1+4*WORD_REG(bx)])\

377

AS2( shl ebx, 2*8)\

378

AS2( xor b, ebx)\

379

AS2( shr edx, 16)\

380

AS2( movzx ebx, dl)\

381

AS2( shr edx, 8)\

382

AS2( movzx ebx, BYTE PTR [WORD_REG(bp)+1+4*WORD_REG(bx)])\

383

AS2( shl ebx, 1*8)\

384

AS2( xor c, ebx)\

385

AS2( movzx ebx, BYTE PTR [WORD_REG(bp)+1+4*WORD_REG(dx)])\

386

AS2( xor d, ebx)

387

388

QUARTER_ROUND(eax, ecx, esi, edi)

389

RESTORE_2(edx)

390

QUARTER_ROUND(edi, eax, ecx, esi)

391

RESTORE_1(edx)

392

QUARTER_ROUND(esi, edi, eax, ecx)

393

RESTORE_0(edx)

394

QUARTER_ROUND(ecx, esi, edi, eax)

395

396

#undef QUARTER_ROUND

397

398

#if CRYPTOPP_BOOL_X64

399

AS1(popq r12)

400

AS1(popq r11)

401

AS1(popq r10)

402

AS1(popq K_END_REG)

403

AS1(popq K_REG)

404

#else

405

AS1(emms)

406

#endif

407

AS_POP( bp)

408

409

#if defined(__GNUC__) || (defined(_MSC_VER) && _MSC_VER < 1300)

410

AS_POP( bx)

411

#endif

412

#ifdef __GNUC__

413

".att_syntax prefix;"

414

: "=a" (t0), "=c" (t1), "=S" (t2), "=D" (t3)

415

: "a" (Te), "D" (inBlock), "S" (k), "c" (kLoopEnd), "d" (g_cacheLineSize)

416

: "memory", "cc"

417

);

418

419

if (xorBlock)

420

{

421

t0 ^= ((const word32 *)xorBlock)[0];

422

t1 ^= ((const word32 *)xorBlock)[1];

423

t2 ^= ((const word32 *)xorBlock)[2];

424

t3 ^= ((const word32 *)xorBlock)[3];

425

}

426

((word32 *)outBlock)[0] = t0;

427

((word32 *)outBlock)[1] = t1;

428

((word32 *)outBlock)[2] = t2;

429

((word32 *)outBlock)[3] = t3;

430

#else

431

AS2( mov WORD_REG(bx), xorBlock)

432

AS2( test WORD_REG(bx), WORD_REG(bx))

433

ASJ( jz, 1, f)

434

AS2( xor eax, [WORD_REG(bx)+0*4])

435

AS2( xor ecx, [WORD_REG(bx)+1*4])

436

AS2( xor esi, [WORD_REG(bx)+2*4])

437

AS2( xor edi, [WORD_REG(bx)+3*4])

438

ASL(1)

439

AS2( mov WORD_REG(bx), outBlock)

440

AS2( mov [WORD_REG(bx)+0*4], eax)

441

AS2( mov [WORD_REG(bx)+1*4], ecx)

442

AS2( mov [WORD_REG(bx)+2*4], esi)

443

AS2( mov [WORD_REG(bx)+3*4], edi)

444

#endif

445

}

446

else

447

#endif // #ifdef CRYPTOPP_X86_ASM_AVAILABLE

448

{

449

157

word32 s0, s1, s2, s3, t0, t1, t2, t3;

450

158

const word32 *rk = m_key;

451

159

460

168

rk += 8;

461

169

462

170

// timing attack countermeasure. see comments at top for more details

463

const int cacheLineSize = GetCacheLineSize();

464

171

unsigned int i;

465

172

word32 u = 0;

466

for (i=0; i<1024; i+=cacheLineSize)

467

u &= *(const word32 *)(((const byte *)Te)+i);

468

u &= Te[255];

173

for (i=0; i<sizeof(Te0)/4; i+=CRYPTOPP_L1_CACHE_LINE_SIZE)

174

u &= (Te0[i+0*s_lineSizeDiv4] & Te0[i+2*s_lineSizeDiv4]) & (Te0[i+1*s_lineSizeDiv4] & Te0[i+3*s_lineSizeDiv4]);

469

175

s0 |= u; s1 |= u; s2 |= u; s3 |= u;

470

176

471

177

// first round

472

#ifdef IS_BIG_ENDIAN

473

#define QUARTER_ROUND(t, a, b, c, d) \

474

a ^= rotrFixed(Te[byte(t)], 24); t >>= 8;\

475

b ^= rotrFixed(Te[byte(t)], 16); t >>= 8;\

476

c ^= rotrFixed(Te[byte(t)], 8); t >>= 8;\

477

d ^= Te[t];

478

#else

479

#define QUARTER_ROUND(t, a, b, c, d) \

480

d ^= Te[byte(t)]; t >>= 8;\

481

c ^= rotrFixed(Te[byte(t)], 8); t >>= 8;\

482

b ^= rotrFixed(Te[byte(t)], 16); t >>= 8;\

483

a ^= rotrFixed(Te[t], 24);

484

#endif

485

486

QUARTER_ROUND(s3, t0, t1, t2, t3)

487

QUARTER_ROUND(s2, t3, t0, t1, t2)

488

QUARTER_ROUND(s1, t2, t3, t0, t1)

489

QUARTER_ROUND(s0, t1, t2, t3, t0)

490

#undef QUARTER_ROUND

178

t0 ^=

179

Te0[GETBYTE(s0, s_i3)] ^

180

rotrFixed(Te0[GETBYTE(s1, s_i2)], 8) ^

181

rotrFixed(Te0[GETBYTE(s2, s_i1)], 16) ^

182

rotrFixed(Te0[GETBYTE(s3, s_i0)], 24);

183

t1 ^=

184

Te0[GETBYTE(s1, s_i3)] ^

185

rotrFixed(Te0[GETBYTE(s2, s_i2)], 8) ^

186

rotrFixed(Te0[GETBYTE(s3, s_i1)], 16) ^

187

rotrFixed(Te0[GETBYTE(s0, s_i0)], 24);

188

t2 ^=

189

Te0[GETBYTE(s2, s_i3)] ^

190

rotrFixed(Te0[GETBYTE(s3, s_i2)], 8) ^

191

rotrFixed(Te0[GETBYTE(s0, s_i1)], 16) ^

192

rotrFixed(Te0[GETBYTE(s1, s_i0)], 24);

193

t3 ^=

194

Te0[GETBYTE(s3, s_i3)] ^

195

rotrFixed(Te0[GETBYTE(s0, s_i2)], 8) ^

196

rotrFixed(Te0[GETBYTE(s1, s_i1)], 16) ^

197

rotrFixed(Te0[GETBYTE(s2, s_i0)], 24);

491

198

492

199

// Nr - 2 full rounds:

493

200

unsigned int r = m_rounds/2 - 1;

494

201

495

202

{

496

#define QUARTER_ROUND(t, a, b, c, d) \

497

a ^= Te[3*256+byte(t)]; t >>= 8;\

498

b ^= Te[2*256+byte(t)]; t >>= 8;\

499

c ^= Te[1*256+byte(t)]; t >>= 8;\

500

d ^= Te[t];

501

502

s0 = rk[0]; s1 = rk[1]; s2 = rk[2]; s3 = rk[3];

503

504

QUARTER_ROUND(t3, s0, s1, s2, s3)

505

QUARTER_ROUND(t2, s3, s0, s1, s2)

506

QUARTER_ROUND(t1, s2, s3, s0, s1)

507

QUARTER_ROUND(t0, s1, s2, s3, s0)

508

509

t0 = rk[4]; t1 = rk[5]; t2 = rk[6]; t3 = rk[7];

510

511

QUARTER_ROUND(s3, t0, t1, t2, t3)

512

QUARTER_ROUND(s2, t3, t0, t1, t2)

513

QUARTER_ROUND(s1, t2, t3, t0, t1)

514

QUARTER_ROUND(s0, t1, t2, t3, t0)

515

#undef QUARTER_ROUND

203

s0 =

204

Te0[GETBYTE(t0, 3)] ^

205

Te1[GETBYTE(t1, 2)] ^

206

Te2[GETBYTE(t2, 1)] ^

207

Te3[GETBYTE(t3, 0)] ^

208

rk[0];

209

s1 =

210

Te0[GETBYTE(t1, 3)] ^

211

Te1[GETBYTE(t2, 2)] ^

212

Te2[GETBYTE(t3, 1)] ^

213

Te3[GETBYTE(t0, 0)] ^

214

rk[1];

215

s2 =

216

Te0[GETBYTE(t2, 3)] ^

217

Te1[GETBYTE(t3, 2)] ^

218

Te2[GETBYTE(t0, 1)] ^

219

Te3[GETBYTE(t1, 0)] ^

220

rk[2];

221

s3 =

222

Te0[GETBYTE(t3, 3)] ^

223

Te1[GETBYTE(t0, 2)] ^

224

Te2[GETBYTE(t1, 1)] ^

225

Te3[GETBYTE(t2, 0)] ^

226

rk[3];

227

228

t0 =

229

Te0[GETBYTE(s0, 3)] ^

230

Te1[GETBYTE(s1, 2)] ^

231

Te2[GETBYTE(s2, 1)] ^

232

Te3[GETBYTE(s3, 0)] ^

233

rk[4];

234

t1 =

235

Te0[GETBYTE(s1, 3)] ^

236

Te1[GETBYTE(s2, 2)] ^

237

Te2[GETBYTE(s3, 1)] ^

238

Te3[GETBYTE(s0, 0)] ^

239

rk[5];

240

t2 =

241

Te0[GETBYTE(s2, 3)] ^

242

Te1[GETBYTE(s3, 2)] ^

243

Te2[GETBYTE(s0, 1)] ^

244

Te3[GETBYTE(s1, 0)] ^

245

rk[6];

246

t3 =

247

Te0[GETBYTE(s3, 3)] ^

248

Te1[GETBYTE(s0, 2)] ^

249

Te2[GETBYTE(s1, 1)] ^

250

Te3[GETBYTE(s2, 0)] ^

251

rk[7];

516

252

517

253

rk += 8;

518

254

} while (--r);

519

255

520

256

// timing attack countermeasure. see comments at top for more details

521

257

u = 0;

522

for (i=0; i<256; i+=cacheLineSize)

523

u &= *(const word32 *)(Se+i);

524

u &= *(const word32 *)(Se+252);

258

for (i=0; i<sizeof(Se)/4; i+=CRYPTOPP_L1_CACHE_LINE_SIZE)

259

u &= (((word32*)Se)[i+0*s_lineSizeDiv4] & ((word32*)Se)[i+2*s_lineSizeDiv4]) & (((word32*)Se)[i+1*s_lineSizeDiv4] & ((word32*)Se)[i+3*s_lineSizeDiv4]);

525

260

t0 |= u; t1 |= u; t2 |= u; t3 |= u;

526

261

527

262

word32 tbw[4];

529

264

word32 *const obw = (word32 *)outBlock;

530

265

const word32 *const xbw = (const word32 *)xorBlock;

531

266

532

#define QUARTER_ROUND(t, a, b, c, d) \

533

tempBlock[a] = Se[byte(t)]; t >>= 8;\

534

tempBlock[b] = Se[byte(t)]; t >>= 8;\

535

tempBlock[c] = Se[byte(t)]; t >>= 8;\

536

tempBlock[d] = Se[t];

537

538

QUARTER_ROUND(t2, 15, 2, 5, 8)

539

QUARTER_ROUND(t1, 11, 14, 1, 4)

540

QUARTER_ROUND(t0, 7, 10, 13, 0)

541

QUARTER_ROUND(t3, 3, 6, 9, 12)

542

#undef QUARTER_ROUND

267

// last round

268

tempBlock[0] = Se[GETBYTE(t0, 3)];

269

tempBlock[1] = Se[GETBYTE(t1, 2)];

270

tempBlock[2] = Se[GETBYTE(t2, 1)];

271

tempBlock[3] = Se[GETBYTE(t3, 0)];

272

tempBlock[4] = Se[GETBYTE(t1, 3)];

273

tempBlock[5] = Se[GETBYTE(t2, 2)];

274

tempBlock[6] = Se[GETBYTE(t3, 1)];

275

tempBlock[7] = Se[GETBYTE(t0, 0)];

276

tempBlock[8] = Se[GETBYTE(t2, 3)];

277

tempBlock[9] = Se[GETBYTE(t3, 2)];

278

tempBlock[10] = Se[GETBYTE(t0, 1)];

279

tempBlock[11] = Se[GETBYTE(t1, 0)];

280

tempBlock[12] = Se[GETBYTE(t3, 3)];

281

tempBlock[13] = Se[GETBYTE(t0, 2)];

282

tempBlock[14] = Se[GETBYTE(t1, 1)];

283

tempBlock[15] = Se[GETBYTE(t2, 0)];

543

284

544

285

if (xbw)

545

286

{

555

296

obw[2] = tbw[2] ^ rk[2];

556

297

obw[3] = tbw[3] ^ rk[3];

557

298

}

558

}

559

299

}

560

300

561

301

void Rijndael::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const

562

302

{

563

303

word32 s0, s1, s2, s3, t0, t1, t2, t3;

564

const word32 *rk = m_key;

304

const word32 *rk = m_key;

565

305

566

306

s0 = ((const word32 *)inBlock)[0] ^ rk[0];

567

307

s1 = ((const word32 *)inBlock)[1] ^ rk[1];

574

314

rk += 8;

575

315

576

316

// timing attack countermeasure. see comments at top for more details

577

const int cacheLineSize = GetCacheLineSize();

578

317

unsigned int i;

579

318

word32 u = 0;

580

for (i=0; i<1024; i+=cacheLineSize)

581

u &= *(const word32 *)(((const byte *)Td)+i);

582

u &= Td[255];

319

for (i=0; i<sizeof(Td0)/4; i+=CRYPTOPP_L1_CACHE_LINE_SIZE)

320

u &= (Td0[i+0*s_lineSizeDiv4] & Td0[i+2*s_lineSizeDiv4]) & (Td0[i+1*s_lineSizeDiv4] & Td0[i+3*s_lineSizeDiv4]);

583

321

s0 |= u; s1 |= u; s2 |= u; s3 |= u;

584

322

585

323

// first round

586

#ifdef IS_BIG_ENDIAN

587

#define QUARTER_ROUND(t, a, b, c, d) \

588

a ^= rotrFixed(Td[byte(t)], 24); t >>= 8;\

589

b ^= rotrFixed(Td[byte(t)], 16); t >>= 8;\

590

c ^= rotrFixed(Td[byte(t)], 8); t >>= 8;\

591

d ^= Td[t];

592

#else

593

#define QUARTER_ROUND(t, a, b, c, d) \

594

d ^= Td[byte(t)]; t >>= 8;\

595

c ^= rotrFixed(Td[byte(t)], 8); t >>= 8;\

596

b ^= rotrFixed(Td[byte(t)], 16); t >>= 8;\

597

a ^= rotrFixed(Td[t], 24);

598

#endif

599

600

QUARTER_ROUND(s3, t2, t1, t0, t3)

601

QUARTER_ROUND(s2, t1, t0, t3, t2)

602

QUARTER_ROUND(s1, t0, t3, t2, t1)

603

QUARTER_ROUND(s0, t3, t2, t1, t0)

604

#undef QUARTER_ROUND

324

t0 ^=

325

Td0[GETBYTE(s0, s_i3)] ^

326

rotrFixed(Td0[GETBYTE(s3, s_i2)], 8) ^

327

rotrFixed(Td0[GETBYTE(s2, s_i1)], 16) ^

328

rotrFixed(Td0[GETBYTE(s1, s_i0)], 24);

329

t1 ^=

330

Td0[GETBYTE(s1, s_i3)] ^

331

rotrFixed(Td0[GETBYTE(s0, s_i2)], 8) ^

332

rotrFixed(Td0[GETBYTE(s3, s_i1)], 16) ^

333

rotrFixed(Td0[GETBYTE(s2, s_i0)], 24);

334

t2 ^=

335

Td0[GETBYTE(s2, s_i3)] ^

336

rotrFixed(Td0[GETBYTE(s1, s_i2)], 8) ^

337

rotrFixed(Td0[GETBYTE(s0, s_i1)], 16) ^

338

rotrFixed(Td0[GETBYTE(s3, s_i0)], 24);

339

t3 ^=

340

Td0[GETBYTE(s3, s_i3)] ^

341

rotrFixed(Td0[GETBYTE(s2, s_i2)], 8) ^

342

rotrFixed(Td0[GETBYTE(s1, s_i1)], 16) ^

343

rotrFixed(Td0[GETBYTE(s0, s_i0)], 24);

605

344

606

345

// Nr - 2 full rounds:

607

346

unsigned int r = m_rounds/2 - 1;

608

347

609

348

{

610

#define QUARTER_ROUND(t, a, b, c, d) \

611

a ^= Td[3*256+byte(t)]; t >>= 8;\

612

b ^= Td[2*256+byte(t)]; t >>= 8;\

613

c ^= Td[1*256+byte(t)]; t >>= 8;\

614

d ^= Td[t];

615

616

s0 = rk[0]; s1 = rk[1]; s2 = rk[2]; s3 = rk[3];

617

618

QUARTER_ROUND(t3, s2, s1, s0, s3)

619

QUARTER_ROUND(t2, s1, s0, s3, s2)

620

QUARTER_ROUND(t1, s0, s3, s2, s1)

621

QUARTER_ROUND(t0, s3, s2, s1, s0)

622

623

t0 = rk[4]; t1 = rk[5]; t2 = rk[6]; t3 = rk[7];

624

625

QUARTER_ROUND(s3, t2, t1, t0, t3)

626

QUARTER_ROUND(s2, t1, t0, t3, t2)

627

QUARTER_ROUND(s1, t0, t3, t2, t1)

628

QUARTER_ROUND(s0, t3, t2, t1, t0)

629

#undef QUARTER_ROUND

349

s0 =

350

Td0[GETBYTE(t0, 3)] ^

351

Td1[GETBYTE(t3, 2)] ^

352

Td2[GETBYTE(t2, 1)] ^

353

Td3[GETBYTE(t1, 0)] ^

354

rk[0];

355

s1 =

356

Td0[GETBYTE(t1, 3)] ^

357

Td1[GETBYTE(t0, 2)] ^

358

Td2[GETBYTE(t3, 1)] ^

359

Td3[GETBYTE(t2, 0)] ^

360

rk[1];

361

s2 =

362

Td0[GETBYTE(t2, 3)] ^

363

Td1[GETBYTE(t1, 2)] ^

364

Td2[GETBYTE(t0, 1)] ^

365

Td3[GETBYTE(t3, 0)] ^

366

rk[2];

367

s3 =

368

Td0[GETBYTE(t3, 3)] ^

369

Td1[GETBYTE(t2, 2)] ^

370

Td2[GETBYTE(t1, 1)] ^

371

Td3[GETBYTE(t0, 0)] ^

372

rk[3];

373

374

t0 =

375

Td0[GETBYTE(s0, 3)] ^

376

Td1[GETBYTE(s3, 2)] ^

377

Td2[GETBYTE(s2, 1)] ^

378

Td3[GETBYTE(s1, 0)] ^

379

rk[4];

380

t1 =

381

Td0[GETBYTE(s1, 3)] ^

382

Td1[GETBYTE(s0, 2)] ^

383

Td2[GETBYTE(s3, 1)] ^

384

Td3[GETBYTE(s2, 0)] ^

385

rk[5];

386

t2 =

387

Td0[GETBYTE(s2, 3)] ^

388

Td1[GETBYTE(s1, 2)] ^

389

Td2[GETBYTE(s0, 1)] ^

390

Td3[GETBYTE(s3, 0)] ^

391

rk[6];

392

t3 =

393

Td0[GETBYTE(s3, 3)] ^

394

Td1[GETBYTE(s2, 2)] ^

395

Td2[GETBYTE(s1, 1)] ^

396

Td3[GETBYTE(s0, 0)] ^

397

rk[7];

630

398

631

399

rk += 8;

632

400

} while (--r);

633

401

634

402

// timing attack countermeasure. see comments at top for more details

635

403

u = 0;

636

for (i=0; i<256; i+=cacheLineSize)

637

u &= *(const word32 *)(Sd+i);

638

u &= *(const word32 *)(Sd+252);

404

for (i=0; i<sizeof(Sd)/4; i+=CRYPTOPP_L1_CACHE_LINE_SIZE)

405

u &= (((word32*)Sd)[i+0*s_lineSizeDiv4] & ((word32*)Sd)[i+2*s_lineSizeDiv4]) & (((word32*)Sd)[i+1*s_lineSizeDiv4] & ((word32*)Sd)[i+3*s_lineSizeDiv4]);

639

406

t0 |= u; t1 |= u; t2 |= u; t3 |= u;

640

407

641

408

word32 tbw[4];

643

410

word32 *const obw = (word32 *)outBlock;

644

411

const word32 *const xbw = (const word32 *)xorBlock;

645

412

646

#define QUARTER_ROUND(t, a, b, c, d) \

647

tempBlock[a] = Sd[byte(t)]; t >>= 8;\

648

tempBlock[b] = Sd[byte(t)]; t >>= 8;\

649

tempBlock[c] = Sd[byte(t)]; t >>= 8;\

650

tempBlock[d] = Sd[t];

651

652

QUARTER_ROUND(t2, 7, 2, 13, 8)

653

QUARTER_ROUND(t1, 3, 14, 9, 4)

654

QUARTER_ROUND(t0, 15, 10, 5, 0)

655

QUARTER_ROUND(t3, 11, 6, 1, 12)

656

#undef QUARTER_ROUND

413

// last round

414

tempBlock[0] = Sd[GETBYTE(t0, 3)];

415

tempBlock[1] = Sd[GETBYTE(t3, 2)];

416

tempBlock[2] = Sd[GETBYTE(t2, 1)];

417

tempBlock[3] = Sd[GETBYTE(t1, 0)];

418

tempBlock[4] = Sd[GETBYTE(t1, 3)];

419

tempBlock[5] = Sd[GETBYTE(t0, 2)];

420

tempBlock[6] = Sd[GETBYTE(t3, 1)];

421

tempBlock[7] = Sd[GETBYTE(t2, 0)];

422

tempBlock[8] = Sd[GETBYTE(t2, 3)];

423

tempBlock[9] = Sd[GETBYTE(t1, 2)];

424

tempBlock[10] = Sd[GETBYTE(t0, 1)];

425

tempBlock[11] = Sd[GETBYTE(t3, 0)];

426

tempBlock[12] = Sd[GETBYTE(t3, 3)];

427

tempBlock[13] = Sd[GETBYTE(t2, 2)];

428

tempBlock[14] = Sd[GETBYTE(t1, 1)];

429

tempBlock[15] = Sd[GETBYTE(t0, 0)];

657

430

658

431

if (xbw)

659

432

{

Older »