~ubuntu-branches/ubuntu/quantal/memcached/quantal

Viewing changes to hash.c

Committer: Bazaar Package Importer
Author(s): David Martínez Moreno
Date: 2009-10-16 15:09:43 UTC
mfrom: (1.1.6 upstream) (3.1.7 sid)
Revision ID: james.westby@ubuntu.com-20091016150943-0rhh8x206ebgwzeu

Tags: 1.4.2-1

* New upstream release, primarily bugfixes, some of them critical, hence
  the urgency:
  - Reject keys larger than 250 bytes in the binary protocol.
  - Bounds checking on stats cachedump.
  - Binary protocol set+cas wasn't returning a new cas ID.
  - Binary quitq didn't actually close the connection
  - Slab boundary checking cleanup (bad logic in unreachable code)
  - Get hit memory optimizations
  - Disallow -t options that cause the server to not work
  - Killed off incomplete slab rebalance feature.
* debian/patches:
  - 01_init_script_compliant_with_LSB.patch: Remade as upstream applied a
    whitespace cleanup script that broke the patch.
  - 02_manpage_additions.patch: Added missing parameters to the memcached
    manpage.
* Removed TODO from debian/docs.

files added:
cache.c

cache.h

debian/README.source

debian/patches

debian/patches/01_init_script_compliant_with_LSB.patch

debian/patches/02_manpage_additions.patch

debian/patches/series

doc/protocol-binary-range.xml

doc/protocol-binary.xml

doc/readme.txt

doc/xml2rfc

doc/xml2rfc/reference.RFC.0768.xml

doc/xml2rfc/rfc2629-noinc.xsl

doc/xml2rfc/rfc2629-other.ent

doc/xml2rfc/rfc2629-refchk.xsl

doc/xml2rfc/rfc2629-xhtml.ent

doc/xml2rfc/rfc2629.dtd

hash.c

hash.h

protocol_binary.h

sizes.c

solaris_priv.c

t/binary.t

t/dash-M.t

t/evictions.t

t/issue_14.t

t/issue_22.t

t/issue_41.t

t/issue_42.t

t/issue_50.t

t/issue_61.t

t/issue_70.t

t/item_size_max.t

testapp.c

timedrun.c

trace.h

util.c

util.h

version.m4

files removed:
TODO

doc/binary-protocol-plan.txt

memcached_dtrace.h

t/delete-window.t

t/slab-reassign.t

files modified:
ChangeLog

Makefile.am

Makefile.in

README

assoc.c

assoc.h

config.h.in

configure

configure.ac

daemon.c

debian/changelog

debian/compat

debian/control

debian/docs

debian/memcached.default

debian/rules

debian/watch

doc/CONTRIBUTORS

doc/Doxyfile

doc/Makefile

doc/Makefile.am

doc/Makefile.in

doc/memcached.1

doc/memory_management.txt

doc/protocol.txt

items.c

items.h

memcached.c

memcached.h

memcached.spec

memcached_dtrace.d

scripts/memcached-init

scripts/memcached-tool

scripts/memcached.sysv

scripts/start-memcached

slabs.c

slabs.h

stats.c

stats.h

t/00-startup.t

t/cas.t

t/getset.t

t/incrdecr.t

t/lib/MemcachedTest.pm

t/noreply.t

t/stats.t

t/udp.t

t/whitespace.t

thread.c

Show diffs side-by-side

added added

removed removed

hash.c

/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */

* Hash table

* The hash function used here is by Bob Jenkins, 1996:

* <http://burtleburtle.net/bob/hash/doobs.html>

* "By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net.

* You may use this code any way you wish, private, educational,

* or commercial. It's free."

#include "memcached.h"

* Since the hash function does bit manipulation, it needs to know

* whether it's big or little-endian. ENDIAN_LITTLE and ENDIAN_BIG

* are set in the configure script.

#if ENDIAN_BIG == 1

# define HASH_LITTLE_ENDIAN 0

# define HASH_BIG_ENDIAN 1

#else

# if ENDIAN_LITTLE == 1

# define HASH_LITTLE_ENDIAN 1

# define HASH_BIG_ENDIAN 0

# else

# define HASH_LITTLE_ENDIAN 0

# define HASH_BIG_ENDIAN 0

# endif

#endif

#define rot(x,k) (((x)<<(k)) ^ ((x)>>(32-(k))))

-------------------------------------------------------------------------------

mix -- mix 3 32-bit values reversibly.

This is reversible, so any information in (a,b,c) before mix() is

still in (a,b,c) after mix().

If four pairs of (a,b,c) inputs are run through mix(), or through

mix() in reverse, there are at least 32 bits of the output that

are sometimes the same for one pair and different for another pair.

This was tested for:

* pairs that differed by one bit, by two bits, in any combination

of top bits of (a,b,c), or in any combination of bottom bits of

(a,b,c).

* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed

the output delta to a Gray code (a^(a>>1)) so a string of 1's (as

is commonly produced by subtraction) look like a single 1-bit

difference.

* the base values were pseudorandom, all zero but one bit set, or

all zero plus a counter that starts at zero.

Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that

satisfy this are

4 6 8 16 19 4

9 15 3 18 27 15

14 9 3 7 17 3

Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing

for "differ" defined as + with a one-bit base and a two-bit delta. I

used http://burtleburtle.net/bob/hash/avalanche.html to choose

the operations, constants, and arrangements of the variables.

This does not achieve avalanche. There are input bits of (a,b,c)

that fail to affect some output bits of (a,b,c), especially of a. The

most thoroughly mixed value is c, but it doesn't really even achieve

avalanche in c.

This allows some parallelism. Read-after-writes are good at doubling

the number of bits affected, so the goal of mixing pulls in the opposite

direction as the goal of parallelism. I did what I could. Rotates

seem to cost as much as shifts on every machine I could lay my hands

on, and rotates are much kinder to the top and bottom bits, so I used

rotates.

-------------------------------------------------------------------------------

#define mix(a,b,c) \

{ \

a -= c; a ^= rot(c, 4); c += b; \

b -= a; b ^= rot(a, 6); a += c; \

c -= b; c ^= rot(b, 8); b += a; \

a -= c; a ^= rot(c,16); c += b; \

b -= a; b ^= rot(a,19); a += c; \

c -= b; c ^= rot(b, 4); b += a; \

}

-------------------------------------------------------------------------------

final -- final mixing of 3 32-bit values (a,b,c) into c

Pairs of (a,b,c) values differing in only a few bits will usually

produce values of c that look totally different. This was tested for

* pairs that differed by one bit, by two bits, in any combination

of top bits of (a,b,c), or in any combination of bottom bits of

(a,b,c).

* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed

the output delta to a Gray code (a^(a>>1)) so a string of 1's (as

is commonly produced by subtraction) look like a single 1-bit

100

difference.

101

* the base values were pseudorandom, all zero but one bit set, or

102

all zero plus a counter that starts at zero.

103

104

These constants passed:

105

14 11 25 16 4 14 24

106

12 14 25 16 4 14 24

107

and these came close:

108

4 8 15 26 3 22 24

109

10 8 15 26 3 22 24

110

11 8 15 26 3 22 24

111

-------------------------------------------------------------------------------

112

113

#define final(a,b,c) \

114

{ \

115

c ^= b; c -= rot(b,14); \

116

a ^= c; a -= rot(c,11); \

117

b ^= a; b -= rot(a,25); \

118

c ^= b; c -= rot(b,16); \

119

a ^= c; a -= rot(c,4); \

120

b ^= a; b -= rot(a,14); \

121

c ^= b; c -= rot(b,24); \

122

}

123

124

#if HASH_LITTLE_ENDIAN == 1

125

uint32_t hash(

126

const void *key, /* the key to hash */

127

size_t length, /* length of the key */

128

const uint32_t initval) /* initval */

129

{

130

uint32_t a,b,c; /* internal state */

131

union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */

132

133

/* Set up the internal state */

134

a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;

135

136

u.ptr = key;

137

if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {

138

const uint32_t *k = key; /* read 32-bit chunks */

139

#ifdef VALGRIND

140

const uint8_t *k8;

141

#endif /* ifdef VALGRIND */

142

143

/*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */

144

while (length > 12)

145

{

146

a += k[0];

147

b += k[1];

148

c += k[2];

149

mix(a,b,c);

150

length -= 12;

151

k += 3;

152

}

153

154

/*----------------------------- handle the last (probably partial) block */

155

156

* "k[2]&0xffffff" actually reads beyond the end of the string, but

157

* then masks off the part it's not allowed to read. Because the

158

* string is aligned, the masked-off tail is in the same word as the

159

* rest of the string. Every machine with memory protection I've seen

160

* does it on word boundaries, so is OK with this. But VALGRIND will

161

* still catch it and complain. The masking trick does make the hash

162

* noticably faster for short strings (like English words).

163

164

#ifndef VALGRIND

165

166

switch(length)

167

{

168

case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;

169

case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;

170

case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;

171

case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;

172

case 8 : b+=k[1]; a+=k[0]; break;

173

case 7 : b+=k[1]&0xffffff; a+=k[0]; break;

174

case 6 : b+=k[1]&0xffff; a+=k[0]; break;

175

case 5 : b+=k[1]&0xff; a+=k[0]; break;

176

case 4 : a+=k[0]; break;

177

case 3 : a+=k[0]&0xffffff; break;

178

case 2 : a+=k[0]&0xffff; break;

179

case 1 : a+=k[0]&0xff; break;

180

case 0 : return c; /* zero length strings require no mixing */

181

}

182

183

#else /* make valgrind happy */

184

185

k8 = (const uint8_t *)k;

186

switch(length)

187

{

188

case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;

189

case 11: c+=((uint32_t)k8[10])<<16; /* fall through */

190

case 10: c+=((uint32_t)k8[9])<<8; /* fall through */

191

case 9 : c+=k8[8]; /* fall through */

192

case 8 : b+=k[1]; a+=k[0]; break;

193

case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */

194

case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */

195

case 5 : b+=k8[4]; /* fall through */

196

case 4 : a+=k[0]; break;

197

case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */

198

case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */

199

case 1 : a+=k8[0]; break;

200

case 0 : return c; /* zero length strings require no mixing */

201

}

202

203

#endif /* !valgrind */

204

205

} else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {

206

const uint16_t *k = key; /* read 16-bit chunks */

207

const uint8_t *k8;

208

209

/*--------------- all but last block: aligned reads and different mixing */

210

while (length > 12)

211

{

212

a += k[0] + (((uint32_t)k[1])<<16);

213

b += k[2] + (((uint32_t)k[3])<<16);

214

c += k[4] + (((uint32_t)k[5])<<16);

215

mix(a,b,c);

216

length -= 12;

217

k += 6;

218

}

219

220

/*----------------------------- handle the last (probably partial) block */

221

k8 = (const uint8_t *)k;

222

switch(length)

223

{

224

case 12: c+=k[4]+(((uint32_t)k[5])<<16);

225

b+=k[2]+(((uint32_t)k[3])<<16);

226

a+=k[0]+(((uint32_t)k[1])<<16);

227

break;

228

case 11: c+=((uint32_t)k8[10])<<16; /* @fallthrough */

229

case 10: c+=k[4]; /* @fallthrough@ */

230

b+=k[2]+(((uint32_t)k[3])<<16);

231

a+=k[0]+(((uint32_t)k[1])<<16);

232

break;

233

case 9 : c+=k8[8]; /* @fallthrough */

234

case 8 : b+=k[2]+(((uint32_t)k[3])<<16);

235

a+=k[0]+(((uint32_t)k[1])<<16);

236

break;

237

case 7 : b+=((uint32_t)k8[6])<<16; /* @fallthrough */

238

case 6 : b+=k[2];

239

a+=k[0]+(((uint32_t)k[1])<<16);

240

break;

241

case 5 : b+=k8[4]; /* @fallthrough */

242

case 4 : a+=k[0]+(((uint32_t)k[1])<<16);

243

break;

244

case 3 : a+=((uint32_t)k8[2])<<16; /* @fallthrough */

245

case 2 : a+=k[0];

246

break;

247

case 1 : a+=k8[0];

248

break;

249

case 0 : return c; /* zero length strings require no mixing */

250

}

251

252

} else { /* need to read the key one byte at a time */

253

const uint8_t *k = key;

254

255

/*--------------- all but the last block: affect some 32 bits of (a,b,c) */

256

while (length > 12)

257

{

258

a += k[0];

259

a += ((uint32_t)k[1])<<8;

260

a += ((uint32_t)k[2])<<16;

261

a += ((uint32_t)k[3])<<24;

262

b += k[4];

263

b += ((uint32_t)k[5])<<8;

264

b += ((uint32_t)k[6])<<16;

265

b += ((uint32_t)k[7])<<24;

266

c += k[8];

267

c += ((uint32_t)k[9])<<8;

268

c += ((uint32_t)k[10])<<16;

269

c += ((uint32_t)k[11])<<24;

270

mix(a,b,c);

271

length -= 12;

272

k += 12;

273

}

274

275

/*-------------------------------- last block: affect all 32 bits of (c) */

276

switch(length) /* all the case statements fall through */

277

{

278

case 12: c+=((uint32_t)k[11])<<24;

279

case 11: c+=((uint32_t)k[10])<<16;

280

case 10: c+=((uint32_t)k[9])<<8;

281

case 9 : c+=k[8];

282

case 8 : b+=((uint32_t)k[7])<<24;

283

case 7 : b+=((uint32_t)k[6])<<16;

284

case 6 : b+=((uint32_t)k[5])<<8;

285

case 5 : b+=k[4];

286

case 4 : a+=((uint32_t)k[3])<<24;

287

case 3 : a+=((uint32_t)k[2])<<16;

288

case 2 : a+=((uint32_t)k[1])<<8;

289

case 1 : a+=k[0];

290

break;

291

case 0 : return c; /* zero length strings require no mixing */

292

}

293

}

294

295

final(a,b,c);

296

return c; /* zero length strings require no mixing */

297

}

298

299

#elif HASH_BIG_ENDIAN == 1

300

301

* hashbig():

302

* This is the same as hashword() on big-endian machines. It is different

303

* from hashlittle() on all machines. hashbig() takes advantage of

304

* big-endian byte ordering.

305

306

uint32_t hash( const void *key, size_t length, const uint32_t initval)

307

{

308

uint32_t a,b,c;

309

union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */

310

311

/* Set up the internal state */

312

a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;

313

314

u.ptr = key;

315

if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {

316

const uint32_t *k = key; /* read 32-bit chunks */

317

#ifdef VALGRIND

318

const uint8_t *k8;

319

#endif /* ifdef VALGRIND */

320

321

/*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */

322

while (length > 12)

323

{

324

a += k[0];

325

b += k[1];

326

c += k[2];

327

mix(a,b,c);

328

length -= 12;

329

k += 3;

330

}

331

332

/*----------------------------- handle the last (probably partial) block */

333

334

* "k[2]<<8" actually reads beyond the end of the string, but

335

* then shifts out the part it's not allowed to read. Because the

336

* string is aligned, the illegal read is in the same word as the

337

* rest of the string. Every machine with memory protection I've seen

338

* does it on word boundaries, so is OK with this. But VALGRIND will

339

* still catch it and complain. The masking trick does make the hash

340

* noticably faster for short strings (like English words).

341

342

#ifndef VALGRIND

343

344

switch(length)

345

{

346

case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;

347

case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;

348

case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;

349

case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;

350

case 8 : b+=k[1]; a+=k[0]; break;

351

case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;

352

case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;

353

case 5 : b+=k[1]&0xff000000; a+=k[0]; break;

354

case 4 : a+=k[0]; break;

355

case 3 : a+=k[0]&0xffffff00; break;

356

case 2 : a+=k[0]&0xffff0000; break;

357

case 1 : a+=k[0]&0xff000000; break;

358

case 0 : return c; /* zero length strings require no mixing */

359

}

360

361

#else /* make valgrind happy */

362

363

k8 = (const uint8_t *)k;

364

switch(length) /* all the case statements fall through */

365

{

366

case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;

367

case 11: c+=((uint32_t)k8[10])<<8; /* fall through */

368

case 10: c+=((uint32_t)k8[9])<<16; /* fall through */

369

case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */

370

case 8 : b+=k[1]; a+=k[0]; break;

371

case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */

372

case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */

373

case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */

374

case 4 : a+=k[0]; break;

375

case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */

376

case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */

377

case 1 : a+=((uint32_t)k8[0])<<24; break;

378

case 0 : return c;

379

}

380

381

#endif /* !VALGRIND */

382

383

} else { /* need to read the key one byte at a time */

384

const uint8_t *k = key;

385

386

/*--------------- all but the last block: affect some 32 bits of (a,b,c) */

387

while (length > 12)

388

{

389

a += ((uint32_t)k[0])<<24;

390

a += ((uint32_t)k[1])<<16;

391

a += ((uint32_t)k[2])<<8;

392

a += ((uint32_t)k[3]);

393

b += ((uint32_t)k[4])<<24;

394

b += ((uint32_t)k[5])<<16;

395

b += ((uint32_t)k[6])<<8;

396

b += ((uint32_t)k[7]);

397

c += ((uint32_t)k[8])<<24;

398

c += ((uint32_t)k[9])<<16;

399

c += ((uint32_t)k[10])<<8;

400

c += ((uint32_t)k[11]);

401

mix(a,b,c);

402

length -= 12;

403

k += 12;

404

}

405

406

/*-------------------------------- last block: affect all 32 bits of (c) */

407

switch(length) /* all the case statements fall through */

408

{

409

case 12: c+=k[11];

410

case 11: c+=((uint32_t)k[10])<<8;

411

case 10: c+=((uint32_t)k[9])<<16;

412

case 9 : c+=((uint32_t)k[8])<<24;

413

case 8 : b+=k[7];

414

case 7 : b+=((uint32_t)k[6])<<8;

415

case 6 : b+=((uint32_t)k[5])<<16;

416

case 5 : b+=((uint32_t)k[4])<<24;

417

case 4 : a+=k[3];

418

case 3 : a+=((uint32_t)k[2])<<8;

419

case 2 : a+=((uint32_t)k[1])<<16;

420

case 1 : a+=((uint32_t)k[0])<<24;

421

break;

422

case 0 : return c;

423

}

424

}

425

426

final(a,b,c);

427

return c;

428

}

429

#else /* HASH_XXX_ENDIAN == 1 */

430

#error Must define HASH_BIG_ENDIAN or HASH_LITTLE_ENDIAN

431

#endif /* HASH_XXX_ENDIAN == 1 */

Older »