~fractalcat/gearmand/docfixes

« back to all changes in this revision

Viewing changes to libhashkit/jenkins.cc

Committer: Brian Aker
Date: 2012-12-13 11:44:26 UTC
mto: (621.4.66 workspace)
mto: This revision was merged to the branch mainline in revision 676.
Revision ID: brian@tangent.org-20121213114426-lnrt6aysy7lqc01h

Adding support for deriving the unique value based on the data that is supplied by the client.

files added:
libhashkit

libhashkit-1.0

libhashkit-1.0/algorithm.h

libhashkit-1.0/basic_string.h

libhashkit-1.0/behavior.h

libhashkit-1.0/configure.h.in

libhashkit-1.0/digest.h

libhashkit-1.0/function.h

libhashkit-1.0/has.h

libhashkit-1.0/hashkit.h

libhashkit-1.0/hashkit.hpp

libhashkit-1.0/include.am

libhashkit-1.0/str_algorithm.h

libhashkit-1.0/strerror.h

libhashkit-1.0/string.h

libhashkit-1.0/types.h

libhashkit-1.0/visibility.h

libhashkit/aes.cc

libhashkit/aes.h

libhashkit/algorithm.cc

libhashkit/algorithm.h

libhashkit/behavior.cc

libhashkit/common.h

libhashkit/crc32.cc

libhashkit/digest.cc

libhashkit/encrypt.cc

libhashkit/fnv_32.cc

libhashkit/fnv_64.cc

libhashkit/function.cc

libhashkit/has.cc

libhashkit/hashkit.cc

libhashkit/hashkit.h

libhashkit/hashkitcon.h.in

libhashkit/hsieh.cc

libhashkit/include.am

libhashkit/include.m4

libhashkit/is.h

libhashkit/jenkins.cc

libhashkit/ketama.cc

libhashkit/md5.cc

libhashkit/murmur.cc

libhashkit/murmur3.cc

libhashkit/murmur3.h

libhashkit/murmur3_api.cc

libhashkit/nohsieh.cc

libhashkit/one_at_a_time.cc

libhashkit/rijndael.cc

libhashkit/rijndael.hpp

libhashkit/str_algorithm.cc

libhashkit/strerror.cc

libhashkit/string.cc

libhashkit/string.h

files modified:
.bzrignore

ChangeLog

Makefile.am

configure.ac

libgearman/add.cc

libgearman/include.am

libgearman/unique.cc

libgearman/unique.hpp

tests/libgearman-1.0/client_test.cc

tests/libgearman-1.0/unique.cc

tests/unique.h

Show diffs side-by-side

added added

removed removed

libhashkit/jenkins.cc

/* vim:expandtab:shiftwidth=2:tabstop=2:smarttab:

* HashKit library

* Redistribution and use in source and binary forms, with or without

* modification, are permitted provided that the following conditions are

* met:

* * Redistributions of source code must retain the above copyright

* notice, this list of conditions and the following disclaimer.

* * Redistributions in binary form must reproduce the above

* copyright notice, this list of conditions and the following disclaimer

* in the documentation and/or other materials provided with the

* distribution.

* * The names of its contributors may not be used to endorse or

* promote products derived from this software without specific prior

* written permission.

* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

* By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this

* code any way you wish, private, educational, or commercial. It's free.

* Use for hash table lookup, or anything where one collision in 2^^32 is

* acceptable. Do NOT use for cryptographic purposes.

* http://burtleburtle.net/bob/hash/index.html

* Modified by Brian Pontz for libmemcached

* TODO:

* Add big endian support

*/

#include <libhashkit/common.h>

/* Table-size helpers: hashsize(n) is 2^n, hashmask(n) keeps the low n bits. */
#define hashsize(n) ((uint32_t)1<<(n))
#define hashmask(n) (hashsize(n)-1)

/* Rotate the 32-bit value x left by k bits (0 < k < 32). */
#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))

/*
 * mix() -- reversibly mix three 32-bit values.
 *
 * Wrapped in do { } while (0) so that "mix(a,b,c);" is a single statement
 * and stays safe in front of an "else".
 */
#define mix(a,b,c) \
  do { \
    a -= c;  a ^= rot(c, 4);  c += b; \
    b -= a;  b ^= rot(a, 6);  a += c; \
    c -= b;  c ^= rot(b, 8);  b += a; \
    a -= c;  a ^= rot(c,16);  c += b; \
    b -= a;  b ^= rot(a,19);  a += c; \
    c -= b;  c ^= rot(b, 4);  b += a; \
  } while (0)

/*
 * final() -- final mixing of three 32-bit values (a,b,c) into c.
 */
#define final(a,b,c) \
  do { \
    c ^= b; c -= rot(b,14); \
    a ^= c; a -= rot(c,11); \
    b ^= a; b -= rot(a,25); \
    c ^= b; c -= rot(b,16); \
    a ^= c; a -= rot(c,4);  \
    b ^= a; b -= rot(a,14); \
    c ^= b; c -= rot(b,24); \
  } while (0)

/* Fixed seed that is folded into the initial state together with the length. */
#define JENKINS_INITVAL 13

/*
 * hashkit_jenkins() -- hash a variable-length key into a 32-bit value
 *
 *   key     : the key (an unaligned, variable-length array of bytes)
 *   length  : the length of the key, counting by bytes
 *   (third) : unnamed and never read by this function
 *
 * The seed is always JENKINS_INITVAL.
 *
 * Returns a 32-bit value.  Every bit of the key affects every bit of
 * the return value.  Two keys differing by one or two bits will have
 * totally different hash values.  A zero-length key returns the initial
 * state without any mixing.
 *
 * The best hash table sizes are powers of 2.  There is no need to do
 * mod a prime (mod is sooo slow!).  If you need less than 32 bits,
 * use a bitmask.  For example, if you need only 10 bits, do
 *   h = (h & hashmask(10));
 * In which case, the hash table should have hashsize(10) elements.
 *
 * Unless WORDS_BIGENDIAN is defined, full 12-byte blocks are consumed with
 * 32-bit or 16-bit loads when the key's address allows it; otherwise they
 * are assembled one byte at a time.  All three paths combine the bytes in
 * little-endian order, so the result does not depend on the alignment of
 * the key.
 *
 * The last (possibly partial) block is always read one byte at a time.
 * Earlier revisions loaded whole words there and masked off the excess,
 * which read up to three bytes past the end of the key; memory checkers
 * rightly complain about that, and the byte-wise form produces exactly the
 * same value.
 */
uint32_t hashkit_jenkins(const char *key, size_t length, void *)
{
  uint32_t a, b, c;    /* internal state */
  const uint8_t *tail; /* start of the final block (at most 12 bytes) */

  /* Set up the internal state */
  a = b = c = 0xdeadbeef + ((uint32_t)length) + JENKINS_INITVAL;

#ifndef WORDS_BIGENDIAN
  const uintptr_t address = (uintptr_t)key;

  if ((address & 0x3) == 0)
  {
    const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */

    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
    while (length > 12)
    {
      a += k[0];
      b += k[1];
      c += k[2];
      mix(a,b,c);
      length -= 12;
      k += 3;
    }

    tail = (const uint8_t *)k;
  }
  else if ((address & 0x1) == 0)
  {
    const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */

    /*--------------- all but last block: aligned reads and different mixing */
    while (length > 12)
    {
      a += k[0] + (((uint32_t)k[1])<<16);
      b += k[2] + (((uint32_t)k[3])<<16);
      c += k[4] + (((uint32_t)k[5])<<16);
      mix(a,b,c);
      length -= 12;
      k += 6;
    }

    tail = (const uint8_t *)k;
  }
  else
#endif /* little endian */
  { /* need to read the key one byte at a time */
    const uint8_t *k = (const uint8_t *)key;

    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
    while (length > 12)
    {
      a += k[0];
      a += ((uint32_t)k[1])<<8;
      a += ((uint32_t)k[2])<<16;
      a += ((uint32_t)k[3])<<24;
      b += k[4];
      b += ((uint32_t)k[5])<<8;
      b += ((uint32_t)k[6])<<16;
      b += ((uint32_t)k[7])<<24;
      c += k[8];
      c += ((uint32_t)k[9])<<8;
      c += ((uint32_t)k[10])<<16;
      c += ((uint32_t)k[11])<<24;
      mix(a,b,c);
      length -= 12;
      k += 12;
    }

    tail = k;
  }

  /*-------------------------------- last block: affect all 32 bits of (c) */
  switch (length) /* cases 12..2 deliberately fall through */
  {
  case 12: c += ((uint32_t)tail[11])<<24; /* fall through */
  case 11: c += ((uint32_t)tail[10])<<16; /* fall through */
  case 10: c += ((uint32_t)tail[9])<<8;   /* fall through */
  case 9 : c += tail[8];                  /* fall through */
  case 8 : b += ((uint32_t)tail[7])<<24;  /* fall through */
  case 7 : b += ((uint32_t)tail[6])<<16;  /* fall through */
  case 6 : b += ((uint32_t)tail[5])<<8;   /* fall through */
  case 5 : b += tail[4];                  /* fall through */
  case 4 : a += ((uint32_t)tail[3])<<24;  /* fall through */
  case 3 : a += ((uint32_t)tail[2])<<16;  /* fall through */
  case 2 : a += ((uint32_t)tail[1])<<8;   /* fall through */
  case 1 : a += tail[0];
           break;
  case 0 : return c; /* zero length strings require no mixing */
  default: return c; /* not reachable: the loops leave length <= 12 */
  }

  final(a,b,c);
  return c;
}

Older »