1
/*-------------------------------------------------------------------------
4
* executor utility routines for grouping, hashing, and aggregation
6
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
7
* Portions Copyright (c) 1994, Regents of the University of California
11
* $PostgreSQL: pgsql/src/backend/executor/execGrouping.c,v 1.13 2004-12-31 21:59:45 pgsql Exp $
13
*-------------------------------------------------------------------------
17
#include "access/hash.h"
18
#include "access/heapam.h"
19
#include "executor/executor.h"
20
#include "parser/parse_oper.h"
21
#include "utils/memutils.h"
22
#include "utils/lsyscache.h"
23
#include "utils/syscache.h"
26
static TupleHashTable CurTupleHashTable = NULL;
28
static uint32 TupleHashTableHash(const void *key, Size keysize);
29
static int TupleHashTableMatch(const void *key1, const void *key2,
33
/*****************************************************************************
34
* Utility routines for grouping tuples together
35
*****************************************************************************/
39
* Return true if two tuples match in all the indicated fields.
41
* This actually implements SQL's notion of "not distinct". Two nulls
42
* match, a null and a not-null don't match.
44
* tuple1, tuple2: the tuples to compare
45
* tupdesc: tuple descriptor applying to both tuples
46
* numCols: the number of attributes to be examined
47
* matchColIdx: array of attribute column numbers
48
* eqFunctions: array of fmgr lookup info for the equality functions to use
49
* evalContext: short-term memory context for executing the functions
51
* NB: evalContext is reset each time!
54
execTuplesMatch(HeapTuple tuple1,
58
AttrNumber *matchColIdx,
59
FmgrInfo *eqfunctions,
60
MemoryContext evalContext)
62
MemoryContext oldContext;
66
/* Reset and switch into the temp context. */
67
MemoryContextReset(evalContext);
68
oldContext = MemoryContextSwitchTo(evalContext);
71
* We cannot report a match without checking all the fields, but we
72
* can report a non-match as soon as we find unequal fields. So,
73
* start comparing at the last field (least significant sort key).
74
* That's the most likely to be different if we are dealing with
79
for (i = numCols; --i >= 0;)
81
AttrNumber att = matchColIdx[i];
87
attr1 = heap_getattr(tuple1,
92
attr2 = heap_getattr(tuple2,
97
if (isNull1 != isNull2)
99
result = false; /* one null and one not; they aren't equal */
104
continue; /* both are null, treat as equal */
106
/* Apply the type-specific equality function */
108
if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
111
result = false; /* they aren't equal */
116
MemoryContextSwitchTo(oldContext);
123
* Return true if two tuples are definitely unequal in the indicated
126
* Nulls are neither equal nor unequal to anything else. A true result
127
* is obtained only if there are non-null fields that compare not-equal.
129
* Parameters are identical to execTuplesMatch.
132
execTuplesUnequal(HeapTuple tuple1,
136
AttrNumber *matchColIdx,
137
FmgrInfo *eqfunctions,
138
MemoryContext evalContext)
140
MemoryContext oldContext;
144
/* Reset and switch into the temp context. */
145
MemoryContextReset(evalContext);
146
oldContext = MemoryContextSwitchTo(evalContext);
149
* We cannot report a match without checking all the fields, but we
150
* can report a non-match as soon as we find unequal fields. So,
151
* start comparing at the last field (least significant sort key).
152
* That's the most likely to be different if we are dealing with
157
for (i = numCols; --i >= 0;)
159
AttrNumber att = matchColIdx[i];
165
attr1 = heap_getattr(tuple1,
171
continue; /* can't prove anything here */
173
attr2 = heap_getattr(tuple2,
179
continue; /* can't prove anything here */
181
/* Apply the type-specific equality function */
183
if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
186
result = true; /* they are unequal */
191
MemoryContextSwitchTo(oldContext);
198
* execTuplesMatchPrepare
199
* Look up the equality functions needed for execTuplesMatch or
202
* The result is a palloc'd array.
205
execTuplesMatchPrepare(TupleDesc tupdesc,
207
AttrNumber *matchColIdx)
209
FmgrInfo *eqfunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
212
for (i = 0; i < numCols; i++)
214
AttrNumber att = matchColIdx[i];
215
Oid typid = tupdesc->attrs[att - 1]->atttypid;
218
eq_function = equality_oper_funcid(typid);
219
fmgr_info(eq_function, &eqfunctions[i]);
226
* execTuplesHashPrepare
227
* Look up the equality and hashing functions needed for a TupleHashTable.
229
* This is similar to execTuplesMatchPrepare, but we also need to find the
230
* hash functions associated with the equality operators. *eqfunctions and
231
* *hashfunctions receive the palloc'd result arrays.
234
execTuplesHashPrepare(TupleDesc tupdesc,
236
AttrNumber *matchColIdx,
237
FmgrInfo **eqfunctions,
238
FmgrInfo **hashfunctions)
242
*eqfunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
243
*hashfunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
245
for (i = 0; i < numCols; i++)
247
AttrNumber att = matchColIdx[i];
248
Oid typid = tupdesc->attrs[att - 1]->atttypid;
254
optup = equality_oper(typid, false);
255
eq_opr = oprid(optup);
256
eq_function = oprfuncid(optup);
257
ReleaseSysCache(optup);
258
hash_function = get_op_hash_function(eq_opr);
259
if (!OidIsValid(hash_function)) /* should not happen */
260
elog(ERROR, "could not find hash function for hash operator %u",
262
fmgr_info(eq_function, &(*eqfunctions)[i]);
263
fmgr_info(hash_function, &(*hashfunctions)[i]);
268
/*****************************************************************************
269
* Utility routines for all-in-memory hash tables
271
* These routines build hash tables for grouping tuples together (eg, for
272
* hash aggregation). There is one entry for each not-distinct set of tuples
274
*****************************************************************************/
277
* Construct an empty TupleHashTable
279
* numCols, keyColIdx: identify the tuple fields to use as lookup key
280
* eqfunctions: equality comparison functions to use
281
* hashfunctions: datatype-specific hashing functions to use
282
* nbuckets: initial estimate of hashtable size
283
* entrysize: size of each entry (at least sizeof(TupleHashEntryData))
284
* tablecxt: memory context in which to store table and table entries
285
* tempcxt: short-lived context for evaluation hash and comparison functions
287
* The function arrays may be made with execTuplesHashPrepare().
289
* Note that keyColIdx, eqfunctions, and hashfunctions must be allocated in
290
* storage that will live as long as the hashtable does.
293
BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
294
FmgrInfo *eqfunctions,
295
FmgrInfo *hashfunctions,
296
int nbuckets, Size entrysize,
297
MemoryContext tablecxt, MemoryContext tempcxt)
299
TupleHashTable hashtable;
302
Assert(nbuckets > 0);
303
Assert(entrysize >= sizeof(TupleHashEntryData));
305
hashtable = (TupleHashTable) MemoryContextAlloc(tablecxt,
306
sizeof(TupleHashTableData));
308
hashtable->numCols = numCols;
309
hashtable->keyColIdx = keyColIdx;
310
hashtable->eqfunctions = eqfunctions;
311
hashtable->hashfunctions = hashfunctions;
312
hashtable->tablecxt = tablecxt;
313
hashtable->tempcxt = tempcxt;
314
hashtable->entrysize = entrysize;
316
MemSet(&hash_ctl, 0, sizeof(hash_ctl));
317
hash_ctl.keysize = sizeof(TupleHashEntryData);
318
hash_ctl.entrysize = entrysize;
319
hash_ctl.hash = TupleHashTableHash;
320
hash_ctl.match = TupleHashTableMatch;
321
hash_ctl.hcxt = tablecxt;
322
hashtable->hashtab = hash_create("TupleHashTable", (long) nbuckets,
324
HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);
330
* Find or create a hashtable entry for the tuple group containing the
333
* If isnew is NULL, we do not create new entries; we return NULL if no
336
* If isnew isn't NULL, then a new entry is created if no existing entry
337
* matches. On return, *isnew is true if the entry is newly created,
338
* false if it existed already. Any extra space in a new entry has been
342
LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
345
HeapTuple tuple = slot->val;
346
TupleDesc tupdesc = slot->ttc_tupleDescriptor;
347
TupleHashEntry entry;
348
MemoryContext oldContext;
349
TupleHashTable saveCurHT;
352
/* Need to run the hash functions in short-lived context */
353
oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
356
* Set up data needed by hash and match functions
358
* We save and restore CurTupleHashTable just in case someone manages to
359
* invoke this code re-entrantly.
361
hashtable->tupdesc = tupdesc;
362
saveCurHT = CurTupleHashTable;
363
CurTupleHashTable = hashtable;
365
/* Search the hash table */
366
entry = (TupleHashEntry) hash_search(hashtable->hashtab,
368
isnew ? HASH_ENTER : HASH_FIND,
375
/* found pre-existing entry */
380
/* created new entry ... we hope */
383
(errcode(ERRCODE_OUT_OF_MEMORY),
384
errmsg("out of memory")));
387
* Zero any caller-requested space in the entry. (This zaps
388
* the "key data" dynahash.c copied into the new entry, but we
389
* don't care since we're about to overwrite it anyway.)
391
MemSet(entry, 0, hashtable->entrysize);
393
/* Copy the first tuple into the table context */
394
MemoryContextSwitchTo(hashtable->tablecxt);
395
entry->firstTuple = heap_copytuple(tuple);
401
CurTupleHashTable = saveCurHT;
403
MemoryContextSwitchTo(oldContext);
409
* Compute the hash value for a tuple
411
* The passed-in key is a pointer to a HeapTuple pointer -- this is either
412
* the firstTuple field of a TupleHashEntry struct, or the key value passed
413
* to hash_search. We ignore the keysize.
415
* CurTupleHashTable must be set before calling this, since dynahash.c
416
* doesn't provide any API that would let us get at the hashtable otherwise.
418
* Also, the caller must select an appropriate memory context for running
419
* the hash functions. (dynahash.c doesn't change CurrentMemoryContext.)
422
TupleHashTableHash(const void *key, Size keysize)
424
HeapTuple tuple = *(const HeapTuple *) key;
425
TupleHashTable hashtable = CurTupleHashTable;
426
int numCols = hashtable->numCols;
427
AttrNumber *keyColIdx = hashtable->keyColIdx;
428
TupleDesc tupdesc = hashtable->tupdesc;
432
for (i = 0; i < numCols; i++)
434
AttrNumber att = keyColIdx[i];
438
/* rotate hashkey left 1 bit at each step */
439
hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
441
attr = heap_getattr(tuple, att, tupdesc, &isNull);
443
if (!isNull) /* treat nulls as having hash key 0 */
447
hkey = DatumGetUInt32(FunctionCall1(&hashtable->hashfunctions[i],
457
* See whether two tuples (presumably of the same hash value) match
459
* As above, the passed pointers are pointers to HeapTuple pointers.
461
* CurTupleHashTable must be set before calling this, since dynahash.c
462
* doesn't provide any API that would let us get at the hashtable otherwise.
464
* Also, the caller must select an appropriate memory context for running
465
* the compare functions. (dynahash.c doesn't change CurrentMemoryContext.)
468
TupleHashTableMatch(const void *key1, const void *key2, Size keysize)
470
HeapTuple tuple1 = *(const HeapTuple *) key1;
471
HeapTuple tuple2 = *(const HeapTuple *) key2;
472
TupleHashTable hashtable = CurTupleHashTable;
474
if (execTuplesMatch(tuple1,
478
hashtable->keyColIdx,
479
hashtable->eqfunctions,