1
/*-------------------------------------------------------------------------
4
* executor utility routines for grouping, hashing, and aggregation
6
* Note: we currently assume that equality and hashing functions are not
7
* collation-sensitive, so the code in this file has no support for passing
8
* collation settings through from callers. That may have to change someday.
10
* Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
11
* Portions Copyright (c) 1994, Regents of the University of California
15
* src/backend/executor/execGrouping.c
17
*-------------------------------------------------------------------------
21
#include "executor/executor.h"
22
#include "parser/parse_oper.h"
23
#include "utils/lsyscache.h"
24
#include "utils/memutils.h"
25
#include "utils/syscache.h"
28
static TupleHashTable CurTupleHashTable = NULL;
30
static uint32 TupleHashTableHash(const void *key, Size keysize);
31
static int TupleHashTableMatch(const void *key1, const void *key2,
35
/*****************************************************************************
36
* Utility routines for grouping tuples together
37
*****************************************************************************/
41
* Return true if two tuples match in all the indicated fields.
43
* This actually implements SQL's notion of "not distinct". Two nulls
44
* match, a null and a not-null don't match.
46
* slot1, slot2: the tuples to compare (must have same columns!)
47
* numCols: the number of attributes to be examined
48
* matchColIdx: array of attribute column numbers
49
* eqFunctions: array of fmgr lookup info for the equality functions to use
50
* evalContext: short-term memory context for executing the functions
52
* NB: evalContext is reset each time!
55
execTuplesMatch(TupleTableSlot *slot1,
56
TupleTableSlot *slot2,
58
AttrNumber *matchColIdx,
59
FmgrInfo *eqfunctions,
60
MemoryContext evalContext)
62
MemoryContext oldContext;
66
/* Reset and switch into the temp context. */
67
MemoryContextReset(evalContext);
68
oldContext = MemoryContextSwitchTo(evalContext);
71
* We cannot report a match without checking all the fields, but we can
72
* report a non-match as soon as we find unequal fields. So, start
73
* comparing at the last field (least significant sort key). That's the
74
* most likely to be different if we are dealing with sorted input.
78
for (i = numCols; --i >= 0;)
80
AttrNumber att = matchColIdx[i];
86
attr1 = slot_getattr(slot1, att, &isNull1);
88
attr2 = slot_getattr(slot2, att, &isNull2);
90
if (isNull1 != isNull2)
92
result = false; /* one null and one not; they aren't equal */
97
continue; /* both are null, treat as equal */
99
/* Apply the type-specific equality function */
101
if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
104
result = false; /* they aren't equal */
109
MemoryContextSwitchTo(oldContext);
116
* Return true if two tuples are definitely unequal in the indicated
119
* Nulls are neither equal nor unequal to anything else. A true result
120
* is obtained only if there are non-null fields that compare not-equal.
122
* Parameters are identical to execTuplesMatch.
125
execTuplesUnequal(TupleTableSlot *slot1,
126
TupleTableSlot *slot2,
128
AttrNumber *matchColIdx,
129
FmgrInfo *eqfunctions,
130
MemoryContext evalContext)
132
MemoryContext oldContext;
136
/* Reset and switch into the temp context. */
137
MemoryContextReset(evalContext);
138
oldContext = MemoryContextSwitchTo(evalContext);
141
* We cannot report a match without checking all the fields, but we can
142
* report a non-match as soon as we find unequal fields. So, start
143
* comparing at the last field (least significant sort key). That's the
144
* most likely to be different if we are dealing with sorted input.
148
for (i = numCols; --i >= 0;)
150
AttrNumber att = matchColIdx[i];
156
attr1 = slot_getattr(slot1, att, &isNull1);
159
continue; /* can't prove anything here */
161
attr2 = slot_getattr(slot2, att, &isNull2);
164
continue; /* can't prove anything here */
166
/* Apply the type-specific equality function */
168
if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
171
result = true; /* they are unequal */
176
MemoryContextSwitchTo(oldContext);
183
* execTuplesMatchPrepare
184
* Look up the equality functions needed for execTuplesMatch or
185
* execTuplesUnequal, given an array of equality operator OIDs.
187
* The result is a palloc'd array.
190
execTuplesMatchPrepare(int numCols,
193
FmgrInfo *eqFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
196
for (i = 0; i < numCols; i++)
198
Oid eq_opr = eqOperators[i];
201
eq_function = get_opcode(eq_opr);
202
fmgr_info(eq_function, &eqFunctions[i]);
209
* execTuplesHashPrepare
210
* Look up the equality and hashing functions needed for a TupleHashTable.
212
* This is similar to execTuplesMatchPrepare, but we also need to find the
213
* hash functions associated with the equality operators. *eqFunctions and
214
* *hashFunctions receive the palloc'd result arrays.
216
* Note: we expect that the given operators are not cross-type comparisons.
219
execTuplesHashPrepare(int numCols,
221
FmgrInfo **eqFunctions,
222
FmgrInfo **hashFunctions)
226
*eqFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
227
*hashFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
229
for (i = 0; i < numCols; i++)
231
Oid eq_opr = eqOperators[i];
233
Oid left_hash_function;
234
Oid right_hash_function;
236
eq_function = get_opcode(eq_opr);
237
if (!get_op_hash_functions(eq_opr,
238
&left_hash_function, &right_hash_function))
239
elog(ERROR, "could not find hash function for hash operator %u",
241
/* We're not supporting cross-type cases here */
242
Assert(left_hash_function == right_hash_function);
243
fmgr_info(eq_function, &(*eqFunctions)[i]);
244
fmgr_info(right_hash_function, &(*hashFunctions)[i]);
249
/*****************************************************************************
250
* Utility routines for all-in-memory hash tables
252
* These routines build hash tables for grouping tuples together (eg, for
253
* hash aggregation). There is one entry for each not-distinct set of tuples
255
*****************************************************************************/
258
* Construct an empty TupleHashTable
260
* numCols, keyColIdx: identify the tuple fields to use as lookup key
261
* eqfunctions: equality comparison functions to use
262
* hashfunctions: datatype-specific hashing functions to use
263
* nbuckets: initial estimate of hashtable size
264
* entrysize: size of each entry (at least sizeof(TupleHashEntryData))
265
* tablecxt: memory context in which to store table and table entries
266
* tempcxt: short-lived context for evaluation hash and comparison functions
268
* The function arrays may be made with execTuplesHashPrepare(). Note they
269
* are not cross-type functions, but expect to see the table datatype(s)
272
* Note that keyColIdx, eqfunctions, and hashfunctions must be allocated in
273
* storage that will live as long as the hashtable does.
276
BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
277
FmgrInfo *eqfunctions,
278
FmgrInfo *hashfunctions,
279
int nbuckets, Size entrysize,
280
MemoryContext tablecxt, MemoryContext tempcxt)
282
TupleHashTable hashtable;
285
Assert(nbuckets > 0);
286
Assert(entrysize >= sizeof(TupleHashEntryData));
288
hashtable = (TupleHashTable) MemoryContextAlloc(tablecxt,
289
sizeof(TupleHashTableData));
291
hashtable->numCols = numCols;
292
hashtable->keyColIdx = keyColIdx;
293
hashtable->tab_hash_funcs = hashfunctions;
294
hashtable->tab_eq_funcs = eqfunctions;
295
hashtable->tablecxt = tablecxt;
296
hashtable->tempcxt = tempcxt;
297
hashtable->entrysize = entrysize;
298
hashtable->tableslot = NULL; /* will be made on first lookup */
299
hashtable->inputslot = NULL;
300
hashtable->in_hash_funcs = NULL;
301
hashtable->cur_eq_funcs = NULL;
303
MemSet(&hash_ctl, 0, sizeof(hash_ctl));
304
hash_ctl.keysize = sizeof(TupleHashEntryData);
305
hash_ctl.entrysize = entrysize;
306
hash_ctl.hash = TupleHashTableHash;
307
hash_ctl.match = TupleHashTableMatch;
308
hash_ctl.hcxt = tablecxt;
309
hashtable->hashtab = hash_create("TupleHashTable", (long) nbuckets,
311
HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);
317
* Find or create a hashtable entry for the tuple group containing the
318
* given tuple. The tuple must be the same type as the hashtable entries.
320
* If isnew is NULL, we do not create new entries; we return NULL if no
323
* If isnew isn't NULL, then a new entry is created if no existing entry
324
* matches. On return, *isnew is true if the entry is newly created,
325
* false if it existed already. Any extra space in a new entry has been
329
LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
332
TupleHashEntry entry;
333
MemoryContext oldContext;
334
TupleHashTable saveCurHT;
335
TupleHashEntryData dummy;
338
/* If first time through, clone the input slot to make table slot */
339
if (hashtable->tableslot == NULL)
343
oldContext = MemoryContextSwitchTo(hashtable->tablecxt);
346
* We copy the input tuple descriptor just for safety --- we assume
347
* all input tuples will have equivalent descriptors.
349
tupdesc = CreateTupleDescCopy(slot->tts_tupleDescriptor);
350
hashtable->tableslot = MakeSingleTupleTableSlot(tupdesc);
351
MemoryContextSwitchTo(oldContext);
354
/* Need to run the hash functions in short-lived context */
355
oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
358
* Set up data needed by hash and match functions
360
* We save and restore CurTupleHashTable just in case someone manages to
361
* invoke this code re-entrantly.
363
hashtable->inputslot = slot;
364
hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
365
hashtable->cur_eq_funcs = hashtable->tab_eq_funcs;
367
saveCurHT = CurTupleHashTable;
368
CurTupleHashTable = hashtable;
370
/* Search the hash table */
371
dummy.firstTuple = NULL; /* flag to reference inputslot */
372
entry = (TupleHashEntry) hash_search(hashtable->hashtab,
374
isnew ? HASH_ENTER : HASH_FIND,
381
/* found pre-existing entry */
389
* Zero any caller-requested space in the entry. (This zaps the
390
* "key data" dynahash.c copied into the new entry, but we don't
391
* care since we're about to overwrite it anyway.)
393
MemSet(entry, 0, hashtable->entrysize);
395
/* Copy the first tuple into the table context */
396
MemoryContextSwitchTo(hashtable->tablecxt);
397
entry->firstTuple = ExecCopySlotMinimalTuple(slot);
403
CurTupleHashTable = saveCurHT;
405
MemoryContextSwitchTo(oldContext);
411
* Search for a hashtable entry matching the given tuple. No entry is
412
* created if there's not a match. This is similar to the non-creating
413
* case of LookupTupleHashEntry, except that it supports cross-type
414
* comparisons, in which the given tuple is not of the same type as the
415
* table entries. The caller must provide the hash functions to use for
416
* the input tuple, as well as the equality functions, since these may be
417
* different from the table's internal functions.
420
FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
421
FmgrInfo *eqfunctions,
422
FmgrInfo *hashfunctions)
424
TupleHashEntry entry;
425
MemoryContext oldContext;
426
TupleHashTable saveCurHT;
427
TupleHashEntryData dummy;
429
/* Need to run the hash functions in short-lived context */
430
oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
433
* Set up data needed by hash and match functions
435
* We save and restore CurTupleHashTable just in case someone manages to
436
* invoke this code re-entrantly.
438
hashtable->inputslot = slot;
439
hashtable->in_hash_funcs = hashfunctions;
440
hashtable->cur_eq_funcs = eqfunctions;
442
saveCurHT = CurTupleHashTable;
443
CurTupleHashTable = hashtable;
445
/* Search the hash table */
446
dummy.firstTuple = NULL; /* flag to reference inputslot */
447
entry = (TupleHashEntry) hash_search(hashtable->hashtab,
452
CurTupleHashTable = saveCurHT;
454
MemoryContextSwitchTo(oldContext);
460
* Compute the hash value for a tuple
462
* The passed-in key is a pointer to TupleHashEntryData. In an actual hash
463
* table entry, the firstTuple field points to a tuple (in MinimalTuple
464
* format). LookupTupleHashEntry sets up a dummy TupleHashEntryData with a
465
* NULL firstTuple field --- that cues us to look at the inputslot instead.
466
* This convention avoids the need to materialize virtual input tuples unless
467
* they actually need to get copied into the table.
469
* CurTupleHashTable must be set before calling this, since dynahash.c
470
* doesn't provide any API that would let us get at the hashtable otherwise.
472
* Also, the caller must select an appropriate memory context for running
473
* the hash functions. (dynahash.c doesn't change CurrentMemoryContext.)
476
TupleHashTableHash(const void *key, Size keysize)
478
MinimalTuple tuple = ((const TupleHashEntryData *) key)->firstTuple;
479
TupleTableSlot *slot;
480
TupleHashTable hashtable = CurTupleHashTable;
481
int numCols = hashtable->numCols;
482
AttrNumber *keyColIdx = hashtable->keyColIdx;
483
FmgrInfo *hashfunctions;
489
/* Process the current input tuple for the table */
490
slot = hashtable->inputslot;
491
hashfunctions = hashtable->in_hash_funcs;
495
/* Process a tuple already stored in the table */
496
/* (this case never actually occurs in current dynahash.c code) */
497
slot = hashtable->tableslot;
498
ExecStoreMinimalTuple(tuple, slot, false);
499
hashfunctions = hashtable->tab_hash_funcs;
502
for (i = 0; i < numCols; i++)
504
AttrNumber att = keyColIdx[i];
508
/* rotate hashkey left 1 bit at each step */
509
hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
511
attr = slot_getattr(slot, att, &isNull);
513
if (!isNull) /* treat nulls as having hash key 0 */
517
hkey = DatumGetUInt32(FunctionCall1(&hashfunctions[i],
527
* See whether two tuples (presumably of the same hash value) match
529
* As above, the passed pointers are pointers to TupleHashEntryData.
531
* CurTupleHashTable must be set before calling this, since dynahash.c
532
* doesn't provide any API that would let us get at the hashtable otherwise.
534
* Also, the caller must select an appropriate memory context for running
535
* the compare functions. (dynahash.c doesn't change CurrentMemoryContext.)
538
TupleHashTableMatch(const void *key1, const void *key2, Size keysize)
540
MinimalTuple tuple1 = ((const TupleHashEntryData *) key1)->firstTuple;
542
#ifdef USE_ASSERT_CHECKING
543
MinimalTuple tuple2 = ((const TupleHashEntryData *) key2)->firstTuple;
545
TupleTableSlot *slot1;
546
TupleTableSlot *slot2;
547
TupleHashTable hashtable = CurTupleHashTable;
550
* We assume that dynahash.c will only ever call us with the first
551
* argument being an actual table entry, and the second argument being
552
* LookupTupleHashEntry's dummy TupleHashEntryData. The other direction
553
* could be supported too, but is not currently used by dynahash.c.
555
Assert(tuple1 != NULL);
556
slot1 = hashtable->tableslot;
557
ExecStoreMinimalTuple(tuple1, slot1, false);
558
Assert(tuple2 == NULL);
559
slot2 = hashtable->inputslot;
561
/* For crosstype comparisons, the inputslot must be first */
562
if (execTuplesMatch(slot2,
565
hashtable->keyColIdx,
566
hashtable->cur_eq_funcs,