1
/*-------------------------------------------------------------------------
4
* fetch tuples from a GIN scan.
7
* Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
8
* Portions Copyright (c) 1994, Regents of the University of California
11
* src/backend/access/gin/ginget.c
12
*-------------------------------------------------------------------------
17
#include "access/gin_private.h"
18
#include "access/relscan.h"
19
#include "catalog/index.h"
20
#include "miscadmin.h"
21
#include "storage/bufmgr.h"
22
#include "utils/datum.h"
23
#include "utils/memutils.h"
26
typedef struct pendingPosition
29
OffsetNumber firstOffset;
30
OffsetNumber lastOffset;
37
* Convenience function for invoking a key's consistentFn
40
callConsistentFn(GinState *ginstate, GinScanKey key)
43
* If we're dealing with a dummy EVERYTHING key, we don't want to call the
44
* consistentFn; just claim it matches.
46
if (key->searchMode == GIN_SEARCH_MODE_EVERYTHING)
48
key->recheckCurItem = false;
53
* Initialize recheckCurItem in case the consistentFn doesn't know it
54
* should set it. The safe assumption in that case is to force recheck.
56
key->recheckCurItem = true;
58
return DatumGetBool(FunctionCall8Coll(&ginstate->consistentFn[key->attnum - 1],
59
ginstate->supportCollation[key->attnum - 1],
60
PointerGetDatum(key->entryRes),
61
UInt16GetDatum(key->strategy),
63
UInt32GetDatum(key->nuserentries),
64
PointerGetDatum(key->extra_data),
65
PointerGetDatum(&key->recheckCurItem),
66
PointerGetDatum(key->queryValues),
67
PointerGetDatum(key->queryCategories)));
71
* Tries to refind previously taken ItemPointer on a posting page.
74
findItemInPostingPage(Page page, ItemPointer item, OffsetNumber *off)
76
OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
79
if (GinPageGetOpaque(page)->flags & GIN_DELETED)
80
/* page was deleted by concurrent vacuum */
84
* scan page to find equal or first greater value
86
for (*off = FirstOffsetNumber; *off <= maxoff; (*off)++)
88
res = ginCompareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off));
98
* Goes to the next page if current offset is outside of bounds
101
moveRightIfItNeeded(GinBtreeData *btree, GinBtreeStack *stack)
103
Page page = BufferGetPage(stack->buffer);
105
if (stack->off > PageGetMaxOffsetNumber(page))
108
* We scanned the whole page, so we should take right page
110
stack->blkno = GinPageGetOpaque(page)->rightlink;
112
if (GinPageRightMost(page))
113
return false; /* no more pages */
115
LockBuffer(stack->buffer, GIN_UNLOCK);
116
stack->buffer = ReleaseAndReadBuffer(stack->buffer,
119
LockBuffer(stack->buffer, GIN_SHARE);
120
stack->off = FirstOffsetNumber;
127
* Scan all pages of a posting tree and save all its heap ItemPointers
128
* in scanEntry->matchBitmap
131
scanPostingTree(Relation index, GinScanEntry scanEntry,
132
BlockNumber rootPostingTree)
134
GinPostingTreeScan *gdi;
139
/* Descend to the leftmost leaf page */
140
gdi = ginPrepareScanPostingTree(index, rootPostingTree, TRUE);
142
buffer = ginScanBeginPostingTree(gdi);
143
IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */
145
freeGinBtreeStack(gdi->stack);
149
* Loop iterates through all leaf pages of posting tree
153
page = BufferGetPage(buffer);
155
if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0 &&
156
GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber)
158
tbm_add_tuples(scanEntry->matchBitmap,
159
(ItemPointer) GinDataPageGetItem(page, FirstOffsetNumber),
160
GinPageGetOpaque(page)->maxoff, false);
161
scanEntry->predictNumberResult += GinPageGetOpaque(page)->maxoff;
164
if (GinPageRightMost(page))
165
break; /* no more pages */
167
blkno = GinPageGetOpaque(page)->rightlink;
168
LockBuffer(buffer, GIN_UNLOCK);
169
buffer = ReleaseAndReadBuffer(buffer, index, blkno);
170
LockBuffer(buffer, GIN_SHARE);
173
UnlockReleaseBuffer(buffer);
177
* Collects TIDs into scanEntry->matchBitmap for all heap tuples that
178
* match the search entry. This supports three different match modes:
180
* 1. Partial-match support: scan from current point until the
181
* comparePartialFn says we're done.
182
* 2. SEARCH_MODE_ALL: scan from current point (which should be first
183
* key for the current attnum) until we hit null items or end of attnum
184
* 3. SEARCH_MODE_EVERYTHING: scan from current point (which should be first
185
* key for the current attnum) until we hit end of attnum
187
* Returns true if done, false if it's necessary to restart scan from scratch
190
collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
191
GinScanEntry scanEntry)
194
Form_pg_attribute attr;
196
/* Initialize empty bitmap result */
197
scanEntry->matchBitmap = tbm_create(work_mem * 1024L);
199
/* Null query cannot partial-match anything */
200
if (scanEntry->isPartialMatch &&
201
scanEntry->queryCategory != GIN_CAT_NORM_KEY)
204
/* Locate tupdesc entry for key column (for attbyval/attlen data) */
205
attnum = scanEntry->attnum;
206
attr = btree->ginstate->origTupdesc->attrs[attnum - 1];
213
GinNullCategory icategory;
216
* stack->off points to the interested entry, buffer is already locked
218
if (moveRightIfItNeeded(btree, stack) == false)
221
page = BufferGetPage(stack->buffer);
222
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
225
* If tuple stores another attribute then stop scan
227
if (gintuple_get_attrnum(btree->ginstate, itup) != attnum)
230
/* Safe to fetch attribute value */
231
idatum = gintuple_get_key(btree->ginstate, itup, &icategory);
234
* Check for appropriate scan stop conditions
236
if (scanEntry->isPartialMatch)
241
* In partial match, stop scan at any null (including
242
* placeholders); partial matches never match nulls
244
if (icategory != GIN_CAT_NORM_KEY)
248
* Check of partial match.
249
* case cmp == 0 => match
250
* case cmp > 0 => not match and finish scan
251
* case cmp < 0 => not match and continue scan
254
cmp = DatumGetInt32(FunctionCall4Coll(&btree->ginstate->comparePartialFn[attnum - 1],
255
btree->ginstate->supportCollation[attnum - 1],
258
UInt16GetDatum(scanEntry->strategy),
259
PointerGetDatum(scanEntry->extra_data)));
269
else if (scanEntry->searchMode == GIN_SEARCH_MODE_ALL)
272
* In ALL mode, we are not interested in null items, so we can
273
* stop if we get to a null-item placeholder (which will be the
274
* last entry for a given attnum). We do want to include NULL_KEY
275
* and EMPTY_ITEM entries, though.
277
if (icategory == GIN_CAT_NULL_ITEM)
282
* OK, we want to return the TIDs listed in this entry.
284
if (GinIsPostingTree(itup))
286
BlockNumber rootPostingTree = GinGetPostingTree(itup);
289
* We should unlock current page (but not unpin) during tree scan
290
* to prevent deadlock with vacuum processes.
292
* We save current entry value (idatum) to be able to re-find our
293
* tuple after re-locking
295
if (icategory == GIN_CAT_NORM_KEY)
296
idatum = datumCopy(idatum, attr->attbyval, attr->attlen);
298
LockBuffer(stack->buffer, GIN_UNLOCK);
300
/* Collect all the TIDs in this entry's posting tree */
301
scanPostingTree(btree->index, scanEntry, rootPostingTree);
304
* We lock again the entry page and while it was unlocked insert
305
* might have occurred, so we need to re-find our position.
307
LockBuffer(stack->buffer, GIN_SHARE);
308
page = BufferGetPage(stack->buffer);
309
if (!GinPageIsLeaf(page))
312
* Root page becomes non-leaf while we unlock it. We will
313
* start again, this situation doesn't occur often - root can
314
* became a non-leaf only once per life of index.
319
/* Search forward to re-find idatum */
323
GinNullCategory newCategory;
325
if (moveRightIfItNeeded(btree, stack) == false)
326
elog(ERROR, "lost saved point in index"); /* must not happen !!! */
328
page = BufferGetPage(stack->buffer);
329
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
331
if (gintuple_get_attrnum(btree->ginstate, itup) != attnum)
332
elog(ERROR, "lost saved point in index"); /* must not happen !!! */
333
newDatum = gintuple_get_key(btree->ginstate, itup,
336
if (ginCompareEntries(btree->ginstate, attnum,
337
newDatum, newCategory,
338
idatum, icategory) == 0)
344
if (icategory == GIN_CAT_NORM_KEY && !attr->attbyval)
345
pfree(DatumGetPointer(idatum));
349
tbm_add_tuples(scanEntry->matchBitmap,
350
GinGetPosting(itup), GinGetNPosting(itup), false);
351
scanEntry->predictNumberResult += GinGetNPosting(itup);
355
* Done with this entry, go to the next
364
* Start* functions setup beginning state of searches: finds correct buffer and pins it.
367
startScanEntry(GinState *ginstate, GinScanEntry entry)
369
GinBtreeData btreeEntry;
370
GinBtreeStack *stackEntry;
375
entry->buffer = InvalidBuffer;
376
ItemPointerSetMin(&entry->curItem);
377
entry->offset = InvalidOffsetNumber;
380
entry->matchBitmap = NULL;
381
entry->matchResult = NULL;
382
entry->reduceResult = FALSE;
383
entry->predictNumberResult = 0;
386
* we should find entry, and begin scan of posting tree or just store
387
* posting list in memory
389
ginPrepareEntryScan(&btreeEntry, entry->attnum,
390
entry->queryKey, entry->queryCategory,
392
btreeEntry.searchMode = TRUE;
393
stackEntry = ginFindLeafPage(&btreeEntry, NULL);
394
page = BufferGetPage(stackEntry->buffer);
397
entry->isFinished = TRUE;
399
if (entry->isPartialMatch ||
400
entry->queryCategory == GIN_CAT_EMPTY_QUERY)
403
* btreeEntry.findItem locates the first item >= given search key.
404
* (For GIN_CAT_EMPTY_QUERY, it will find the leftmost index item
405
* because of the way the GIN_CAT_EMPTY_QUERY category code is
406
* assigned.) We scan forward from there and collect all TIDs needed
407
* for the entry type.
409
btreeEntry.findItem(&btreeEntry, stackEntry);
410
if (collectMatchBitmap(&btreeEntry, stackEntry, entry) == false)
413
* GIN tree was seriously restructured, so we will cleanup all
414
* found data and rescan. See comments near 'return false' in
415
* collectMatchBitmap()
417
if (entry->matchBitmap)
419
if (entry->matchIterator)
420
tbm_end_iterate(entry->matchIterator);
421
entry->matchIterator = NULL;
422
tbm_free(entry->matchBitmap);
423
entry->matchBitmap = NULL;
425
LockBuffer(stackEntry->buffer, GIN_UNLOCK);
426
freeGinBtreeStack(stackEntry);
427
goto restartScanEntry;
430
if (entry->matchBitmap && !tbm_is_empty(entry->matchBitmap))
432
entry->matchIterator = tbm_begin_iterate(entry->matchBitmap);
433
entry->isFinished = FALSE;
436
else if (btreeEntry.findItem(&btreeEntry, stackEntry))
438
IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
440
if (GinIsPostingTree(itup))
442
BlockNumber rootPostingTree = GinGetPostingTree(itup);
443
GinPostingTreeScan *gdi;
447
* We should unlock entry page before touching posting tree to
448
* prevent deadlocks with vacuum processes. Because entry is never
449
* deleted from page and posting tree is never reduced to the
450
* posting list, we can unlock page after getting BlockNumber of
451
* root of posting tree.
453
LockBuffer(stackEntry->buffer, GIN_UNLOCK);
455
gdi = ginPrepareScanPostingTree(ginstate->index, rootPostingTree, TRUE);
457
entry->buffer = ginScanBeginPostingTree(gdi);
460
* We keep buffer pinned because we need to prevent deletion of
461
* page during scan. See GIN's vacuum implementation. RefCount is
462
* increased to keep buffer pinned after freeGinBtreeStack() call.
464
IncrBufferRefCount(entry->buffer);
466
page = BufferGetPage(entry->buffer);
467
entry->predictNumberResult = gdi->stack->predictNumber * GinPageGetOpaque(page)->maxoff;
470
* Keep page content in memory to prevent durable page locking
472
entry->list = (ItemPointerData *) palloc(BLCKSZ);
473
entry->nlist = GinPageGetOpaque(page)->maxoff;
474
memcpy(entry->list, GinDataPageGetItem(page, FirstOffsetNumber),
475
GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData));
477
LockBuffer(entry->buffer, GIN_UNLOCK);
478
freeGinBtreeStack(gdi->stack);
480
entry->isFinished = FALSE;
482
else if (GinGetNPosting(itup) > 0)
484
entry->nlist = GinGetNPosting(itup);
485
entry->list = (ItemPointerData *) palloc(sizeof(ItemPointerData) * entry->nlist);
486
memcpy(entry->list, GinGetPosting(itup), sizeof(ItemPointerData) * entry->nlist);
487
entry->isFinished = FALSE;
492
LockBuffer(stackEntry->buffer, GIN_UNLOCK);
493
freeGinBtreeStack(stackEntry);
497
startScanKey(GinState *ginstate, GinScanKey key)
499
ItemPointerSetMin(&key->curItem);
500
key->curItemMatches = false;
501
key->recheckCurItem = false;
502
key->isFinished = false;
506
startScan(IndexScanDesc scan)
508
GinScanOpaque so = (GinScanOpaque) scan->opaque;
509
GinState *ginstate = &so->ginstate;
512
for (i = 0; i < so->totalentries; i++)
513
startScanEntry(ginstate, so->entries[i]);
515
if (GinFuzzySearchLimit > 0)
518
* If all of keys more than threshold we will try to reduce result, we
519
* hope (and only hope, for intersection operation of array our
520
* supposition isn't true), that total result will not more than
521
* minimal predictNumberResult.
524
for (i = 0; i < so->totalentries; i++)
525
if (so->entries[i]->predictNumberResult <= so->totalentries * GinFuzzySearchLimit)
528
for (i = 0; i < so->totalentries; i++)
529
if (so->entries[i]->predictNumberResult > so->totalentries * GinFuzzySearchLimit)
531
so->entries[i]->predictNumberResult /= so->totalentries;
532
so->entries[i]->reduceResult = TRUE;
536
for (i = 0; i < so->nkeys; i++)
537
startScanKey(ginstate, so->keys + i);
541
* Gets next ItemPointer from PostingTree. Note, that we copy
542
* page into GinScanEntry->list array and unlock page, but keep it pinned
543
* to prevent interference with vacuum
546
entryGetNextItem(GinState *ginstate, GinScanEntry entry)
553
if (entry->offset < entry->nlist)
555
entry->curItem = entry->list[entry->offset++];
559
LockBuffer(entry->buffer, GIN_SHARE);
560
page = BufferGetPage(entry->buffer);
564
* It's needed to go by right link. During that we should refind
565
* first ItemPointer greater that stored
568
blkno = GinPageGetOpaque(page)->rightlink;
570
LockBuffer(entry->buffer, GIN_UNLOCK);
571
if (blkno == InvalidBlockNumber)
573
ReleaseBuffer(entry->buffer);
574
ItemPointerSetInvalid(&entry->curItem);
575
entry->buffer = InvalidBuffer;
576
entry->isFinished = TRUE;
580
entry->buffer = ReleaseAndReadBuffer(entry->buffer,
583
LockBuffer(entry->buffer, GIN_SHARE);
584
page = BufferGetPage(entry->buffer);
586
entry->offset = InvalidOffsetNumber;
587
if (!ItemPointerIsValid(&entry->curItem) ||
588
findItemInPostingPage(page, &entry->curItem, &entry->offset))
591
* Found position equal to or greater than stored
593
entry->nlist = GinPageGetOpaque(page)->maxoff;
594
memcpy(entry->list, GinDataPageGetItem(page, FirstOffsetNumber),
595
GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData));
597
LockBuffer(entry->buffer, GIN_UNLOCK);
599
if (!ItemPointerIsValid(&entry->curItem) ||
600
ginCompareItemPointers(&entry->curItem,
601
entry->list + entry->offset - 1) == 0)
604
* First pages are deleted or empty, or we found exact
605
* position, so break inner loop and continue outer one.
611
* Find greater than entry->curItem position, store it.
613
entry->curItem = entry->list[entry->offset - 1];
621
#define gin_rand() (((double) random()) / ((double) MAX_RANDOM_VALUE))
622
#define dropItem(e) ( gin_rand() > ((double)GinFuzzySearchLimit)/((double)((e)->predictNumberResult)) )
625
* Sets entry->curItem to next heap item pointer for one entry of one scan key,
626
* or sets entry->isFinished to TRUE if there are no more.
628
* Item pointers must be returned in ascending order.
630
* Note: this can return a "lossy page" item pointer, indicating that the
631
* entry potentially matches all items on that heap page. However, it is
632
* not allowed to return both a lossy page pointer and exact (regular)
633
* item pointers for the same page. (Doing so would break the key-combination
634
* logic in keyGetItem and scanGetItem; see comment in scanGetItem.) In the
635
* current implementation this is guaranteed by the behavior of tidbitmaps.
638
entryGetItem(GinState *ginstate, GinScanEntry entry)
640
Assert(!entry->isFinished);
642
if (entry->matchBitmap)
646
if (entry->matchResult == NULL ||
647
entry->offset >= entry->matchResult->ntuples)
649
entry->matchResult = tbm_iterate(entry->matchIterator);
651
if (entry->matchResult == NULL)
653
ItemPointerSetInvalid(&entry->curItem);
654
tbm_end_iterate(entry->matchIterator);
655
entry->matchIterator = NULL;
656
entry->isFinished = TRUE;
661
* Reset counter to the beginning of entry->matchResult. Note:
662
* entry->offset is still greater than matchResult->ntuples if
663
* matchResult is lossy. So, on next call we will get next
664
* result from TIDBitmap.
669
if (entry->matchResult->ntuples < 0)
672
* lossy result, so we need to check the whole page
674
ItemPointerSetLossyPage(&entry->curItem,
675
entry->matchResult->blockno);
678
* We might as well fall out of the loop; we could not
679
* estimate number of results on this page to support correct
680
* reducing of result even if it's enabled
685
ItemPointerSet(&entry->curItem,
686
entry->matchResult->blockno,
687
entry->matchResult->offsets[entry->offset]);
689
} while (entry->reduceResult == TRUE && dropItem(entry));
691
else if (!BufferIsValid(entry->buffer))
694
if (entry->offset <= entry->nlist)
695
entry->curItem = entry->list[entry->offset - 1];
698
ItemPointerSetInvalid(&entry->curItem);
699
entry->isFinished = TRUE;
706
entryGetNextItem(ginstate, entry);
707
} while (entry->isFinished == FALSE &&
708
entry->reduceResult == TRUE &&
714
* Identify the "current" item among the input entry streams for this scan key,
715
* and test whether it passes the scan key qual condition.
717
* The current item is the smallest curItem among the inputs. key->curItem
718
* is set to that value. key->curItemMatches is set to indicate whether that
719
* TID passes the consistentFn test. If so, key->recheckCurItem is set true
720
* iff recheck is needed for this item pointer (including the case where the
721
* item pointer is a lossy page pointer).
723
* If all entry streams are exhausted, sets key->isFinished to TRUE.
725
* Item pointers must be returned in ascending order.
727
* Note: this can return a "lossy page" item pointer, indicating that the
728
* key potentially matches all items on that heap page. However, it is
729
* not allowed to return both a lossy page pointer and exact (regular)
730
* item pointers for the same page. (Doing so would break the key-combination
731
* logic in scanGetItem.)
734
keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key)
736
ItemPointerData minItem;
737
ItemPointerData curPageLossy;
743
MemoryContext oldCtx;
745
Assert(!key->isFinished);
748
* Find the minimum of the active entry curItems.
750
* Note: a lossy-page entry is encoded by a ItemPointer with max value for
751
* offset (0xffff), so that it will sort after any exact entries for the
752
* same page. So we'll prefer to return exact pointers not lossy
753
* pointers, which is good.
755
ItemPointerSetMax(&minItem);
757
for (i = 0; i < key->nentries; i++)
759
entry = key->scanEntry[i];
760
if (entry->isFinished == FALSE &&
761
ginCompareItemPointers(&entry->curItem, &minItem) < 0)
762
minItem = entry->curItem;
765
if (ItemPointerIsMax(&minItem))
767
/* all entries are finished */
768
key->isFinished = TRUE;
773
* We might have already tested this item; if so, no need to repeat work.
774
* (Note: the ">" case can happen, if minItem is exact but we previously
775
* had to set curItem to a lossy-page pointer.)
777
if (ginCompareItemPointers(&key->curItem, &minItem) >= 0)
781
* OK, advance key->curItem and perform consistentFn test.
783
key->curItem = minItem;
786
* Lossy-page entries pose a problem, since we don't know the correct
787
* entryRes state to pass to the consistentFn, and we also don't know what
788
* its combining logic will be (could be AND, OR, or even NOT). If the
789
* logic is OR then the consistentFn might succeed for all items in the
790
* lossy page even when none of the other entries match.
792
* If we have a single lossy-page entry then we check to see if the
793
* consistentFn will succeed with only that entry TRUE. If so, we return
794
* a lossy-page pointer to indicate that the whole heap page must be
795
* checked. (On subsequent calls, we'll do nothing until minItem is past
796
* the page altogether, thus ensuring that we never return both regular
797
* and lossy pointers for the same page.)
799
* This idea could be generalized to more than one lossy-page entry, but
800
* ideally lossy-page entries should be infrequent so it would seldom be
801
* the case that we have more than one at once. So it doesn't seem worth
802
* the extra complexity to optimize that case. If we do find more than
803
* one, we just punt and return a lossy-page pointer always.
805
* Note that only lossy-page entries pointing to the current item's page
806
* should trigger this processing; we might have future lossy pages in the
807
* entry array, but they aren't relevant yet.
809
ItemPointerSetLossyPage(&curPageLossy,
810
GinItemPointerGetBlockNumber(&key->curItem));
813
haveLossyEntry = false;
814
for (i = 0; i < key->nentries; i++)
816
entry = key->scanEntry[i];
817
if (entry->isFinished == FALSE &&
818
ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0)
822
/* Multiple lossy entries, punt */
823
key->curItem = curPageLossy;
824
key->curItemMatches = true;
825
key->recheckCurItem = true;
829
haveLossyEntry = true;
833
/* prepare for calling consistentFn in temp context */
834
oldCtx = MemoryContextSwitchTo(tempCtx);
838
/* Single lossy-page entry, so see if whole page matches */
839
memset(key->entryRes, FALSE, key->nentries);
840
key->entryRes[lossyEntry] = TRUE;
842
if (callConsistentFn(ginstate, key))
844
/* Yes, so clean up ... */
845
MemoryContextSwitchTo(oldCtx);
846
MemoryContextReset(tempCtx);
848
/* and return lossy pointer for whole page */
849
key->curItem = curPageLossy;
850
key->curItemMatches = true;
851
key->recheckCurItem = true;
857
* At this point we know that we don't need to return a lossy whole-page
858
* pointer, but we might have matches for individual exact item pointers,
859
* possibly in combination with a lossy pointer. Our strategy if there's
860
* a lossy pointer is to try the consistentFn both ways and return a hit
861
* if it accepts either one (forcing the hit to be marked lossy so it will
862
* be rechecked). An exception is that we don't need to try it both ways
863
* if the lossy pointer is in a "hidden" entry, because the consistentFn's
864
* result can't depend on that.
866
* Prepare entryRes array to be passed to consistentFn.
868
for (i = 0; i < key->nentries; i++)
870
entry = key->scanEntry[i];
871
if (entry->isFinished == FALSE &&
872
ginCompareItemPointers(&entry->curItem, &key->curItem) == 0)
873
key->entryRes[i] = TRUE;
875
key->entryRes[i] = FALSE;
878
key->entryRes[lossyEntry] = TRUE;
880
res = callConsistentFn(ginstate, key);
882
if (!res && haveLossyEntry && lossyEntry < key->nuserentries)
884
/* try the other way for the lossy item */
885
key->entryRes[lossyEntry] = FALSE;
887
res = callConsistentFn(ginstate, key);
890
key->curItemMatches = res;
891
/* If we matched a lossy entry, force recheckCurItem = true */
893
key->recheckCurItem = true;
895
/* clean up after consistentFn calls */
896
MemoryContextSwitchTo(oldCtx);
897
MemoryContextReset(tempCtx);
901
* Get next heap item pointer (after advancePast) from scan.
902
* Returns true if anything found.
903
* On success, *item and *recheck are set.
905
* Note: this is very nearly the same logic as in keyGetItem(), except
906
* that we know the keys are to be combined with AND logic, whereas in
907
* keyGetItem() the combination logic is known only to the consistentFn.
910
scanGetItem(IndexScanDesc scan, ItemPointer advancePast,
911
ItemPointerData *item, bool *recheck)
913
GinScanOpaque so = (GinScanOpaque) scan->opaque;
914
GinState *ginstate = &so->ginstate;
915
ItemPointerData myAdvancePast = *advancePast;
923
* Advance any entries that are <= myAdvancePast. In particular,
924
* since entry->curItem was initialized with ItemPointerSetMin, this
925
* ensures we fetch the first item for each entry on the first call.
929
for (i = 0; i < so->totalentries; i++)
931
GinScanEntry entry = so->entries[i];
933
while (entry->isFinished == FALSE &&
934
ginCompareItemPointers(&entry->curItem,
935
&myAdvancePast) <= 0)
936
entryGetItem(ginstate, entry);
938
if (entry->isFinished == FALSE)
944
/* all entries exhausted, so we're done */
949
* Perform the consistentFn test for each scan key. If any key
950
* reports isFinished, meaning its subset of the entries is exhausted,
951
* we can stop. Otherwise, set *item to the minimum of the key
954
ItemPointerSetMax(item);
956
for (i = 0; i < so->nkeys; i++)
958
GinScanKey key = so->keys + i;
960
keyGetItem(&so->ginstate, so->tempCtx, key);
963
return false; /* finished one of keys */
965
if (ginCompareItemPointers(&key->curItem, item) < 0)
966
*item = key->curItem;
969
Assert(!ItemPointerIsMax(item));
972
* Now *item contains first ItemPointer after previous result.
974
* The item is a valid hit only if all the keys succeeded for either
975
* that exact TID, or a lossy reference to the same page.
977
* This logic works only if a keyGetItem stream can never contain both
978
* exact and lossy pointers for the same page. Else we could have a
987
* We would conclude that 42/6 is not a match and advance stream 1,
988
* thus never detecting the match to the lossy pointer in stream 2.
989
* (keyGetItem has a similar problem versus entryGetItem.)
993
for (i = 0; i < so->nkeys; i++)
995
GinScanKey key = so->keys + i;
997
if (key->curItemMatches)
999
if (ginCompareItemPointers(item, &key->curItem) == 0)
1001
if (ItemPointerIsLossyPage(&key->curItem) &&
1002
GinItemPointerGetBlockNumber(&key->curItem) ==
1003
GinItemPointerGetBlockNumber(item))
1014
* No hit. Update myAdvancePast to this TID, so that on the next pass
1015
* we'll move to the next possible entry.
1017
myAdvancePast = *item;
1021
* We must return recheck = true if any of the keys are marked recheck.
1024
for (i = 0; i < so->nkeys; i++)
1026
GinScanKey key = so->keys + i;
1028
if (key->recheckCurItem)
1040
* Functions for scanning the pending list
1045
* Get ItemPointer of next heap row to be checked from pending list.
1046
* Returns false if there are no more. On pages with several heap rows
1047
* it returns each row separately, on page with part of heap row returns
1048
* per page data. pos->firstOffset and pos->lastOffset are set to identify
1049
* the range of pending-list tuples belonging to this heap row.
1051
* The pendingBuffer is presumed pinned and share-locked on entry, and is
1052
* pinned and share-locked on success exit. On failure exit it's released.
1055
scanGetCandidate(IndexScanDesc scan, pendingPosition *pos)
1057
OffsetNumber maxoff;
1061
ItemPointerSetInvalid(&pos->item);
1064
page = BufferGetPage(pos->pendingBuffer);
1066
maxoff = PageGetMaxOffsetNumber(page);
1067
if (pos->firstOffset > maxoff)
1069
BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
1071
if (blkno == InvalidBlockNumber)
1073
UnlockReleaseBuffer(pos->pendingBuffer);
1074
pos->pendingBuffer = InvalidBuffer;
1081
* Here we must prevent deletion of next page by insertcleanup
1082
* process, which may be trying to obtain exclusive lock on
1083
* current page. So, we lock next page before releasing the
1086
Buffer tmpbuf = ReadBuffer(scan->indexRelation, blkno);
1088
LockBuffer(tmpbuf, GIN_SHARE);
1089
UnlockReleaseBuffer(pos->pendingBuffer);
1091
pos->pendingBuffer = tmpbuf;
1092
pos->firstOffset = FirstOffsetNumber;
1097
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->firstOffset));
1098
pos->item = itup->t_tid;
1099
if (GinPageHasFullRow(page))
1102
* find itempointer to the next row
1104
for (pos->lastOffset = pos->firstOffset + 1; pos->lastOffset <= maxoff; pos->lastOffset++)
1106
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->lastOffset));
1107
if (!ItemPointerEquals(&pos->item, &itup->t_tid))
1114
* All itempointers are the same on this page
1116
pos->lastOffset = maxoff + 1;
1120
* Now pos->firstOffset points to the first tuple of current heap
1121
* row, pos->lastOffset points to the first tuple of next heap row
1122
* (or to the end of page)
1132
* Scan pending-list page from current tuple (off) up till the first of:
1133
* - match is found (then returns true)
1134
* - no later match is possible
1135
* - tuple's attribute number is not equal to entry's attrnum
1136
* - reach end of page
1138
* datum[]/category[]/datumExtracted[] arrays are used to cache the results
1139
* of gintuple_get_key() on the current page.
1142
matchPartialInPendingList(GinState *ginstate, Page page,
1143
OffsetNumber off, OffsetNumber maxoff,
1145
Datum *datum, GinNullCategory *category,
1146
bool *datumExtracted)
1151
/* Partial match to a null is not possible */
1152
if (entry->queryCategory != GIN_CAT_NORM_KEY)
1155
while (off < maxoff)
1157
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
1159
if (gintuple_get_attrnum(ginstate, itup) != entry->attnum)
1162
if (datumExtracted[off - 1] == false)
1164
datum[off - 1] = gintuple_get_key(ginstate, itup,
1165
&category[off - 1]);
1166
datumExtracted[off - 1] = true;
1169
/* Once we hit nulls, no further match is possible */
1170
if (category[off - 1] != GIN_CAT_NORM_KEY)
1174
* Check partial match.
1175
* case cmp == 0 => match
1176
* case cmp > 0 => not match and end scan (no later match possible)
1177
* case cmp < 0 => not match and continue scan
1180
cmp = DatumGetInt32(FunctionCall4Coll(&ginstate->comparePartialFn[entry->attnum - 1],
1181
ginstate->supportCollation[entry->attnum - 1],
1184
UInt16GetDatum(entry->strategy),
1185
PointerGetDatum(entry->extra_data)));
1198
* Set up the entryRes array for each key by looking at
1199
* every entry for current heap row in pending list.
1201
* Returns true if each scan key has at least one entryRes match.
1202
* This corresponds to the situations where the normal index search will
1203
* try to apply the key's consistentFn. (A tuple not meeting that requirement
1204
* cannot be returned by the normal search since no entry stream will
1207
* The pendingBuffer is presumed pinned and share-locked on entry.
1210
collectMatchesForHeapRow(IndexScanDesc scan, pendingPosition *pos)
1212
GinScanOpaque so = (GinScanOpaque) scan->opaque;
1213
OffsetNumber attrnum;
1220
* Reset all entryRes and hasMatchKey flags
1222
for (i = 0; i < so->nkeys; i++)
1224
GinScanKey key = so->keys + i;
1226
memset(key->entryRes, FALSE, key->nentries);
1228
memset(pos->hasMatchKey, FALSE, so->nkeys);
1231
* Outer loop iterates over multiple pending-list pages when a single heap
1232
* row has entries spanning those pages.
1236
Datum datum[BLCKSZ / sizeof(IndexTupleData)];
1237
GinNullCategory category[BLCKSZ / sizeof(IndexTupleData)];
1238
bool datumExtracted[BLCKSZ / sizeof(IndexTupleData)];
1240
Assert(pos->lastOffset > pos->firstOffset);
1241
memset(datumExtracted + pos->firstOffset - 1, 0,
1242
sizeof(bool) * (pos->lastOffset - pos->firstOffset));
1244
page = BufferGetPage(pos->pendingBuffer);
1246
for (i = 0; i < so->nkeys; i++)
1248
GinScanKey key = so->keys + i;
1250
for (j = 0; j < key->nentries; j++)
1252
GinScanEntry entry = key->scanEntry[j];
1253
OffsetNumber StopLow = pos->firstOffset,
1254
StopHigh = pos->lastOffset,
1257
/* If already matched on earlier page, do no extra work */
1258
if (key->entryRes[j])
1262
* Interesting tuples are from pos->firstOffset to
1263
* pos->lastOffset and they are ordered by (attnum, Datum) as
1264
* it's done in entry tree. So we can use binary search to
1265
* avoid linear scanning.
1267
while (StopLow < StopHigh)
1271
StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
1273
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, StopMiddle));
1275
attrnum = gintuple_get_attrnum(&so->ginstate, itup);
1277
if (key->attnum < attrnum)
1279
StopHigh = StopMiddle;
1282
if (key->attnum > attrnum)
1284
StopLow = StopMiddle + 1;
1288
if (datumExtracted[StopMiddle - 1] == false)
1290
datum[StopMiddle - 1] =
1291
gintuple_get_key(&so->ginstate, itup,
1292
&category[StopMiddle - 1]);
1293
datumExtracted[StopMiddle - 1] = true;
1296
if (entry->queryCategory == GIN_CAT_EMPTY_QUERY)
1298
/* special behavior depending on searchMode */
1299
if (entry->searchMode == GIN_SEARCH_MODE_ALL)
1301
/* match anything except NULL_ITEM */
1302
if (category[StopMiddle - 1] == GIN_CAT_NULL_ITEM)
1309
/* match everything */
1315
res = ginCompareEntries(&so->ginstate,
1318
entry->queryCategory,
1319
datum[StopMiddle - 1],
1320
category[StopMiddle - 1]);
1326
* Found exact match (there can be only one, except in
1327
* EMPTY_QUERY mode).
1329
* If doing partial match, scan forward from here to
1330
* end of page to check for matches.
1332
* See comment above about tuple's ordering.
1334
if (entry->isPartialMatch)
1336
matchPartialInPendingList(&so->ginstate,
1345
key->entryRes[j] = true;
1347
/* done with binary search */
1351
StopHigh = StopMiddle;
1353
StopLow = StopMiddle + 1;
1356
if (StopLow >= StopHigh && entry->isPartialMatch)
1359
* No exact match on this page. If doing partial match,
1360
* scan from the first tuple greater than target value to
1361
* end of page. Note that since we don't remember whether
1362
* the comparePartialFn told us to stop early on a
1363
* previous page, we will uselessly apply comparePartialFn
1364
* to the first tuple on each subsequent page.
1367
matchPartialInPendingList(&so->ginstate,
1377
pos->hasMatchKey[i] |= key->entryRes[j];
1381
/* Advance firstOffset over the scanned tuples */
1382
pos->firstOffset = pos->lastOffset;
1384
if (GinPageHasFullRow(page))
1387
* We have examined all pending entries for the current heap row.
1388
* Break out of loop over pages.
1395
* Advance to next page of pending entries for the current heap
1396
* row. Complain if there isn't one.
1398
ItemPointerData item = pos->item;
1400
if (scanGetCandidate(scan, pos) == false ||
1401
!ItemPointerEquals(&pos->item, &item))
1402
elog(ERROR, "could not find additional pending pages for same heap tuple");
1407
* Now return "true" if all scan keys have at least one matching datum
1409
for (i = 0; i < so->nkeys; i++)
1411
if (pos->hasMatchKey[i] == false)
1419
* Collect all matched rows from pending list into bitmap
1422
scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
1424
GinScanOpaque so = (GinScanOpaque) scan->opaque;
1425
MemoryContext oldCtx;
1429
pendingPosition pos;
1430
Buffer metabuffer = ReadBuffer(scan->indexRelation, GIN_METAPAGE_BLKNO);
1435
LockBuffer(metabuffer, GIN_SHARE);
1436
blkno = GinPageGetMeta(BufferGetPage(metabuffer))->head;
1439
* fetch head of list before unlocking metapage. head page must be pinned
1440
* to prevent deletion by vacuum process
1442
if (blkno == InvalidBlockNumber)
1444
/* No pending list, so proceed with normal scan */
1445
UnlockReleaseBuffer(metabuffer);
1449
pos.pendingBuffer = ReadBuffer(scan->indexRelation, blkno);
1450
LockBuffer(pos.pendingBuffer, GIN_SHARE);
1451
pos.firstOffset = FirstOffsetNumber;
1452
UnlockReleaseBuffer(metabuffer);
1453
pos.hasMatchKey = palloc(sizeof(bool) * so->nkeys);
1456
* loop for each heap row. scanGetCandidate returns full row or row's
1457
* tuples from first page.
1459
while (scanGetCandidate(scan, &pos))
1462
* Check entries in tuple and set up entryRes array.
1464
* If pending tuples belonging to the current heap row are spread
1465
* across several pages, collectMatchesForHeapRow will read all of
1468
if (!collectMatchesForHeapRow(scan, &pos))
1472
* Matching of entries of one row is finished, so check row using
1473
* consistent functions.
1475
oldCtx = MemoryContextSwitchTo(so->tempCtx);
1479
for (i = 0; i < so->nkeys; i++)
1481
GinScanKey key = so->keys + i;
1483
if (!callConsistentFn(&so->ginstate, key))
1488
recheck |= key->recheckCurItem;
1491
MemoryContextSwitchTo(oldCtx);
1492
MemoryContextReset(so->tempCtx);
1496
tbm_add_tuples(tbm, &pos.item, 1, recheck);
1501
pfree(pos.hasMatchKey);
1505
#define GinIsNewKey(s) ( ((GinScanOpaque) scan->opaque)->keys == NULL )
1506
#define GinIsVoidRes(s) ( ((GinScanOpaque) scan->opaque)->isVoidRes )
1509
gingetbitmap(PG_FUNCTION_ARGS)
1511
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
1512
TIDBitmap *tbm = (TIDBitmap *) PG_GETARG_POINTER(1);
1514
ItemPointerData iptr;
1518
* Set up the scan keys, and check for unsatisfiable query.
1520
if (GinIsNewKey(scan))
1521
ginNewScanKey(scan);
1523
if (GinIsVoidRes(scan))
1529
* First, scan the pending list and collect any matching entries into the
1530
* bitmap. After we scan a pending item, some other backend could post it
1531
* into the main index, and so we might visit it a second time during the
1532
* main scan. This is okay because we'll just re-set the same bit in the
1533
* bitmap. (The possibility of duplicate visits is a major reason why GIN
1534
* can't support the amgettuple API, however.) Note that it would not do
1535
* to scan the main index before the pending list, since concurrent
1536
* cleanup could then make us miss entries entirely.
1538
scanPendingInsert(scan, tbm, &ntids);
1541
* Now scan the main index.
1545
ItemPointerSetMin(&iptr);
1549
CHECK_FOR_INTERRUPTS();
1551
if (!scanGetItem(scan, &iptr, &iptr, &recheck))
1554
if (ItemPointerIsLossyPage(&iptr))
1555
tbm_add_page(tbm, ItemPointerGetBlockNumber(&iptr));
1557
tbm_add_tuples(tbm, &iptr, 1, recheck);
1561
PG_RETURN_INT64(ntids);