~ubuntu-branches/ubuntu/oneiric/postgresql-9.1/oneiric-security

« back to all changes in this revision

Viewing changes to src/backend/access/gin/ginget.c

  • Committer: Bazaar Package Importer
  • Author(s): Martin Pitt
  • Date: 2011-05-11 10:41:53 UTC
  • Revision ID: james.westby@ubuntu.com-20110511104153-psbh2o58553fv1m0
Tags: upstream-9.1~beta1
ImportĀ upstreamĀ versionĀ 9.1~beta1

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*-------------------------------------------------------------------------
 
2
 *
 
3
 * ginget.c
 
4
 *        fetch tuples from a GIN scan.
 
5
 *
 
6
 *
 
7
 * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
 
8
 * Portions Copyright (c) 1994, Regents of the University of California
 
9
 *
 
10
 * IDENTIFICATION
 
11
 *                      src/backend/access/gin/ginget.c
 
12
 *-------------------------------------------------------------------------
 
13
 */
 
14
 
 
15
#include "postgres.h"
 
16
 
 
17
#include "access/gin_private.h"
 
18
#include "access/relscan.h"
 
19
#include "catalog/index.h"
 
20
#include "miscadmin.h"
 
21
#include "storage/bufmgr.h"
 
22
#include "utils/datum.h"
 
23
#include "utils/memutils.h"
 
24
 
 
25
 
 
26
typedef struct pendingPosition
 
27
{
 
28
        Buffer          pendingBuffer;
 
29
        OffsetNumber firstOffset;
 
30
        OffsetNumber lastOffset;
 
31
        ItemPointerData item;
 
32
        bool       *hasMatchKey;
 
33
} pendingPosition;
 
34
 
 
35
 
 
36
/*
 
37
 * Convenience function for invoking a key's consistentFn
 
38
 */
 
39
static bool
 
40
callConsistentFn(GinState *ginstate, GinScanKey key)
 
41
{
 
42
        /*
 
43
         * If we're dealing with a dummy EVERYTHING key, we don't want to call the
 
44
         * consistentFn; just claim it matches.
 
45
         */
 
46
        if (key->searchMode == GIN_SEARCH_MODE_EVERYTHING)
 
47
        {
 
48
                key->recheckCurItem = false;
 
49
                return true;
 
50
        }
 
51
 
 
52
        /*
 
53
         * Initialize recheckCurItem in case the consistentFn doesn't know it
 
54
         * should set it.  The safe assumption in that case is to force recheck.
 
55
         */
 
56
        key->recheckCurItem = true;
 
57
 
 
58
        return DatumGetBool(FunctionCall8Coll(&ginstate->consistentFn[key->attnum - 1],
 
59
                                                                                  ginstate->supportCollation[key->attnum - 1],
 
60
                                                                                  PointerGetDatum(key->entryRes),
 
61
                                                                                  UInt16GetDatum(key->strategy),
 
62
                                                                                  key->query,
 
63
                                                                                  UInt32GetDatum(key->nuserentries),
 
64
                                                                                  PointerGetDatum(key->extra_data),
 
65
                                                                                  PointerGetDatum(&key->recheckCurItem),
 
66
                                                                                  PointerGetDatum(key->queryValues),
 
67
                                                                                  PointerGetDatum(key->queryCategories)));
 
68
}
 
69
 
 
70
/*
 
71
 * Tries to refind previously taken ItemPointer on a posting page.
 
72
 */
 
73
static bool
 
74
findItemInPostingPage(Page page, ItemPointer item, OffsetNumber *off)
 
75
{
 
76
        OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
 
77
        int                     res;
 
78
 
 
79
        if (GinPageGetOpaque(page)->flags & GIN_DELETED)
 
80
                /* page was deleted by concurrent vacuum */
 
81
                return false;
 
82
 
 
83
        /*
 
84
         * scan page to find equal or first greater value
 
85
         */
 
86
        for (*off = FirstOffsetNumber; *off <= maxoff; (*off)++)
 
87
        {
 
88
                res = ginCompareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off));
 
89
 
 
90
                if (res <= 0)
 
91
                        return true;
 
92
        }
 
93
 
 
94
        return false;
 
95
}
 
96
 
 
97
/*
 
98
 * Goes to the next page if current offset is outside of bounds
 
99
 */
 
100
static bool
 
101
moveRightIfItNeeded(GinBtreeData *btree, GinBtreeStack *stack)
 
102
{
 
103
        Page            page = BufferGetPage(stack->buffer);
 
104
 
 
105
        if (stack->off > PageGetMaxOffsetNumber(page))
 
106
        {
 
107
                /*
 
108
                 * We scanned the whole page, so we should take right page
 
109
                 */
 
110
                stack->blkno = GinPageGetOpaque(page)->rightlink;
 
111
 
 
112
                if (GinPageRightMost(page))
 
113
                        return false;           /* no more pages */
 
114
 
 
115
                LockBuffer(stack->buffer, GIN_UNLOCK);
 
116
                stack->buffer = ReleaseAndReadBuffer(stack->buffer,
 
117
                                                                                         btree->index,
 
118
                                                                                         stack->blkno);
 
119
                LockBuffer(stack->buffer, GIN_SHARE);
 
120
                stack->off = FirstOffsetNumber;
 
121
        }
 
122
 
 
123
        return true;
 
124
}
 
125
 
 
126
/*
 
127
 * Scan all pages of a posting tree and save all its heap ItemPointers
 
128
 * in scanEntry->matchBitmap
 
129
 */
 
130
static void
 
131
scanPostingTree(Relation index, GinScanEntry scanEntry,
 
132
                                BlockNumber rootPostingTree)
 
133
{
 
134
        GinPostingTreeScan *gdi;
 
135
        Buffer          buffer;
 
136
        Page            page;
 
137
        BlockNumber blkno;
 
138
 
 
139
        /* Descend to the leftmost leaf page */
 
140
        gdi = ginPrepareScanPostingTree(index, rootPostingTree, TRUE);
 
141
 
 
142
        buffer = ginScanBeginPostingTree(gdi);
 
143
        IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */
 
144
 
 
145
        freeGinBtreeStack(gdi->stack);
 
146
        pfree(gdi);
 
147
 
 
148
        /*
 
149
         * Loop iterates through all leaf pages of posting tree
 
150
         */
 
151
        for (;;)
 
152
        {
 
153
                page = BufferGetPage(buffer);
 
154
 
 
155
                if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0 &&
 
156
                        GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber)
 
157
                {
 
158
                        tbm_add_tuples(scanEntry->matchBitmap,
 
159
                                   (ItemPointer) GinDataPageGetItem(page, FirstOffsetNumber),
 
160
                                                   GinPageGetOpaque(page)->maxoff, false);
 
161
                        scanEntry->predictNumberResult += GinPageGetOpaque(page)->maxoff;
 
162
                }
 
163
 
 
164
                if (GinPageRightMost(page))
 
165
                        break;                          /* no more pages */
 
166
 
 
167
                blkno = GinPageGetOpaque(page)->rightlink;
 
168
                LockBuffer(buffer, GIN_UNLOCK);
 
169
                buffer = ReleaseAndReadBuffer(buffer, index, blkno);
 
170
                LockBuffer(buffer, GIN_SHARE);
 
171
        }
 
172
 
 
173
        UnlockReleaseBuffer(buffer);
 
174
}
 
175
 
 
176
/*
 
177
 * Collects TIDs into scanEntry->matchBitmap for all heap tuples that
 
178
 * match the search entry.      This supports three different match modes:
 
179
 *
 
180
 * 1. Partial-match support: scan from current point until the
 
181
 *        comparePartialFn says we're done.
 
182
 * 2. SEARCH_MODE_ALL: scan from current point (which should be first
 
183
 *        key for the current attnum) until we hit null items or end of attnum
 
184
 * 3. SEARCH_MODE_EVERYTHING: scan from current point (which should be first
 
185
 *        key for the current attnum) until we hit end of attnum
 
186
 *
 
187
 * Returns true if done, false if it's necessary to restart scan from scratch
 
188
 */
 
189
static bool
 
190
collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
 
191
                                   GinScanEntry scanEntry)
 
192
{
 
193
        OffsetNumber attnum;
 
194
        Form_pg_attribute attr;
 
195
 
 
196
        /* Initialize empty bitmap result */
 
197
        scanEntry->matchBitmap = tbm_create(work_mem * 1024L);
 
198
 
 
199
        /* Null query cannot partial-match anything */
 
200
        if (scanEntry->isPartialMatch &&
 
201
                scanEntry->queryCategory != GIN_CAT_NORM_KEY)
 
202
                return true;
 
203
 
 
204
        /* Locate tupdesc entry for key column (for attbyval/attlen data) */
 
205
        attnum = scanEntry->attnum;
 
206
        attr = btree->ginstate->origTupdesc->attrs[attnum - 1];
 
207
 
 
208
        for (;;)
 
209
        {
 
210
                Page            page;
 
211
                IndexTuple      itup;
 
212
                Datum           idatum;
 
213
                GinNullCategory icategory;
 
214
 
 
215
                /*
 
216
                 * stack->off points to the interested entry, buffer is already locked
 
217
                 */
 
218
                if (moveRightIfItNeeded(btree, stack) == false)
 
219
                        return true;
 
220
 
 
221
                page = BufferGetPage(stack->buffer);
 
222
                itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
 
223
 
 
224
                /*
 
225
                 * If tuple stores another attribute then stop scan
 
226
                 */
 
227
                if (gintuple_get_attrnum(btree->ginstate, itup) != attnum)
 
228
                        return true;
 
229
 
 
230
                /* Safe to fetch attribute value */
 
231
                idatum = gintuple_get_key(btree->ginstate, itup, &icategory);
 
232
 
 
233
                /*
 
234
                 * Check for appropriate scan stop conditions
 
235
                 */
 
236
                if (scanEntry->isPartialMatch)
 
237
                {
 
238
                        int32           cmp;
 
239
 
 
240
                        /*
 
241
                         * In partial match, stop scan at any null (including
 
242
                         * placeholders); partial matches never match nulls
 
243
                         */
 
244
                        if (icategory != GIN_CAT_NORM_KEY)
 
245
                                return true;
 
246
 
 
247
                        /*----------
 
248
                         * Check of partial match.
 
249
                         * case cmp == 0 => match
 
250
                         * case cmp > 0 => not match and finish scan
 
251
                         * case cmp < 0 => not match and continue scan
 
252
                         *----------
 
253
                         */
 
254
                        cmp = DatumGetInt32(FunctionCall4Coll(&btree->ginstate->comparePartialFn[attnum - 1],
 
255
                                                                                                  btree->ginstate->supportCollation[attnum - 1],
 
256
                                                                                                  scanEntry->queryKey,
 
257
                                                                                                  idatum,
 
258
                                                                                 UInt16GetDatum(scanEntry->strategy),
 
259
                                                                        PointerGetDatum(scanEntry->extra_data)));
 
260
 
 
261
                        if (cmp > 0)
 
262
                                return true;
 
263
                        else if (cmp < 0)
 
264
                        {
 
265
                                stack->off++;
 
266
                                continue;
 
267
                        }
 
268
                }
 
269
                else if (scanEntry->searchMode == GIN_SEARCH_MODE_ALL)
 
270
                {
 
271
                        /*
 
272
                         * In ALL mode, we are not interested in null items, so we can
 
273
                         * stop if we get to a null-item placeholder (which will be the
 
274
                         * last entry for a given attnum).      We do want to include NULL_KEY
 
275
                         * and EMPTY_ITEM entries, though.
 
276
                         */
 
277
                        if (icategory == GIN_CAT_NULL_ITEM)
 
278
                                return true;
 
279
                }
 
280
 
 
281
                /*
 
282
                 * OK, we want to return the TIDs listed in this entry.
 
283
                 */
 
284
                if (GinIsPostingTree(itup))
 
285
                {
 
286
                        BlockNumber rootPostingTree = GinGetPostingTree(itup);
 
287
 
 
288
                        /*
 
289
                         * We should unlock current page (but not unpin) during tree scan
 
290
                         * to prevent deadlock with vacuum processes.
 
291
                         *
 
292
                         * We save current entry value (idatum) to be able to re-find our
 
293
                         * tuple after re-locking
 
294
                         */
 
295
                        if (icategory == GIN_CAT_NORM_KEY)
 
296
                                idatum = datumCopy(idatum, attr->attbyval, attr->attlen);
 
297
 
 
298
                        LockBuffer(stack->buffer, GIN_UNLOCK);
 
299
 
 
300
                        /* Collect all the TIDs in this entry's posting tree */
 
301
                        scanPostingTree(btree->index, scanEntry, rootPostingTree);
 
302
 
 
303
                        /*
 
304
                         * We lock again the entry page and while it was unlocked insert
 
305
                         * might have occurred, so we need to re-find our position.
 
306
                         */
 
307
                        LockBuffer(stack->buffer, GIN_SHARE);
 
308
                        page = BufferGetPage(stack->buffer);
 
309
                        if (!GinPageIsLeaf(page))
 
310
                        {
 
311
                                /*
 
312
                                 * Root page becomes non-leaf while we unlock it. We will
 
313
                                 * start again, this situation doesn't occur often - root can
 
314
                                 * became a non-leaf only once per life of index.
 
315
                                 */
 
316
                                return false;
 
317
                        }
 
318
 
 
319
                        /* Search forward to re-find idatum */
 
320
                        for (;;)
 
321
                        {
 
322
                                Datum           newDatum;
 
323
                                GinNullCategory newCategory;
 
324
 
 
325
                                if (moveRightIfItNeeded(btree, stack) == false)
 
326
                                        elog(ERROR, "lost saved point in index");       /* must not happen !!! */
 
327
 
 
328
                                page = BufferGetPage(stack->buffer);
 
329
                                itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
 
330
 
 
331
                                if (gintuple_get_attrnum(btree->ginstate, itup) != attnum)
 
332
                                        elog(ERROR, "lost saved point in index");       /* must not happen !!! */
 
333
                                newDatum = gintuple_get_key(btree->ginstate, itup,
 
334
                                                                                        &newCategory);
 
335
 
 
336
                                if (ginCompareEntries(btree->ginstate, attnum,
 
337
                                                                          newDatum, newCategory,
 
338
                                                                          idatum, icategory) == 0)
 
339
                                        break;          /* Found! */
 
340
 
 
341
                                stack->off++;
 
342
                        }
 
343
 
 
344
                        if (icategory == GIN_CAT_NORM_KEY && !attr->attbyval)
 
345
                                pfree(DatumGetPointer(idatum));
 
346
                }
 
347
                else
 
348
                {
 
349
                        tbm_add_tuples(scanEntry->matchBitmap,
 
350
                                                   GinGetPosting(itup), GinGetNPosting(itup), false);
 
351
                        scanEntry->predictNumberResult += GinGetNPosting(itup);
 
352
                }
 
353
 
 
354
                /*
 
355
                 * Done with this entry, go to the next
 
356
                 */
 
357
                stack->off++;
 
358
        }
 
359
 
 
360
        return true;
 
361
}
 
362
 
 
363
/*
 
364
 * Start* functions setup beginning state of searches: finds correct buffer and pins it.
 
365
 */
 
366
static void
 
367
startScanEntry(GinState *ginstate, GinScanEntry entry)
 
368
{
 
369
        GinBtreeData btreeEntry;
 
370
        GinBtreeStack *stackEntry;
 
371
        Page            page;
 
372
        bool            needUnlock;
 
373
 
 
374
restartScanEntry:
 
375
        entry->buffer = InvalidBuffer;
 
376
        ItemPointerSetMin(&entry->curItem);
 
377
        entry->offset = InvalidOffsetNumber;
 
378
        entry->list = NULL;
 
379
        entry->nlist = 0;
 
380
        entry->matchBitmap = NULL;
 
381
        entry->matchResult = NULL;
 
382
        entry->reduceResult = FALSE;
 
383
        entry->predictNumberResult = 0;
 
384
 
 
385
        /*
 
386
         * we should find entry, and begin scan of posting tree or just store
 
387
         * posting list in memory
 
388
         */
 
389
        ginPrepareEntryScan(&btreeEntry, entry->attnum,
 
390
                                                entry->queryKey, entry->queryCategory,
 
391
                                                ginstate);
 
392
        btreeEntry.searchMode = TRUE;
 
393
        stackEntry = ginFindLeafPage(&btreeEntry, NULL);
 
394
        page = BufferGetPage(stackEntry->buffer);
 
395
        needUnlock = TRUE;
 
396
 
 
397
        entry->isFinished = TRUE;
 
398
 
 
399
        if (entry->isPartialMatch ||
 
400
                entry->queryCategory == GIN_CAT_EMPTY_QUERY)
 
401
        {
 
402
                /*
 
403
                 * btreeEntry.findItem locates the first item >= given search key.
 
404
                 * (For GIN_CAT_EMPTY_QUERY, it will find the leftmost index item
 
405
                 * because of the way the GIN_CAT_EMPTY_QUERY category code is
 
406
                 * assigned.)  We scan forward from there and collect all TIDs needed
 
407
                 * for the entry type.
 
408
                 */
 
409
                btreeEntry.findItem(&btreeEntry, stackEntry);
 
410
                if (collectMatchBitmap(&btreeEntry, stackEntry, entry) == false)
 
411
                {
 
412
                        /*
 
413
                         * GIN tree was seriously restructured, so we will cleanup all
 
414
                         * found data and rescan. See comments near 'return false' in
 
415
                         * collectMatchBitmap()
 
416
                         */
 
417
                        if (entry->matchBitmap)
 
418
                        {
 
419
                                if (entry->matchIterator)
 
420
                                        tbm_end_iterate(entry->matchIterator);
 
421
                                entry->matchIterator = NULL;
 
422
                                tbm_free(entry->matchBitmap);
 
423
                                entry->matchBitmap = NULL;
 
424
                        }
 
425
                        LockBuffer(stackEntry->buffer, GIN_UNLOCK);
 
426
                        freeGinBtreeStack(stackEntry);
 
427
                        goto restartScanEntry;
 
428
                }
 
429
 
 
430
                if (entry->matchBitmap && !tbm_is_empty(entry->matchBitmap))
 
431
                {
 
432
                        entry->matchIterator = tbm_begin_iterate(entry->matchBitmap);
 
433
                        entry->isFinished = FALSE;
 
434
                }
 
435
        }
 
436
        else if (btreeEntry.findItem(&btreeEntry, stackEntry))
 
437
        {
 
438
                IndexTuple      itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
 
439
 
 
440
                if (GinIsPostingTree(itup))
 
441
                {
 
442
                        BlockNumber rootPostingTree = GinGetPostingTree(itup);
 
443
                        GinPostingTreeScan *gdi;
 
444
                        Page            page;
 
445
 
 
446
                        /*
 
447
                         * We should unlock entry page before touching posting tree to
 
448
                         * prevent deadlocks with vacuum processes. Because entry is never
 
449
                         * deleted from page and posting tree is never reduced to the
 
450
                         * posting list, we can unlock page after getting BlockNumber of
 
451
                         * root of posting tree.
 
452
                         */
 
453
                        LockBuffer(stackEntry->buffer, GIN_UNLOCK);
 
454
                        needUnlock = FALSE;
 
455
                        gdi = ginPrepareScanPostingTree(ginstate->index, rootPostingTree, TRUE);
 
456
 
 
457
                        entry->buffer = ginScanBeginPostingTree(gdi);
 
458
 
 
459
                        /*
 
460
                         * We keep buffer pinned because we need to prevent deletion of
 
461
                         * page during scan. See GIN's vacuum implementation. RefCount is
 
462
                         * increased to keep buffer pinned after freeGinBtreeStack() call.
 
463
                         */
 
464
                        IncrBufferRefCount(entry->buffer);
 
465
 
 
466
                        page = BufferGetPage(entry->buffer);
 
467
                        entry->predictNumberResult = gdi->stack->predictNumber * GinPageGetOpaque(page)->maxoff;
 
468
 
 
469
                        /*
 
470
                         * Keep page content in memory to prevent durable page locking
 
471
                         */
 
472
                        entry->list = (ItemPointerData *) palloc(BLCKSZ);
 
473
                        entry->nlist = GinPageGetOpaque(page)->maxoff;
 
474
                        memcpy(entry->list, GinDataPageGetItem(page, FirstOffsetNumber),
 
475
                                   GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData));
 
476
 
 
477
                        LockBuffer(entry->buffer, GIN_UNLOCK);
 
478
                        freeGinBtreeStack(gdi->stack);
 
479
                        pfree(gdi);
 
480
                        entry->isFinished = FALSE;
 
481
                }
 
482
                else if (GinGetNPosting(itup) > 0)
 
483
                {
 
484
                        entry->nlist = GinGetNPosting(itup);
 
485
                        entry->list = (ItemPointerData *) palloc(sizeof(ItemPointerData) * entry->nlist);
 
486
                        memcpy(entry->list, GinGetPosting(itup), sizeof(ItemPointerData) * entry->nlist);
 
487
                        entry->isFinished = FALSE;
 
488
                }
 
489
        }
 
490
 
 
491
        if (needUnlock)
 
492
                LockBuffer(stackEntry->buffer, GIN_UNLOCK);
 
493
        freeGinBtreeStack(stackEntry);
 
494
}
 
495
 
 
496
static void
 
497
startScanKey(GinState *ginstate, GinScanKey key)
 
498
{
 
499
        ItemPointerSetMin(&key->curItem);
 
500
        key->curItemMatches = false;
 
501
        key->recheckCurItem = false;
 
502
        key->isFinished = false;
 
503
}
 
504
 
 
505
static void
 
506
startScan(IndexScanDesc scan)
 
507
{
 
508
        GinScanOpaque so = (GinScanOpaque) scan->opaque;
 
509
        GinState   *ginstate = &so->ginstate;
 
510
        uint32          i;
 
511
 
 
512
        for (i = 0; i < so->totalentries; i++)
 
513
                startScanEntry(ginstate, so->entries[i]);
 
514
 
 
515
        if (GinFuzzySearchLimit > 0)
 
516
        {
 
517
                /*
 
518
                 * If all of keys more than threshold we will try to reduce result, we
 
519
                 * hope (and only hope, for intersection operation of array our
 
520
                 * supposition isn't true), that total result will not more than
 
521
                 * minimal predictNumberResult.
 
522
                 */
 
523
 
 
524
                for (i = 0; i < so->totalentries; i++)
 
525
                        if (so->entries[i]->predictNumberResult <= so->totalentries * GinFuzzySearchLimit)
 
526
                                return;
 
527
 
 
528
                for (i = 0; i < so->totalentries; i++)
 
529
                        if (so->entries[i]->predictNumberResult > so->totalentries * GinFuzzySearchLimit)
 
530
                        {
 
531
                                so->entries[i]->predictNumberResult /= so->totalentries;
 
532
                                so->entries[i]->reduceResult = TRUE;
 
533
                        }
 
534
        }
 
535
 
 
536
        for (i = 0; i < so->nkeys; i++)
 
537
                startScanKey(ginstate, so->keys + i);
 
538
}
 
539
 
 
540
/*
 
541
 * Gets next ItemPointer from PostingTree. Note, that we copy
 
542
 * page into GinScanEntry->list array and unlock page, but keep it pinned
 
543
 * to prevent interference with vacuum
 
544
 */
 
545
static void
 
546
entryGetNextItem(GinState *ginstate, GinScanEntry entry)
 
547
{
 
548
        Page            page;
 
549
        BlockNumber blkno;
 
550
 
 
551
        for (;;)
 
552
        {
 
553
                if (entry->offset < entry->nlist)
 
554
                {
 
555
                        entry->curItem = entry->list[entry->offset++];
 
556
                        return;
 
557
                }
 
558
 
 
559
                LockBuffer(entry->buffer, GIN_SHARE);
 
560
                page = BufferGetPage(entry->buffer);
 
561
                for (;;)
 
562
                {
 
563
                        /*
 
564
                         * It's needed to go by right link. During that we should refind
 
565
                         * first ItemPointer greater that stored
 
566
                         */
 
567
 
 
568
                        blkno = GinPageGetOpaque(page)->rightlink;
 
569
 
 
570
                        LockBuffer(entry->buffer, GIN_UNLOCK);
 
571
                        if (blkno == InvalidBlockNumber)
 
572
                        {
 
573
                                ReleaseBuffer(entry->buffer);
 
574
                                ItemPointerSetInvalid(&entry->curItem);
 
575
                                entry->buffer = InvalidBuffer;
 
576
                                entry->isFinished = TRUE;
 
577
                                return;
 
578
                        }
 
579
 
 
580
                        entry->buffer = ReleaseAndReadBuffer(entry->buffer,
 
581
                                                                                                 ginstate->index,
 
582
                                                                                                 blkno);
 
583
                        LockBuffer(entry->buffer, GIN_SHARE);
 
584
                        page = BufferGetPage(entry->buffer);
 
585
 
 
586
                        entry->offset = InvalidOffsetNumber;
 
587
                        if (!ItemPointerIsValid(&entry->curItem) ||
 
588
                                findItemInPostingPage(page, &entry->curItem, &entry->offset))
 
589
                        {
 
590
                                /*
 
591
                                 * Found position equal to or greater than stored
 
592
                                 */
 
593
                                entry->nlist = GinPageGetOpaque(page)->maxoff;
 
594
                                memcpy(entry->list, GinDataPageGetItem(page, FirstOffsetNumber),
 
595
                                   GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData));
 
596
 
 
597
                                LockBuffer(entry->buffer, GIN_UNLOCK);
 
598
 
 
599
                                if (!ItemPointerIsValid(&entry->curItem) ||
 
600
                                        ginCompareItemPointers(&entry->curItem,
 
601
                                                                           entry->list + entry->offset - 1) == 0)
 
602
                                {
 
603
                                        /*
 
604
                                         * First pages are deleted or empty, or we found exact
 
605
                                         * position, so break inner loop and continue outer one.
 
606
                                         */
 
607
                                        break;
 
608
                                }
 
609
 
 
610
                                /*
 
611
                                 * Find greater than entry->curItem position, store it.
 
612
                                 */
 
613
                                entry->curItem = entry->list[entry->offset - 1];
 
614
 
 
615
                                return;
 
616
                        }
 
617
                }
 
618
        }
 
619
}
 
620
 
 
621
#define gin_rand() (((double) random()) / ((double) MAX_RANDOM_VALUE))
 
622
#define dropItem(e) ( gin_rand() > ((double)GinFuzzySearchLimit)/((double)((e)->predictNumberResult)) )
 
623
 
 
624
/*
 
625
 * Sets entry->curItem to next heap item pointer for one entry of one scan key,
 
626
 * or sets entry->isFinished to TRUE if there are no more.
 
627
 *
 
628
 * Item pointers must be returned in ascending order.
 
629
 *
 
630
 * Note: this can return a "lossy page" item pointer, indicating that the
 
631
 * entry potentially matches all items on that heap page.  However, it is
 
632
 * not allowed to return both a lossy page pointer and exact (regular)
 
633
 * item pointers for the same page.  (Doing so would break the key-combination
 
634
 * logic in keyGetItem and scanGetItem; see comment in scanGetItem.)  In the
 
635
 * current implementation this is guaranteed by the behavior of tidbitmaps.
 
636
 */
 
637
static void
 
638
entryGetItem(GinState *ginstate, GinScanEntry entry)
 
639
{
 
640
        Assert(!entry->isFinished);
 
641
 
 
642
        if (entry->matchBitmap)
 
643
        {
 
644
                do
 
645
                {
 
646
                        if (entry->matchResult == NULL ||
 
647
                                entry->offset >= entry->matchResult->ntuples)
 
648
                        {
 
649
                                entry->matchResult = tbm_iterate(entry->matchIterator);
 
650
 
 
651
                                if (entry->matchResult == NULL)
 
652
                                {
 
653
                                        ItemPointerSetInvalid(&entry->curItem);
 
654
                                        tbm_end_iterate(entry->matchIterator);
 
655
                                        entry->matchIterator = NULL;
 
656
                                        entry->isFinished = TRUE;
 
657
                                        break;
 
658
                                }
 
659
 
 
660
                                /*
 
661
                                 * Reset counter to the beginning of entry->matchResult. Note:
 
662
                                 * entry->offset is still greater than matchResult->ntuples if
 
663
                                 * matchResult is lossy.  So, on next call we will get next
 
664
                                 * result from TIDBitmap.
 
665
                                 */
 
666
                                entry->offset = 0;
 
667
                        }
 
668
 
 
669
                        if (entry->matchResult->ntuples < 0)
 
670
                        {
 
671
                                /*
 
672
                                 * lossy result, so we need to check the whole page
 
673
                                 */
 
674
                                ItemPointerSetLossyPage(&entry->curItem,
 
675
                                                                                entry->matchResult->blockno);
 
676
 
 
677
                                /*
 
678
                                 * We might as well fall out of the loop; we could not
 
679
                                 * estimate number of results on this page to support correct
 
680
                                 * reducing of result even if it's enabled
 
681
                                 */
 
682
                                break;
 
683
                        }
 
684
 
 
685
                        ItemPointerSet(&entry->curItem,
 
686
                                                   entry->matchResult->blockno,
 
687
                                                   entry->matchResult->offsets[entry->offset]);
 
688
                        entry->offset++;
 
689
                } while (entry->reduceResult == TRUE && dropItem(entry));
 
690
        }
 
691
        else if (!BufferIsValid(entry->buffer))
 
692
        {
 
693
                entry->offset++;
 
694
                if (entry->offset <= entry->nlist)
 
695
                        entry->curItem = entry->list[entry->offset - 1];
 
696
                else
 
697
                {
 
698
                        ItemPointerSetInvalid(&entry->curItem);
 
699
                        entry->isFinished = TRUE;
 
700
                }
 
701
        }
 
702
        else
 
703
        {
 
704
                do
 
705
                {
 
706
                        entryGetNextItem(ginstate, entry);
 
707
                } while (entry->isFinished == FALSE &&
 
708
                                 entry->reduceResult == TRUE &&
 
709
                                 dropItem(entry));
 
710
        }
 
711
}
 
712
 
 
713
/*
 
714
 * Identify the "current" item among the input entry streams for this scan key,
 
715
 * and test whether it passes the scan key qual condition.
 
716
 *
 
717
 * The current item is the smallest curItem among the inputs.  key->curItem
 
718
 * is set to that value.  key->curItemMatches is set to indicate whether that
 
719
 * TID passes the consistentFn test.  If so, key->recheckCurItem is set true
 
720
 * iff recheck is needed for this item pointer (including the case where the
 
721
 * item pointer is a lossy page pointer).
 
722
 *
 
723
 * If all entry streams are exhausted, sets key->isFinished to TRUE.
 
724
 *
 
725
 * Item pointers must be returned in ascending order.
 
726
 *
 
727
 * Note: this can return a "lossy page" item pointer, indicating that the
 
728
 * key potentially matches all items on that heap page.  However, it is
 
729
 * not allowed to return both a lossy page pointer and exact (regular)
 
730
 * item pointers for the same page.  (Doing so would break the key-combination
 
731
 * logic in scanGetItem.)
 
732
 */
 
733
static void
 
734
keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key)
 
735
{
 
736
        ItemPointerData minItem;
 
737
        ItemPointerData curPageLossy;
 
738
        uint32          i;
 
739
        uint32          lossyEntry;
 
740
        bool            haveLossyEntry;
 
741
        GinScanEntry entry;
 
742
        bool            res;
 
743
        MemoryContext oldCtx;
 
744
 
 
745
        Assert(!key->isFinished);
 
746
 
 
747
        /*
 
748
         * Find the minimum of the active entry curItems.
 
749
         *
 
750
         * Note: a lossy-page entry is encoded by a ItemPointer with max value for
 
751
         * offset (0xffff), so that it will sort after any exact entries for the
 
752
         * same page.  So we'll prefer to return exact pointers not lossy
 
753
         * pointers, which is good.
 
754
         */
 
755
        ItemPointerSetMax(&minItem);
 
756
 
 
757
        for (i = 0; i < key->nentries; i++)
 
758
        {
 
759
                entry = key->scanEntry[i];
 
760
                if (entry->isFinished == FALSE &&
 
761
                        ginCompareItemPointers(&entry->curItem, &minItem) < 0)
 
762
                        minItem = entry->curItem;
 
763
        }
 
764
 
 
765
        if (ItemPointerIsMax(&minItem))
 
766
        {
 
767
                /* all entries are finished */
 
768
                key->isFinished = TRUE;
 
769
                return;
 
770
        }
 
771
 
 
772
        /*
 
773
         * We might have already tested this item; if so, no need to repeat work.
 
774
         * (Note: the ">" case can happen, if minItem is exact but we previously
 
775
         * had to set curItem to a lossy-page pointer.)
 
776
         */
 
777
        if (ginCompareItemPointers(&key->curItem, &minItem) >= 0)
 
778
                return;
 
779
 
 
780
        /*
 
781
         * OK, advance key->curItem and perform consistentFn test.
 
782
         */
 
783
        key->curItem = minItem;
 
784
 
 
785
        /*
 
786
         * Lossy-page entries pose a problem, since we don't know the correct
 
787
         * entryRes state to pass to the consistentFn, and we also don't know what
 
788
         * its combining logic will be (could be AND, OR, or even NOT). If the
 
789
         * logic is OR then the consistentFn might succeed for all items in the
 
790
         * lossy page even when none of the other entries match.
 
791
         *
 
792
         * If we have a single lossy-page entry then we check to see if the
 
793
         * consistentFn will succeed with only that entry TRUE.  If so, we return
 
794
         * a lossy-page pointer to indicate that the whole heap page must be
 
795
         * checked.  (On subsequent calls, we'll do nothing until minItem is past
 
796
         * the page altogether, thus ensuring that we never return both regular
 
797
         * and lossy pointers for the same page.)
 
798
         *
 
799
         * This idea could be generalized to more than one lossy-page entry, but
 
800
         * ideally lossy-page entries should be infrequent so it would seldom be
 
801
         * the case that we have more than one at once.  So it doesn't seem worth
 
802
         * the extra complexity to optimize that case. If we do find more than
 
803
         * one, we just punt and return a lossy-page pointer always.
 
804
         *
 
805
         * Note that only lossy-page entries pointing to the current item's page
 
806
         * should trigger this processing; we might have future lossy pages in the
 
807
         * entry array, but they aren't relevant yet.
 
808
         */
 
809
        ItemPointerSetLossyPage(&curPageLossy,
 
810
                                                        GinItemPointerGetBlockNumber(&key->curItem));
 
811
 
 
812
        lossyEntry = 0;
 
813
        haveLossyEntry = false;
 
814
        for (i = 0; i < key->nentries; i++)
 
815
        {
 
816
                entry = key->scanEntry[i];
 
817
                if (entry->isFinished == FALSE &&
 
818
                        ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0)
 
819
                {
 
820
                        if (haveLossyEntry)
 
821
                        {
 
822
                                /* Multiple lossy entries, punt */
 
823
                                key->curItem = curPageLossy;
 
824
                                key->curItemMatches = true;
 
825
                                key->recheckCurItem = true;
 
826
                                return;
 
827
                        }
 
828
                        lossyEntry = i;
 
829
                        haveLossyEntry = true;
 
830
                }
 
831
        }
 
832
 
 
833
        /* prepare for calling consistentFn in temp context */
 
834
        oldCtx = MemoryContextSwitchTo(tempCtx);
 
835
 
 
836
        if (haveLossyEntry)
 
837
        {
 
838
                /* Single lossy-page entry, so see if whole page matches */
 
839
                memset(key->entryRes, FALSE, key->nentries);
 
840
                key->entryRes[lossyEntry] = TRUE;
 
841
 
 
842
                if (callConsistentFn(ginstate, key))
 
843
                {
 
844
                        /* Yes, so clean up ... */
 
845
                        MemoryContextSwitchTo(oldCtx);
 
846
                        MemoryContextReset(tempCtx);
 
847
 
 
848
                        /* and return lossy pointer for whole page */
 
849
                        key->curItem = curPageLossy;
 
850
                        key->curItemMatches = true;
 
851
                        key->recheckCurItem = true;
 
852
                        return;
 
853
                }
 
854
        }
 
855
 
 
856
        /*
 
857
         * At this point we know that we don't need to return a lossy whole-page
 
858
         * pointer, but we might have matches for individual exact item pointers,
 
859
         * possibly in combination with a lossy pointer.  Our strategy if there's
 
860
         * a lossy pointer is to try the consistentFn both ways and return a hit
 
861
         * if it accepts either one (forcing the hit to be marked lossy so it will
 
862
         * be rechecked).  An exception is that we don't need to try it both ways
 
863
         * if the lossy pointer is in a "hidden" entry, because the consistentFn's
 
864
         * result can't depend on that.
 
865
         *
 
866
         * Prepare entryRes array to be passed to consistentFn.
 
867
         */
 
868
        for (i = 0; i < key->nentries; i++)
 
869
        {
 
870
                entry = key->scanEntry[i];
 
871
                if (entry->isFinished == FALSE &&
 
872
                        ginCompareItemPointers(&entry->curItem, &key->curItem) == 0)
 
873
                        key->entryRes[i] = TRUE;
 
874
                else
 
875
                        key->entryRes[i] = FALSE;
 
876
        }
 
877
        if (haveLossyEntry)
 
878
                key->entryRes[lossyEntry] = TRUE;
 
879
 
 
880
        res = callConsistentFn(ginstate, key);
 
881
 
 
882
        if (!res && haveLossyEntry && lossyEntry < key->nuserentries)
 
883
        {
 
884
                /* try the other way for the lossy item */
 
885
                key->entryRes[lossyEntry] = FALSE;
 
886
 
 
887
                res = callConsistentFn(ginstate, key);
 
888
        }
 
889
 
 
890
        key->curItemMatches = res;
 
891
        /* If we matched a lossy entry, force recheckCurItem = true */
 
892
        if (haveLossyEntry)
 
893
                key->recheckCurItem = true;
 
894
 
 
895
        /* clean up after consistentFn calls */
 
896
        MemoryContextSwitchTo(oldCtx);
 
897
        MemoryContextReset(tempCtx);
 
898
}
 
899
 
 
900
/*
 
901
 * Get next heap item pointer (after advancePast) from scan.
 
902
 * Returns true if anything found.
 
903
 * On success, *item and *recheck are set.
 
904
 *
 
905
 * Note: this is very nearly the same logic as in keyGetItem(), except
 
906
 * that we know the keys are to be combined with AND logic, whereas in
 
907
 * keyGetItem() the combination logic is known only to the consistentFn.
 
908
 */
 
909
static bool
 
910
scanGetItem(IndexScanDesc scan, ItemPointer advancePast,
 
911
                        ItemPointerData *item, bool *recheck)
 
912
{
 
913
        GinScanOpaque so = (GinScanOpaque) scan->opaque;
 
914
        GinState   *ginstate = &so->ginstate;
 
915
        ItemPointerData myAdvancePast = *advancePast;
 
916
        uint32          i;
 
917
        bool            allFinished;
 
918
        bool            match;
 
919
 
 
920
        for (;;)
 
921
        {
 
922
                /*
 
923
                 * Advance any entries that are <= myAdvancePast.  In particular,
 
924
                 * since entry->curItem was initialized with ItemPointerSetMin, this
 
925
                 * ensures we fetch the first item for each entry on the first call.
 
926
                 */
 
927
                allFinished = TRUE;
 
928
 
 
929
                for (i = 0; i < so->totalentries; i++)
 
930
                {
 
931
                        GinScanEntry entry = so->entries[i];
 
932
 
 
933
                        while (entry->isFinished == FALSE &&
 
934
                                   ginCompareItemPointers(&entry->curItem,
 
935
                                                                                  &myAdvancePast) <= 0)
 
936
                                entryGetItem(ginstate, entry);
 
937
 
 
938
                        if (entry->isFinished == FALSE)
 
939
                                allFinished = FALSE;
 
940
                }
 
941
 
 
942
                if (allFinished)
 
943
                {
 
944
                        /* all entries exhausted, so we're done */
 
945
                        return false;
 
946
                }
 
947
 
 
948
                /*
 
949
                 * Perform the consistentFn test for each scan key.  If any key
 
950
                 * reports isFinished, meaning its subset of the entries is exhausted,
 
951
                 * we can stop.  Otherwise, set *item to the minimum of the key
 
952
                 * curItems.
 
953
                 */
 
954
                ItemPointerSetMax(item);
 
955
 
 
956
                for (i = 0; i < so->nkeys; i++)
 
957
                {
 
958
                        GinScanKey      key = so->keys + i;
 
959
 
 
960
                        keyGetItem(&so->ginstate, so->tempCtx, key);
 
961
 
 
962
                        if (key->isFinished)
 
963
                                return false;   /* finished one of keys */
 
964
 
 
965
                        if (ginCompareItemPointers(&key->curItem, item) < 0)
 
966
                                *item = key->curItem;
 
967
                }
 
968
 
 
969
                Assert(!ItemPointerIsMax(item));
 
970
 
 
971
                /*----------
 
972
                 * Now *item contains first ItemPointer after previous result.
 
973
                 *
 
974
                 * The item is a valid hit only if all the keys succeeded for either
 
975
                 * that exact TID, or a lossy reference to the same page.
 
976
                 *
 
977
                 * This logic works only if a keyGetItem stream can never contain both
 
978
                 * exact and lossy pointers for the same page.  Else we could have a
 
979
                 * case like
 
980
                 *
 
981
                 *              stream 1                stream 2
 
982
                 *              ...                             ...
 
983
                 *              42/6                    42/7
 
984
                 *              50/1                    42/0xffff
 
985
                 *              ...                             ...
 
986
                 *
 
987
                 * We would conclude that 42/6 is not a match and advance stream 1,
 
988
                 * thus never detecting the match to the lossy pointer in stream 2.
 
989
                 * (keyGetItem has a similar problem versus entryGetItem.)
 
990
                 *----------
 
991
                 */
 
992
                match = true;
 
993
                for (i = 0; i < so->nkeys; i++)
 
994
                {
 
995
                        GinScanKey      key = so->keys + i;
 
996
 
 
997
                        if (key->curItemMatches)
 
998
                        {
 
999
                                if (ginCompareItemPointers(item, &key->curItem) == 0)
 
1000
                                        continue;
 
1001
                                if (ItemPointerIsLossyPage(&key->curItem) &&
 
1002
                                        GinItemPointerGetBlockNumber(&key->curItem) ==
 
1003
                                        GinItemPointerGetBlockNumber(item))
 
1004
                                        continue;
 
1005
                        }
 
1006
                        match = false;
 
1007
                        break;
 
1008
                }
 
1009
 
 
1010
                if (match)
 
1011
                        break;
 
1012
 
 
1013
                /*
 
1014
                 * No hit.      Update myAdvancePast to this TID, so that on the next pass
 
1015
                 * we'll move to the next possible entry.
 
1016
                 */
 
1017
                myAdvancePast = *item;
 
1018
        }
 
1019
 
 
1020
        /*
 
1021
         * We must return recheck = true if any of the keys are marked recheck.
 
1022
         */
 
1023
        *recheck = false;
 
1024
        for (i = 0; i < so->nkeys; i++)
 
1025
        {
 
1026
                GinScanKey      key = so->keys + i;
 
1027
 
 
1028
                if (key->recheckCurItem)
 
1029
                {
 
1030
                        *recheck = true;
 
1031
                        break;
 
1032
                }
 
1033
        }
 
1034
 
 
1035
        return TRUE;
 
1036
}
 
1037
 
 
1038
 
 
1039
/*
 
1040
 * Functions for scanning the pending list
 
1041
 */
 
1042
 
 
1043
 
 
1044
/*
 
1045
 * Get ItemPointer of next heap row to be checked from pending list.
 
1046
 * Returns false if there are no more. On pages with several heap rows
 
1047
 * it returns each row separately, on page with part of heap row returns
 
1048
 * per page data.  pos->firstOffset and pos->lastOffset are set to identify
 
1049
 * the range of pending-list tuples belonging to this heap row.
 
1050
 *
 
1051
 * The pendingBuffer is presumed pinned and share-locked on entry, and is
 
1052
 * pinned and share-locked on success exit.  On failure exit it's released.
 
1053
 */
 
1054
static bool
 
1055
scanGetCandidate(IndexScanDesc scan, pendingPosition *pos)
 
1056
{
 
1057
        OffsetNumber maxoff;
 
1058
        Page            page;
 
1059
        IndexTuple      itup;
 
1060
 
 
1061
        ItemPointerSetInvalid(&pos->item);
 
1062
        for (;;)
 
1063
        {
 
1064
                page = BufferGetPage(pos->pendingBuffer);
 
1065
 
 
1066
                maxoff = PageGetMaxOffsetNumber(page);
 
1067
                if (pos->firstOffset > maxoff)
 
1068
                {
 
1069
                        BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
 
1070
 
 
1071
                        if (blkno == InvalidBlockNumber)
 
1072
                        {
 
1073
                                UnlockReleaseBuffer(pos->pendingBuffer);
 
1074
                                pos->pendingBuffer = InvalidBuffer;
 
1075
 
 
1076
                                return false;
 
1077
                        }
 
1078
                        else
 
1079
                        {
 
1080
                                /*
 
1081
                                 * Here we must prevent deletion of next page by insertcleanup
 
1082
                                 * process, which may be trying to obtain exclusive lock on
 
1083
                                 * current page.  So, we lock next page before releasing the
 
1084
                                 * current one
 
1085
                                 */
 
1086
                                Buffer          tmpbuf = ReadBuffer(scan->indexRelation, blkno);
 
1087
 
 
1088
                                LockBuffer(tmpbuf, GIN_SHARE);
 
1089
                                UnlockReleaseBuffer(pos->pendingBuffer);
 
1090
 
 
1091
                                pos->pendingBuffer = tmpbuf;
 
1092
                                pos->firstOffset = FirstOffsetNumber;
 
1093
                        }
 
1094
                }
 
1095
                else
 
1096
                {
 
1097
                        itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->firstOffset));
 
1098
                        pos->item = itup->t_tid;
 
1099
                        if (GinPageHasFullRow(page))
 
1100
                        {
 
1101
                                /*
 
1102
                                 * find itempointer to the next row
 
1103
                                 */
 
1104
                                for (pos->lastOffset = pos->firstOffset + 1; pos->lastOffset <= maxoff; pos->lastOffset++)
 
1105
                                {
 
1106
                                        itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->lastOffset));
 
1107
                                        if (!ItemPointerEquals(&pos->item, &itup->t_tid))
 
1108
                                                break;
 
1109
                                }
 
1110
                        }
 
1111
                        else
 
1112
                        {
 
1113
                                /*
 
1114
                                 * All itempointers are the same on this page
 
1115
                                 */
 
1116
                                pos->lastOffset = maxoff + 1;
 
1117
                        }
 
1118
 
 
1119
                        /*
 
1120
                         * Now pos->firstOffset points to the first tuple of current heap
 
1121
                         * row, pos->lastOffset points to the first tuple of next heap row
 
1122
                         * (or to the end of page)
 
1123
                         */
 
1124
                        break;
 
1125
                }
 
1126
        }
 
1127
 
 
1128
        return true;
 
1129
}
 
1130
 
 
1131
/*
 
1132
 * Scan pending-list page from current tuple (off) up till the first of:
 
1133
 * - match is found (then returns true)
 
1134
 * - no later match is possible
 
1135
 * - tuple's attribute number is not equal to entry's attrnum
 
1136
 * - reach end of page
 
1137
 *
 
1138
 * datum[]/category[]/datumExtracted[] arrays are used to cache the results
 
1139
 * of gintuple_get_key() on the current page.
 
1140
 */
 
1141
static bool
 
1142
matchPartialInPendingList(GinState *ginstate, Page page,
 
1143
                                                  OffsetNumber off, OffsetNumber maxoff,
 
1144
                                                  GinScanEntry entry,
 
1145
                                                  Datum *datum, GinNullCategory *category,
 
1146
                                                  bool *datumExtracted)
 
1147
{
 
1148
        IndexTuple      itup;
 
1149
        int32           cmp;
 
1150
 
 
1151
        /* Partial match to a null is not possible */
 
1152
        if (entry->queryCategory != GIN_CAT_NORM_KEY)
 
1153
                return false;
 
1154
 
 
1155
        while (off < maxoff)
 
1156
        {
 
1157
                itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
 
1158
 
 
1159
                if (gintuple_get_attrnum(ginstate, itup) != entry->attnum)
 
1160
                        return false;
 
1161
 
 
1162
                if (datumExtracted[off - 1] == false)
 
1163
                {
 
1164
                        datum[off - 1] = gintuple_get_key(ginstate, itup,
 
1165
                                                                                          &category[off - 1]);
 
1166
                        datumExtracted[off - 1] = true;
 
1167
                }
 
1168
 
 
1169
                /* Once we hit nulls, no further match is possible */
 
1170
                if (category[off - 1] != GIN_CAT_NORM_KEY)
 
1171
                        return false;
 
1172
 
 
1173
                /*----------
 
1174
                 * Check partial match.
 
1175
                 * case cmp == 0 => match
 
1176
                 * case cmp > 0 => not match and end scan (no later match possible)
 
1177
                 * case cmp < 0 => not match and continue scan
 
1178
                 *----------
 
1179
                 */
 
1180
                cmp = DatumGetInt32(FunctionCall4Coll(&ginstate->comparePartialFn[entry->attnum - 1],
 
1181
                                                                                          ginstate->supportCollation[entry->attnum - 1],
 
1182
                                                                                          entry->queryKey,
 
1183
                                                                                          datum[off - 1],
 
1184
                                                                                  UInt16GetDatum(entry->strategy),
 
1185
                                                                                PointerGetDatum(entry->extra_data)));
 
1186
                if (cmp == 0)
 
1187
                        return true;
 
1188
                else if (cmp > 0)
 
1189
                        return false;
 
1190
 
 
1191
                off++;
 
1192
        }
 
1193
 
 
1194
        return false;
 
1195
}
 
1196
 
 
1197
/*
 
1198
 * Set up the entryRes array for each key by looking at
 
1199
 * every entry for current heap row in pending list.
 
1200
 *
 
1201
 * Returns true if each scan key has at least one entryRes match.
 
1202
 * This corresponds to the situations where the normal index search will
 
1203
 * try to apply the key's consistentFn.  (A tuple not meeting that requirement
 
1204
 * cannot be returned by the normal search since no entry stream will
 
1205
 * source its TID.)
 
1206
 *
 
1207
 * The pendingBuffer is presumed pinned and share-locked on entry.
 
1208
 */
 
1209
static bool
 
1210
collectMatchesForHeapRow(IndexScanDesc scan, pendingPosition *pos)
 
1211
{
 
1212
        GinScanOpaque so = (GinScanOpaque) scan->opaque;
 
1213
        OffsetNumber attrnum;
 
1214
        Page            page;
 
1215
        IndexTuple      itup;
 
1216
        int                     i,
 
1217
                                j;
 
1218
 
 
1219
        /*
 
1220
         * Reset all entryRes and hasMatchKey flags
 
1221
         */
 
1222
        for (i = 0; i < so->nkeys; i++)
 
1223
        {
 
1224
                GinScanKey      key = so->keys + i;
 
1225
 
 
1226
                memset(key->entryRes, FALSE, key->nentries);
 
1227
        }
 
1228
        memset(pos->hasMatchKey, FALSE, so->nkeys);
 
1229
 
 
1230
        /*
 
1231
         * Outer loop iterates over multiple pending-list pages when a single heap
 
1232
         * row has entries spanning those pages.
 
1233
         */
 
1234
        for (;;)
 
1235
        {
 
1236
                Datum           datum[BLCKSZ / sizeof(IndexTupleData)];
 
1237
                GinNullCategory category[BLCKSZ / sizeof(IndexTupleData)];
 
1238
                bool            datumExtracted[BLCKSZ / sizeof(IndexTupleData)];
 
1239
 
 
1240
                Assert(pos->lastOffset > pos->firstOffset);
 
1241
                memset(datumExtracted + pos->firstOffset - 1, 0,
 
1242
                           sizeof(bool) * (pos->lastOffset - pos->firstOffset));
 
1243
 
 
1244
                page = BufferGetPage(pos->pendingBuffer);
 
1245
 
 
1246
                for (i = 0; i < so->nkeys; i++)
 
1247
                {
 
1248
                        GinScanKey      key = so->keys + i;
 
1249
 
 
1250
                        for (j = 0; j < key->nentries; j++)
 
1251
                        {
 
1252
                                GinScanEntry entry = key->scanEntry[j];
 
1253
                                OffsetNumber StopLow = pos->firstOffset,
 
1254
                                                        StopHigh = pos->lastOffset,
 
1255
                                                        StopMiddle;
 
1256
 
 
1257
                                /* If already matched on earlier page, do no extra work */
 
1258
                                if (key->entryRes[j])
 
1259
                                        continue;
 
1260
 
 
1261
                                /*
 
1262
                                 * Interesting tuples are from pos->firstOffset to
 
1263
                                 * pos->lastOffset and they are ordered by (attnum, Datum) as
 
1264
                                 * it's done in entry tree.  So we can use binary search to
 
1265
                                 * avoid linear scanning.
 
1266
                                 */
 
1267
                                while (StopLow < StopHigh)
 
1268
                                {
 
1269
                                        int                     res;
 
1270
 
 
1271
                                        StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
 
1272
 
 
1273
                                        itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, StopMiddle));
 
1274
 
 
1275
                                        attrnum = gintuple_get_attrnum(&so->ginstate, itup);
 
1276
 
 
1277
                                        if (key->attnum < attrnum)
 
1278
                                        {
 
1279
                                                StopHigh = StopMiddle;
 
1280
                                                continue;
 
1281
                                        }
 
1282
                                        if (key->attnum > attrnum)
 
1283
                                        {
 
1284
                                                StopLow = StopMiddle + 1;
 
1285
                                                continue;
 
1286
                                        }
 
1287
 
 
1288
                                        if (datumExtracted[StopMiddle - 1] == false)
 
1289
                                        {
 
1290
                                                datum[StopMiddle - 1] =
 
1291
                                                        gintuple_get_key(&so->ginstate, itup,
 
1292
                                                                                         &category[StopMiddle - 1]);
 
1293
                                                datumExtracted[StopMiddle - 1] = true;
 
1294
                                        }
 
1295
 
 
1296
                                        if (entry->queryCategory == GIN_CAT_EMPTY_QUERY)
 
1297
                                        {
 
1298
                                                /* special behavior depending on searchMode */
 
1299
                                                if (entry->searchMode == GIN_SEARCH_MODE_ALL)
 
1300
                                                {
 
1301
                                                        /* match anything except NULL_ITEM */
 
1302
                                                        if (category[StopMiddle - 1] == GIN_CAT_NULL_ITEM)
 
1303
                                                                res = -1;
 
1304
                                                        else
 
1305
                                                                res = 0;
 
1306
                                                }
 
1307
                                                else
 
1308
                                                {
 
1309
                                                        /* match everything */
 
1310
                                                        res = 0;
 
1311
                                                }
 
1312
                                        }
 
1313
                                        else
 
1314
                                        {
 
1315
                                                res = ginCompareEntries(&so->ginstate,
 
1316
                                                                                                entry->attnum,
 
1317
                                                                                                entry->queryKey,
 
1318
                                                                                                entry->queryCategory,
 
1319
                                                                                                datum[StopMiddle - 1],
 
1320
                                                                                                category[StopMiddle - 1]);
 
1321
                                        }
 
1322
 
 
1323
                                        if (res == 0)
 
1324
                                        {
 
1325
                                                /*
 
1326
                                                 * Found exact match (there can be only one, except in
 
1327
                                                 * EMPTY_QUERY mode).
 
1328
                                                 *
 
1329
                                                 * If doing partial match, scan forward from here to
 
1330
                                                 * end of page to check for matches.
 
1331
                                                 *
 
1332
                                                 * See comment above about tuple's ordering.
 
1333
                                                 */
 
1334
                                                if (entry->isPartialMatch)
 
1335
                                                        key->entryRes[j] =
 
1336
                                                                matchPartialInPendingList(&so->ginstate,
 
1337
                                                                                                                  page,
 
1338
                                                                                                                  StopMiddle,
 
1339
                                                                                                                  pos->lastOffset,
 
1340
                                                                                                                  entry,
 
1341
                                                                                                                  datum,
 
1342
                                                                                                                  category,
 
1343
                                                                                                                  datumExtracted);
 
1344
                                                else
 
1345
                                                        key->entryRes[j] = true;
 
1346
 
 
1347
                                                /* done with binary search */
 
1348
                                                break;
 
1349
                                        }
 
1350
                                        else if (res < 0)
 
1351
                                                StopHigh = StopMiddle;
 
1352
                                        else
 
1353
                                                StopLow = StopMiddle + 1;
 
1354
                                }
 
1355
 
 
1356
                                if (StopLow >= StopHigh && entry->isPartialMatch)
 
1357
                                {
 
1358
                                        /*
 
1359
                                         * No exact match on this page.  If doing partial match,
 
1360
                                         * scan from the first tuple greater than target value to
 
1361
                                         * end of page.  Note that since we don't remember whether
 
1362
                                         * the comparePartialFn told us to stop early on a
 
1363
                                         * previous page, we will uselessly apply comparePartialFn
 
1364
                                         * to the first tuple on each subsequent page.
 
1365
                                         */
 
1366
                                        key->entryRes[j] =
 
1367
                                                matchPartialInPendingList(&so->ginstate,
 
1368
                                                                                                  page,
 
1369
                                                                                                  StopHigh,
 
1370
                                                                                                  pos->lastOffset,
 
1371
                                                                                                  entry,
 
1372
                                                                                                  datum,
 
1373
                                                                                                  category,
 
1374
                                                                                                  datumExtracted);
 
1375
                                }
 
1376
 
 
1377
                                pos->hasMatchKey[i] |= key->entryRes[j];
 
1378
                        }
 
1379
                }
 
1380
 
 
1381
                /* Advance firstOffset over the scanned tuples */
 
1382
                pos->firstOffset = pos->lastOffset;
 
1383
 
 
1384
                if (GinPageHasFullRow(page))
 
1385
                {
 
1386
                        /*
 
1387
                         * We have examined all pending entries for the current heap row.
 
1388
                         * Break out of loop over pages.
 
1389
                         */
 
1390
                        break;
 
1391
                }
 
1392
                else
 
1393
                {
 
1394
                        /*
 
1395
                         * Advance to next page of pending entries for the current heap
 
1396
                         * row.  Complain if there isn't one.
 
1397
                         */
 
1398
                        ItemPointerData item = pos->item;
 
1399
 
 
1400
                        if (scanGetCandidate(scan, pos) == false ||
 
1401
                                !ItemPointerEquals(&pos->item, &item))
 
1402
                                elog(ERROR, "could not find additional pending pages for same heap tuple");
 
1403
                }
 
1404
        }
 
1405
 
 
1406
        /*
 
1407
         * Now return "true" if all scan keys have at least one matching datum
 
1408
         */
 
1409
        for (i = 0; i < so->nkeys; i++)
 
1410
        {
 
1411
                if (pos->hasMatchKey[i] == false)
 
1412
                        return false;
 
1413
        }
 
1414
 
 
1415
        return true;
 
1416
}
 
1417
 
 
1418
/*
 
1419
 * Collect all matched rows from pending list into bitmap
 
1420
 */
 
1421
static void
 
1422
scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
 
1423
{
 
1424
        GinScanOpaque so = (GinScanOpaque) scan->opaque;
 
1425
        MemoryContext oldCtx;
 
1426
        bool            recheck,
 
1427
                                match;
 
1428
        int                     i;
 
1429
        pendingPosition pos;
 
1430
        Buffer          metabuffer = ReadBuffer(scan->indexRelation, GIN_METAPAGE_BLKNO);
 
1431
        BlockNumber blkno;
 
1432
 
 
1433
        *ntids = 0;
 
1434
 
 
1435
        LockBuffer(metabuffer, GIN_SHARE);
 
1436
        blkno = GinPageGetMeta(BufferGetPage(metabuffer))->head;
 
1437
 
 
1438
        /*
 
1439
         * fetch head of list before unlocking metapage. head page must be pinned
 
1440
         * to prevent deletion by vacuum process
 
1441
         */
 
1442
        if (blkno == InvalidBlockNumber)
 
1443
        {
 
1444
                /* No pending list, so proceed with normal scan */
 
1445
                UnlockReleaseBuffer(metabuffer);
 
1446
                return;
 
1447
        }
 
1448
 
 
1449
        pos.pendingBuffer = ReadBuffer(scan->indexRelation, blkno);
 
1450
        LockBuffer(pos.pendingBuffer, GIN_SHARE);
 
1451
        pos.firstOffset = FirstOffsetNumber;
 
1452
        UnlockReleaseBuffer(metabuffer);
 
1453
        pos.hasMatchKey = palloc(sizeof(bool) * so->nkeys);
 
1454
 
 
1455
        /*
 
1456
         * loop for each heap row. scanGetCandidate returns full row or row's
 
1457
         * tuples from first page.
 
1458
         */
 
1459
        while (scanGetCandidate(scan, &pos))
 
1460
        {
 
1461
                /*
 
1462
                 * Check entries in tuple and set up entryRes array.
 
1463
                 *
 
1464
                 * If pending tuples belonging to the current heap row are spread
 
1465
                 * across several pages, collectMatchesForHeapRow will read all of
 
1466
                 * those pages.
 
1467
                 */
 
1468
                if (!collectMatchesForHeapRow(scan, &pos))
 
1469
                        continue;
 
1470
 
 
1471
                /*
 
1472
                 * Matching of entries of one row is finished, so check row using
 
1473
                 * consistent functions.
 
1474
                 */
 
1475
                oldCtx = MemoryContextSwitchTo(so->tempCtx);
 
1476
                recheck = false;
 
1477
                match = true;
 
1478
 
 
1479
                for (i = 0; i < so->nkeys; i++)
 
1480
                {
 
1481
                        GinScanKey      key = so->keys + i;
 
1482
 
 
1483
                        if (!callConsistentFn(&so->ginstate, key))
 
1484
                        {
 
1485
                                match = false;
 
1486
                                break;
 
1487
                        }
 
1488
                        recheck |= key->recheckCurItem;
 
1489
                }
 
1490
 
 
1491
                MemoryContextSwitchTo(oldCtx);
 
1492
                MemoryContextReset(so->tempCtx);
 
1493
 
 
1494
                if (match)
 
1495
                {
 
1496
                        tbm_add_tuples(tbm, &pos.item, 1, recheck);
 
1497
                        (*ntids)++;
 
1498
                }
 
1499
        }
 
1500
 
 
1501
        pfree(pos.hasMatchKey);
 
1502
}
 
1503
 
 
1504
 
 
1505
#define GinIsNewKey(s)          ( ((GinScanOpaque) scan->opaque)->keys == NULL )
 
1506
#define GinIsVoidRes(s)         ( ((GinScanOpaque) scan->opaque)->isVoidRes )
 
1507
 
 
1508
Datum
 
1509
gingetbitmap(PG_FUNCTION_ARGS)
 
1510
{
 
1511
        IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
 
1512
        TIDBitmap  *tbm = (TIDBitmap *) PG_GETARG_POINTER(1);
 
1513
        int64           ntids;
 
1514
        ItemPointerData iptr;
 
1515
        bool            recheck;
 
1516
 
 
1517
        /*
 
1518
         * Set up the scan keys, and check for unsatisfiable query.
 
1519
         */
 
1520
        if (GinIsNewKey(scan))
 
1521
                ginNewScanKey(scan);
 
1522
 
 
1523
        if (GinIsVoidRes(scan))
 
1524
                PG_RETURN_INT64(0);
 
1525
 
 
1526
        ntids = 0;
 
1527
 
 
1528
        /*
 
1529
         * First, scan the pending list and collect any matching entries into the
 
1530
         * bitmap.      After we scan a pending item, some other backend could post it
 
1531
         * into the main index, and so we might visit it a second time during the
 
1532
         * main scan.  This is okay because we'll just re-set the same bit in the
 
1533
         * bitmap.      (The possibility of duplicate visits is a major reason why GIN
 
1534
         * can't support the amgettuple API, however.) Note that it would not do
 
1535
         * to scan the main index before the pending list, since concurrent
 
1536
         * cleanup could then make us miss entries entirely.
 
1537
         */
 
1538
        scanPendingInsert(scan, tbm, &ntids);
 
1539
 
 
1540
        /*
 
1541
         * Now scan the main index.
 
1542
         */
 
1543
        startScan(scan);
 
1544
 
 
1545
        ItemPointerSetMin(&iptr);
 
1546
 
 
1547
        for (;;)
 
1548
        {
 
1549
                CHECK_FOR_INTERRUPTS();
 
1550
 
 
1551
                if (!scanGetItem(scan, &iptr, &iptr, &recheck))
 
1552
                        break;
 
1553
 
 
1554
                if (ItemPointerIsLossyPage(&iptr))
 
1555
                        tbm_add_page(tbm, ItemPointerGetBlockNumber(&iptr));
 
1556
                else
 
1557
                        tbm_add_tuples(tbm, &iptr, 1, recheck);
 
1558
                ntids++;
 
1559
        }
 
1560
 
 
1561
        PG_RETURN_INT64(ntids);
 
1562
}