1
/*-------------------------------------------------------------------------
4
* routines to manage scans on index relations
6
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
7
* Portions Copyright (c) 1994, Regents of the University of California
11
* $PostgreSQL: pgsql/src/backend/access/rtree/rtscan.c,v 1.56 2004-12-31 21:59:26 pgsql Exp $
13
*-------------------------------------------------------------------------
18
#include "access/genam.h"
19
#include "access/rtree.h"
20
#include "utils/lsyscache.h"
21
#include "utils/resowner.h"
24
/* routines defined and used here */
25
static void rtregscan(IndexScanDesc s);
26
static void rtdropscan(IndexScanDesc s);
27
static void rtadjone(IndexScanDesc s, int op, BlockNumber blkno,
29
static void adjuststack(RTSTACK *stk, BlockNumber blkno);
30
static void adjustiptr(IndexScanDesc s, ItemPointer iptr,
31
int op, BlockNumber blkno, OffsetNumber offnum);
34
* Whenever we start an rtree scan in a backend, we register it in private
35
* space. Then if the rtree index gets updated, we check all registered
36
* scans and adjust them if the tuple they point at got moved by the
37
* update. We only need to do this in private space, because when we update
38
* an rtree we have a write lock on the tree, so no other process can have
39
* any locks at all on it. A single transaction can have write and read
40
* locks on the same object, so that's why we need to handle this case.
43
typedef struct RTScanListData
45
IndexScanDesc rtsl_scan;
46
ResourceOwner rtsl_owner;
47
struct RTScanListData *rtsl_next;
50
typedef RTScanListData *RTScanList;
52
/* pointer to list of local scans on rtrees */
53
static RTScanList RTScans = NULL;
56
rtbeginscan(PG_FUNCTION_ARGS)
58
Relation r = (Relation) PG_GETARG_POINTER(0);
59
int nkeys = PG_GETARG_INT32(1);
60
ScanKey key = (ScanKey) PG_GETARG_POINTER(2);
63
s = RelationGetIndexScan(r, nkeys, key);
71
rtrescan(PG_FUNCTION_ARGS)
73
IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0);
74
ScanKey key = (ScanKey) PG_GETARG_POINTER(1);
79
* Clear all the pointers.
81
ItemPointerSetInvalid(&s->currentItemData);
82
ItemPointerSetInvalid(&s->currentMarkData);
84
p = (RTreeScanOpaque) s->opaque;
87
/* rescan an existing indexscan --- reset state */
88
freestack(p->s_stack);
89
freestack(p->s_markstk);
90
p->s_stack = p->s_markstk = NULL;
95
/* initialize opaque data */
96
p = (RTreeScanOpaque) palloc(sizeof(RTreeScanOpaqueData));
97
p->s_stack = p->s_markstk = NULL;
98
p->s_internalNKey = s->numberOfKeys;
101
if (s->numberOfKeys > 0)
102
p->s_internalKey = (ScanKey) palloc(sizeof(ScanKeyData) * s->numberOfKeys);
105
/* Update scan key, if a new one is given */
106
if (key && s->numberOfKeys > 0)
110
s->numberOfKeys * sizeof(ScanKeyData));
113
* Scans on internal pages use different operators than they do on
114
* leaf pages. For example, if the user wants all boxes that
115
* exactly match (x1,y1,x2,y2), then on internal pages we need to
116
* find all boxes that contain (x1,y1,x2,y2).
118
for (i = 0; i < s->numberOfKeys; i++)
120
AttrNumber attno = s->keyData[i].sk_attno;
122
StrategyNumber int_strategy;
124
RegProcedure int_proc;
126
opclass = s->indexRelation->rd_index->indclass[attno - 1];
127
int_strategy = RTMapToInternalOperator(s->keyData[i].sk_strategy);
128
int_oper = get_opclass_member(opclass,
129
s->keyData[i].sk_subtype,
131
int_proc = get_opcode(int_oper);
132
ScanKeyEntryInitialize(&(p->s_internalKey[i]),
133
s->keyData[i].sk_flags,
136
s->keyData[i].sk_subtype,
138
s->keyData[i].sk_argument);
146
rtmarkpos(PG_FUNCTION_ARGS)
148
IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0);
154
s->currentMarkData = s->currentItemData;
155
p = (RTreeScanOpaque) s->opaque;
156
if (p->s_flags & RTS_CURBEFORE)
157
p->s_flags |= RTS_MRKBEFORE;
159
p->s_flags &= ~RTS_MRKBEFORE;
164
/* copy the parent stack from the current item data */
167
tmp = (RTSTACK *) palloc(sizeof(RTSTACK));
168
tmp->rts_child = n->rts_child;
169
tmp->rts_blk = n->rts_blk;
175
freestack(p->s_markstk);
182
rtrestrpos(PG_FUNCTION_ARGS)
184
IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0);
190
s->currentItemData = s->currentMarkData;
191
p = (RTreeScanOpaque) s->opaque;
192
if (p->s_flags & RTS_MRKBEFORE)
193
p->s_flags |= RTS_CURBEFORE;
195
p->s_flags &= ~RTS_CURBEFORE;
200
/* copy the parent stack from the current item data */
203
tmp = (RTSTACK *) palloc(sizeof(RTSTACK));
204
tmp->rts_child = n->rts_child;
205
tmp->rts_blk = n->rts_blk;
211
freestack(p->s_stack);
218
rtendscan(PG_FUNCTION_ARGS)
220
IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0);
223
p = (RTreeScanOpaque) s->opaque;
227
freestack(p->s_stack);
228
freestack(p->s_markstk);
233
/* XXX don't unset read lock -- two-phase locking */
239
rtregscan(IndexScanDesc s)
243
l = (RTScanList) palloc(sizeof(RTScanListData));
245
l->rtsl_owner = CurrentResourceOwner;
246
l->rtsl_next = RTScans;
251
rtdropscan(IndexScanDesc s)
259
l != NULL && l->rtsl_scan != s;
264
elog(ERROR, "rtree scan list corrupted -- could not find 0x%p",
268
RTScans = l->rtsl_next;
270
prev->rtsl_next = l->rtsl_next;
276
* ReleaseResources_rtree() --- clean up rtree subsystem resources.
278
* This is here because it needs to touch this module's static var RTScans.
281
ReleaseResources_rtree(void)
288
* Note: this should be a no-op during normal query shutdown. However,
289
* in an abort situation ExecutorEnd is not called and so there may be
290
* open index scans to clean up.
294
for (l = RTScans; l != NULL; l = next)
297
if (l->rtsl_owner == CurrentResourceOwner)
302
prev->rtsl_next = next;
305
/* prev does not change */
313
rtadjscans(Relation r, int op, BlockNumber blkno, OffsetNumber offnum)
318
relid = RelationGetRelid(r);
319
for (l = RTScans; l != NULL; l = l->rtsl_next)
321
if (RelationGetRelid(l->rtsl_scan->indexRelation) == relid)
322
rtadjone(l->rtsl_scan, op, blkno, offnum);
327
* rtadjone() -- adjust one scan for update.
329
* By here, the scan passed in is on a modified relation. Op tells
330
* us what the modification is, and blkno and offind tell us what
331
* block and offset index were affected. This routine checks the
332
* current and marked positions, and the current and marked stacks,
333
* to see if any stored location needs to be changed because of the
334
* update. If so, we make the change here.
337
rtadjone(IndexScanDesc s,
344
adjustiptr(s, &(s->currentItemData), op, blkno, offnum);
345
adjustiptr(s, &(s->currentMarkData), op, blkno, offnum);
347
so = (RTreeScanOpaque) s->opaque;
349
if (op == RTOP_SPLIT)
351
adjuststack(so->s_stack, blkno);
352
adjuststack(so->s_markstk, blkno);
357
* adjustiptr() -- adjust current and marked item pointers in the scan
359
* Depending on the type of update and the place it happened, we
360
* need to do nothing, to back up one record, or to start over on
364
adjustiptr(IndexScanDesc s,
373
if (ItemPointerIsValid(iptr))
375
if (ItemPointerGetBlockNumber(iptr) == blkno)
377
curoff = ItemPointerGetOffsetNumber(iptr);
378
so = (RTreeScanOpaque) s->opaque;
383
/* back up one if we need to */
384
if (curoff >= offnum)
387
if (curoff > FirstOffsetNumber)
389
/* just adjust the item pointer */
390
ItemPointerSet(iptr, blkno, OffsetNumberPrev(curoff));
395
* remember that we're before the current
398
ItemPointerSet(iptr, blkno, FirstOffsetNumber);
399
if (iptr == &(s->currentItemData))
400
so->s_flags |= RTS_CURBEFORE;
402
so->s_flags |= RTS_MRKBEFORE;
408
/* back to start of page on split */
409
ItemPointerSet(iptr, blkno, FirstOffsetNumber);
410
if (iptr == &(s->currentItemData))
411
so->s_flags &= ~RTS_CURBEFORE;
413
so->s_flags &= ~RTS_MRKBEFORE;
417
elog(ERROR, "unrecognized operation in rtree scan adjust: %d", op);
424
* adjuststack() -- adjust the supplied stack for a split on a page in
425
* the index we're scanning.
427
* If a page on our parent stack has split, we need to back up to the
428
* beginning of the page and rescan it. The reason for this is that
429
* the split algorithm for rtrees doesn't order tuples in any useful
430
* way on a single page. This means on that a split, we may wind up
431
* looking at some heap tuples more than once. This is handled in the
432
* access method update code for heaps; if we've modified the tuple we
433
* are looking at already in this transaction, we ignore the update
437
adjuststack(RTSTACK *stk, BlockNumber blkno)
441
if (stk->rts_blk == blkno)
442
stk->rts_child = FirstOffsetNumber;
444
stk = stk->rts_parent;