~vcs-imports/mammoth-replicator/trunk

« back to all changes in this revision

Viewing changes to src/backend/access/rtree/rtscan.c

  • Committer: alvherre
  • Date: 2005-12-16 21:24:52 UTC
  • Revision ID: svn-v4:db760fc0-0f08-0410-9d63-cc6633f64896:trunk:1
Initial import of the REL8_0_3 sources from the Pgsql CVS repository.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*-------------------------------------------------------------------------
 
2
 *
 
3
 * rtscan.c
 
4
 *        routines to manage scans on index relations
 
5
 *
 
6
 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
 
7
 * Portions Copyright (c) 1994, Regents of the University of California
 
8
 *
 
9
 *
 
10
 * IDENTIFICATION
 
11
 *        $PostgreSQL: pgsql/src/backend/access/rtree/rtscan.c,v 1.56 2004-12-31 21:59:26 pgsql Exp $
 
12
 *
 
13
 *-------------------------------------------------------------------------
 
14
 */
 
15
 
 
16
#include "postgres.h"
 
17
 
 
18
#include "access/genam.h"
 
19
#include "access/rtree.h"
 
20
#include "utils/lsyscache.h"
 
21
#include "utils/resowner.h"
 
22
 
 
23
 
 
24
/* routines defined and used here */
 
25
static void rtregscan(IndexScanDesc s);
 
26
static void rtdropscan(IndexScanDesc s);
 
27
static void rtadjone(IndexScanDesc s, int op, BlockNumber blkno,
 
28
                 OffsetNumber offnum);
 
29
static void adjuststack(RTSTACK *stk, BlockNumber blkno);
 
30
static void adjustiptr(IndexScanDesc s, ItemPointer iptr,
 
31
                   int op, BlockNumber blkno, OffsetNumber offnum);
 
32
 
 
33
/*
 
34
 *      Whenever we start an rtree scan in a backend, we register it in private
 
35
 *      space.  Then if the rtree index gets updated, we check all registered
 
36
 *      scans and adjust them if the tuple they point at got moved by the
 
37
 *      update.  We only need to do this in private space, because when we update
 
38
 *      an rtree we have a write lock on the tree, so no other process can have
 
39
 *      any locks at all on it.  A single transaction can have write and read
 
40
 *      locks on the same object, so that's why we need to handle this case.
 
41
 */
 
42
 
 
43
typedef struct RTScanListData
 
44
{
 
45
        IndexScanDesc rtsl_scan;
 
46
        ResourceOwner rtsl_owner;
 
47
        struct RTScanListData *rtsl_next;
 
48
} RTScanListData;
 
49
 
 
50
typedef RTScanListData *RTScanList;
 
51
 
 
52
/* pointer to list of local scans on rtrees */
 
53
static RTScanList RTScans = NULL;
 
54
 
 
55
Datum
 
56
rtbeginscan(PG_FUNCTION_ARGS)
 
57
{
 
58
        Relation        r = (Relation) PG_GETARG_POINTER(0);
 
59
        int                     nkeys = PG_GETARG_INT32(1);
 
60
        ScanKey         key = (ScanKey) PG_GETARG_POINTER(2);
 
61
        IndexScanDesc s;
 
62
 
 
63
        s = RelationGetIndexScan(r, nkeys, key);
 
64
 
 
65
        rtregscan(s);
 
66
 
 
67
        PG_RETURN_POINTER(s);
 
68
}
 
69
 
 
70
Datum
 
71
rtrescan(PG_FUNCTION_ARGS)
 
72
{
 
73
        IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0);
 
74
        ScanKey         key = (ScanKey) PG_GETARG_POINTER(1);
 
75
        RTreeScanOpaque p;
 
76
        int                     i;
 
77
 
 
78
        /*
 
79
         * Clear all the pointers.
 
80
         */
 
81
        ItemPointerSetInvalid(&s->currentItemData);
 
82
        ItemPointerSetInvalid(&s->currentMarkData);
 
83
 
 
84
        p = (RTreeScanOpaque) s->opaque;
 
85
        if (p != NULL)
 
86
        {
 
87
                /* rescan an existing indexscan --- reset state */
 
88
                freestack(p->s_stack);
 
89
                freestack(p->s_markstk);
 
90
                p->s_stack = p->s_markstk = NULL;
 
91
                p->s_flags = 0x0;
 
92
        }
 
93
        else
 
94
        {
 
95
                /* initialize opaque data */
 
96
                p = (RTreeScanOpaque) palloc(sizeof(RTreeScanOpaqueData));
 
97
                p->s_stack = p->s_markstk = NULL;
 
98
                p->s_internalNKey = s->numberOfKeys;
 
99
                p->s_flags = 0x0;
 
100
                s->opaque = p;
 
101
                if (s->numberOfKeys > 0)
 
102
                        p->s_internalKey = (ScanKey) palloc(sizeof(ScanKeyData) * s->numberOfKeys);
 
103
        }
 
104
 
 
105
        /* Update scan key, if a new one is given */
 
106
        if (key && s->numberOfKeys > 0)
 
107
        {
 
108
                memmove(s->keyData,
 
109
                                key,
 
110
                                s->numberOfKeys * sizeof(ScanKeyData));
 
111
 
 
112
                /*
 
113
                 * Scans on internal pages use different operators than they do on
 
114
                 * leaf pages.  For example, if the user wants all boxes that
 
115
                 * exactly match (x1,y1,x2,y2), then on internal pages we need to
 
116
                 * find all boxes that contain (x1,y1,x2,y2).
 
117
                 */
 
118
                for (i = 0; i < s->numberOfKeys; i++)
 
119
                {
 
120
                        AttrNumber      attno = s->keyData[i].sk_attno;
 
121
                        Oid                     opclass;
 
122
                        StrategyNumber int_strategy;
 
123
                        Oid                     int_oper;
 
124
                        RegProcedure int_proc;
 
125
 
 
126
                        opclass = s->indexRelation->rd_index->indclass[attno - 1];
 
127
                        int_strategy = RTMapToInternalOperator(s->keyData[i].sk_strategy);
 
128
                        int_oper = get_opclass_member(opclass,
 
129
                                                                                  s->keyData[i].sk_subtype,
 
130
                                                                                  int_strategy);
 
131
                        int_proc = get_opcode(int_oper);
 
132
                        ScanKeyEntryInitialize(&(p->s_internalKey[i]),
 
133
                                                                   s->keyData[i].sk_flags,
 
134
                                                                   attno,
 
135
                                                                   int_strategy,
 
136
                                                                   s->keyData[i].sk_subtype,
 
137
                                                                   int_proc,
 
138
                                                                   s->keyData[i].sk_argument);
 
139
                }
 
140
        }
 
141
 
 
142
        PG_RETURN_VOID();
 
143
}
 
144
 
 
145
Datum
 
146
rtmarkpos(PG_FUNCTION_ARGS)
 
147
{
 
148
        IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0);
 
149
        RTreeScanOpaque p;
 
150
        RTSTACK    *o,
 
151
                           *n,
 
152
                           *tmp;
 
153
 
 
154
        s->currentMarkData = s->currentItemData;
 
155
        p = (RTreeScanOpaque) s->opaque;
 
156
        if (p->s_flags & RTS_CURBEFORE)
 
157
                p->s_flags |= RTS_MRKBEFORE;
 
158
        else
 
159
                p->s_flags &= ~RTS_MRKBEFORE;
 
160
 
 
161
        o = NULL;
 
162
        n = p->s_stack;
 
163
 
 
164
        /* copy the parent stack from the current item data */
 
165
        while (n != NULL)
 
166
        {
 
167
                tmp = (RTSTACK *) palloc(sizeof(RTSTACK));
 
168
                tmp->rts_child = n->rts_child;
 
169
                tmp->rts_blk = n->rts_blk;
 
170
                tmp->rts_parent = o;
 
171
                o = tmp;
 
172
                n = n->rts_parent;
 
173
        }
 
174
 
 
175
        freestack(p->s_markstk);
 
176
        p->s_markstk = o;
 
177
 
 
178
        PG_RETURN_VOID();
 
179
}
 
180
 
 
181
Datum
 
182
rtrestrpos(PG_FUNCTION_ARGS)
 
183
{
 
184
        IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0);
 
185
        RTreeScanOpaque p;
 
186
        RTSTACK    *o,
 
187
                           *n,
 
188
                           *tmp;
 
189
 
 
190
        s->currentItemData = s->currentMarkData;
 
191
        p = (RTreeScanOpaque) s->opaque;
 
192
        if (p->s_flags & RTS_MRKBEFORE)
 
193
                p->s_flags |= RTS_CURBEFORE;
 
194
        else
 
195
                p->s_flags &= ~RTS_CURBEFORE;
 
196
 
 
197
        o = NULL;
 
198
        n = p->s_markstk;
 
199
 
 
200
        /* copy the parent stack from the current item data */
 
201
        while (n != NULL)
 
202
        {
 
203
                tmp = (RTSTACK *) palloc(sizeof(RTSTACK));
 
204
                tmp->rts_child = n->rts_child;
 
205
                tmp->rts_blk = n->rts_blk;
 
206
                tmp->rts_parent = o;
 
207
                o = tmp;
 
208
                n = n->rts_parent;
 
209
        }
 
210
 
 
211
        freestack(p->s_stack);
 
212
        p->s_stack = o;
 
213
 
 
214
        PG_RETURN_VOID();
 
215
}
 
216
 
 
217
Datum
 
218
rtendscan(PG_FUNCTION_ARGS)
 
219
{
 
220
        IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0);
 
221
        RTreeScanOpaque p;
 
222
 
 
223
        p = (RTreeScanOpaque) s->opaque;
 
224
 
 
225
        if (p != NULL)
 
226
        {
 
227
                freestack(p->s_stack);
 
228
                freestack(p->s_markstk);
 
229
                pfree(s->opaque);
 
230
        }
 
231
 
 
232
        rtdropscan(s);
 
233
        /* XXX don't unset read lock -- two-phase locking */
 
234
 
 
235
        PG_RETURN_VOID();
 
236
}
 
237
 
 
238
static void
 
239
rtregscan(IndexScanDesc s)
 
240
{
 
241
        RTScanList      l;
 
242
 
 
243
        l = (RTScanList) palloc(sizeof(RTScanListData));
 
244
        l->rtsl_scan = s;
 
245
        l->rtsl_owner = CurrentResourceOwner;
 
246
        l->rtsl_next = RTScans;
 
247
        RTScans = l;
 
248
}
 
249
 
 
250
static void
 
251
rtdropscan(IndexScanDesc s)
 
252
{
 
253
        RTScanList      l;
 
254
        RTScanList      prev;
 
255
 
 
256
        prev = NULL;
 
257
 
 
258
        for (l = RTScans;
 
259
                 l != NULL && l->rtsl_scan != s;
 
260
                 l = l->rtsl_next)
 
261
                prev = l;
 
262
 
 
263
        if (l == NULL)
 
264
                elog(ERROR, "rtree scan list corrupted -- could not find 0x%p",
 
265
                         (void *) s);
 
266
 
 
267
        if (prev == NULL)
 
268
                RTScans = l->rtsl_next;
 
269
        else
 
270
                prev->rtsl_next = l->rtsl_next;
 
271
 
 
272
        pfree(l);
 
273
}
 
274
 
 
275
/*
 
276
 * ReleaseResources_rtree() --- clean up rtree subsystem resources.
 
277
 *
 
278
 * This is here because it needs to touch this module's static var RTScans.
 
279
 */
 
280
void
 
281
ReleaseResources_rtree(void)
 
282
{
 
283
        RTScanList      l;
 
284
        RTScanList      prev;
 
285
        RTScanList      next;
 
286
 
 
287
        /*
 
288
         * Note: this should be a no-op during normal query shutdown. However,
 
289
         * in an abort situation ExecutorEnd is not called and so there may be
 
290
         * open index scans to clean up.
 
291
         */
 
292
        prev = NULL;
 
293
 
 
294
        for (l = RTScans; l != NULL; l = next)
 
295
        {
 
296
                next = l->rtsl_next;
 
297
                if (l->rtsl_owner == CurrentResourceOwner)
 
298
                {
 
299
                        if (prev == NULL)
 
300
                                RTScans = next;
 
301
                        else
 
302
                                prev->rtsl_next = next;
 
303
 
 
304
                        pfree(l);
 
305
                        /* prev does not change */
 
306
                }
 
307
                else
 
308
                        prev = l;
 
309
        }
 
310
}
 
311
 
 
312
void
 
313
rtadjscans(Relation r, int op, BlockNumber blkno, OffsetNumber offnum)
 
314
{
 
315
        RTScanList      l;
 
316
        Oid                     relid;
 
317
 
 
318
        relid = RelationGetRelid(r);
 
319
        for (l = RTScans; l != NULL; l = l->rtsl_next)
 
320
        {
 
321
                if (RelationGetRelid(l->rtsl_scan->indexRelation) == relid)
 
322
                        rtadjone(l->rtsl_scan, op, blkno, offnum);
 
323
        }
 
324
}
 
325
 
 
326
/*
 
327
 *      rtadjone() -- adjust one scan for update.
 
328
 *
 
329
 *              By here, the scan passed in is on a modified relation.  Op tells
 
330
 *              us what the modification is, and blkno and offind tell us what
 
331
 *              block and offset index were affected.  This routine checks the
 
332
 *              current and marked positions, and the current and marked stacks,
 
333
 *              to see if any stored location needs to be changed because of the
 
334
 *              update.  If so, we make the change here.
 
335
 */
 
336
static void
 
337
rtadjone(IndexScanDesc s,
 
338
                 int op,
 
339
                 BlockNumber blkno,
 
340
                 OffsetNumber offnum)
 
341
{
 
342
        RTreeScanOpaque so;
 
343
 
 
344
        adjustiptr(s, &(s->currentItemData), op, blkno, offnum);
 
345
        adjustiptr(s, &(s->currentMarkData), op, blkno, offnum);
 
346
 
 
347
        so = (RTreeScanOpaque) s->opaque;
 
348
 
 
349
        if (op == RTOP_SPLIT)
 
350
        {
 
351
                adjuststack(so->s_stack, blkno);
 
352
                adjuststack(so->s_markstk, blkno);
 
353
        }
 
354
}
 
355
 
 
356
/*
 
357
 *      adjustiptr() -- adjust current and marked item pointers in the scan
 
358
 *
 
359
 *              Depending on the type of update and the place it happened, we
 
360
 *              need to do nothing, to back up one record, or to start over on
 
361
 *              the same page.
 
362
 */
 
363
static void
 
364
adjustiptr(IndexScanDesc s,
 
365
                   ItemPointer iptr,
 
366
                   int op,
 
367
                   BlockNumber blkno,
 
368
                   OffsetNumber offnum)
 
369
{
 
370
        OffsetNumber curoff;
 
371
        RTreeScanOpaque so;
 
372
 
 
373
        if (ItemPointerIsValid(iptr))
 
374
        {
 
375
                if (ItemPointerGetBlockNumber(iptr) == blkno)
 
376
                {
 
377
                        curoff = ItemPointerGetOffsetNumber(iptr);
 
378
                        so = (RTreeScanOpaque) s->opaque;
 
379
 
 
380
                        switch (op)
 
381
                        {
 
382
                                case RTOP_DEL:
 
383
                                        /* back up one if we need to */
 
384
                                        if (curoff >= offnum)
 
385
                                        {
 
386
 
 
387
                                                if (curoff > FirstOffsetNumber)
 
388
                                                {
 
389
                                                        /* just adjust the item pointer */
 
390
                                                        ItemPointerSet(iptr, blkno, OffsetNumberPrev(curoff));
 
391
                                                }
 
392
                                                else
 
393
                                                {
 
394
                                                        /*
 
395
                                                         * remember that we're before the current
 
396
                                                         * tuple
 
397
                                                         */
 
398
                                                        ItemPointerSet(iptr, blkno, FirstOffsetNumber);
 
399
                                                        if (iptr == &(s->currentItemData))
 
400
                                                                so->s_flags |= RTS_CURBEFORE;
 
401
                                                        else
 
402
                                                                so->s_flags |= RTS_MRKBEFORE;
 
403
                                                }
 
404
                                        }
 
405
                                        break;
 
406
 
 
407
                                case RTOP_SPLIT:
 
408
                                        /* back to start of page on split */
 
409
                                        ItemPointerSet(iptr, blkno, FirstOffsetNumber);
 
410
                                        if (iptr == &(s->currentItemData))
 
411
                                                so->s_flags &= ~RTS_CURBEFORE;
 
412
                                        else
 
413
                                                so->s_flags &= ~RTS_MRKBEFORE;
 
414
                                        break;
 
415
 
 
416
                                default:
 
417
                                        elog(ERROR, "unrecognized operation in rtree scan adjust: %d", op);
 
418
                        }
 
419
                }
 
420
        }
 
421
}
 
422
 
 
423
/*
 
424
 *      adjuststack() -- adjust the supplied stack for a split on a page in
 
425
 *                                       the index we're scanning.
 
426
 *
 
427
 *              If a page on our parent stack has split, we need to back up to the
 
428
 *              beginning of the page and rescan it.  The reason for this is that
 
429
 *              the split algorithm for rtrees doesn't order tuples in any useful
 
430
 *              way on a single page.  This means on that a split, we may wind up
 
431
 *              looking at some heap tuples more than once.  This is handled in the
 
432
 *              access method update code for heaps; if we've modified the tuple we
 
433
 *              are looking at already in this transaction, we ignore the update
 
434
 *              request.
 
435
 */
 
436
static void
 
437
adjuststack(RTSTACK *stk, BlockNumber blkno)
 
438
{
 
439
        while (stk != NULL)
 
440
        {
 
441
                if (stk->rts_blk == blkno)
 
442
                        stk->rts_child = FirstOffsetNumber;
 
443
 
 
444
                stk = stk->rts_parent;
 
445
        }
 
446
}