~ubuntu-branches/debian/experimental/postgresql-11/experimental

« back to all changes in this revision

Viewing changes to src/backend/access/gin/ginentrypage.c

  • Committer: Package Import Robot
  • Author(s): Christoph Berg
  • Date: 2018-05-22 14:19:08 UTC
  • Revision ID: package-import@ubuntu.com-20180522141908-0oy9ujs1b5vrda74
Tags: upstream-11~beta1
ImportĀ upstreamĀ versionĀ 11~beta1

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*-------------------------------------------------------------------------
 
2
 *
 
3
 * ginentrypage.c
 
4
 *        routines for handling GIN entry tree pages.
 
5
 *
 
6
 *
 
7
 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
 
8
 * Portions Copyright (c) 1994, Regents of the University of California
 
9
 *
 
10
 * IDENTIFICATION
 
11
 *                      src/backend/access/gin/ginentrypage.c
 
12
 *-------------------------------------------------------------------------
 
13
 */
 
14
 
 
15
#include "postgres.h"
 
16
 
 
17
#include "access/gin_private.h"
 
18
#include "access/ginxlog.h"
 
19
#include "access/xloginsert.h"
 
20
#include "miscadmin.h"
 
21
#include "utils/rel.h"
 
22
 
 
23
static void entrySplitPage(GinBtree btree, Buffer origbuf,
 
24
                           GinBtreeStack *stack,
 
25
                           GinBtreeEntryInsertData *insertData,
 
26
                           BlockNumber updateblkno,
 
27
                           Page *newlpage, Page *newrpage);
 
28
 
 
29
/*
 
30
 * Form a tuple for entry tree.
 
31
 *
 
32
 * If the tuple would be too big to be stored, function throws a suitable
 
33
 * error if errorTooBig is true, or returns NULL if errorTooBig is false.
 
34
 *
 
35
 * See src/backend/access/gin/README for a description of the index tuple
 
36
 * format that is being built here.  We build on the assumption that we
 
37
 * are making a leaf-level key entry containing a posting list of nipd items.
 
38
 * If the caller is actually trying to make a posting-tree entry, non-leaf
 
39
 * entry, or pending-list entry, it should pass dataSize = 0 and then overwrite
 
40
 * the t_tid fields as necessary.  In any case, 'data' can be NULL to skip
 
41
 * filling in the posting list; the caller is responsible for filling it
 
42
 * afterwards if data = NULL and nipd > 0.
 
43
 */
 
44
IndexTuple
 
45
GinFormTuple(GinState *ginstate,
 
46
                         OffsetNumber attnum, Datum key, GinNullCategory category,
 
47
                         Pointer data, Size dataSize, int nipd,
 
48
                         bool errorTooBig)
 
49
{
 
50
        Datum           datums[2];
 
51
        bool            isnull[2];
 
52
        IndexTuple      itup;
 
53
        uint32          newsize;
 
54
 
 
55
        /* Build the basic tuple: optional column number, plus key datum */
 
56
        if (ginstate->oneCol)
 
57
        {
 
58
                datums[0] = key;
 
59
                isnull[0] = (category != GIN_CAT_NORM_KEY);
 
60
        }
 
61
        else
 
62
        {
 
63
                datums[0] = UInt16GetDatum(attnum);
 
64
                isnull[0] = false;
 
65
                datums[1] = key;
 
66
                isnull[1] = (category != GIN_CAT_NORM_KEY);
 
67
        }
 
68
 
 
69
        itup = index_form_tuple(ginstate->tupdesc[attnum - 1], datums, isnull);
 
70
 
 
71
        /*
 
72
         * Determine and store offset to the posting list, making sure there is
 
73
         * room for the category byte if needed.
 
74
         *
 
75
         * Note: because index_form_tuple MAXALIGNs the tuple size, there may well
 
76
         * be some wasted pad space.  Is it worth recomputing the data length to
 
77
         * prevent that?  That would also allow us to Assert that the real data
 
78
         * doesn't overlap the GinNullCategory byte, which this code currently
 
79
         * takes on faith.
 
80
         */
 
81
        newsize = IndexTupleSize(itup);
 
82
 
 
83
        if (IndexTupleHasNulls(itup))
 
84
        {
 
85
                uint32          minsize;
 
86
 
 
87
                Assert(category != GIN_CAT_NORM_KEY);
 
88
                minsize = GinCategoryOffset(itup, ginstate) + sizeof(GinNullCategory);
 
89
                newsize = Max(newsize, minsize);
 
90
        }
 
91
 
 
92
        newsize = SHORTALIGN(newsize);
 
93
 
 
94
        GinSetPostingOffset(itup, newsize);
 
95
        GinSetNPosting(itup, nipd);
 
96
 
 
97
        /*
 
98
         * Add space needed for posting list, if any.  Then check that the tuple
 
99
         * won't be too big to store.
 
100
         */
 
101
        newsize += dataSize;
 
102
 
 
103
        newsize = MAXALIGN(newsize);
 
104
 
 
105
        if (newsize > GinMaxItemSize)
 
106
        {
 
107
                if (errorTooBig)
 
108
                        ereport(ERROR,
 
109
                                        (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 
110
                                         errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
 
111
                                                        (Size) newsize, (Size) GinMaxItemSize,
 
112
                                                        RelationGetRelationName(ginstate->index))));
 
113
                pfree(itup);
 
114
                return NULL;
 
115
        }
 
116
 
 
117
        /*
 
118
         * Resize tuple if needed
 
119
         */
 
120
        if (newsize != IndexTupleSize(itup))
 
121
        {
 
122
                itup = repalloc(itup, newsize);
 
123
 
 
124
                /*
 
125
                 * PostgreSQL 9.3 and earlier did not clear this new space, so we
 
126
                 * might find uninitialized padding when reading tuples from disk.
 
127
                 */
 
128
                memset((char *) itup + IndexTupleSize(itup),
 
129
                           0, newsize - IndexTupleSize(itup));
 
130
                /* set new size in tuple header */
 
131
                itup->t_info &= ~INDEX_SIZE_MASK;
 
132
                itup->t_info |= newsize;
 
133
        }
 
134
 
 
135
        /*
 
136
         * Copy in the posting list, if provided
 
137
         */
 
138
        if (data)
 
139
        {
 
140
                char       *ptr = GinGetPosting(itup);
 
141
 
 
142
                memcpy(ptr, data, dataSize);
 
143
        }
 
144
 
 
145
        /*
 
146
         * Insert category byte, if needed
 
147
         */
 
148
        if (category != GIN_CAT_NORM_KEY)
 
149
        {
 
150
                Assert(IndexTupleHasNulls(itup));
 
151
                GinSetNullCategory(itup, ginstate, category);
 
152
        }
 
153
        return itup;
 
154
}
 
155
 
 
156
/*
 
157
 * Read item pointers from leaf entry tuple.
 
158
 *
 
159
 * Returns a palloc'd array of ItemPointers. The number of items is returned
 
160
 * in *nitems.
 
161
 */
 
162
ItemPointer
 
163
ginReadTuple(GinState *ginstate, OffsetNumber attnum, IndexTuple itup,
 
164
                         int *nitems)
 
165
{
 
166
        Pointer         ptr = GinGetPosting(itup);
 
167
        int                     nipd = GinGetNPosting(itup);
 
168
        ItemPointer ipd;
 
169
        int                     ndecoded;
 
170
 
 
171
        if (GinItupIsCompressed(itup))
 
172
        {
 
173
                if (nipd > 0)
 
174
                {
 
175
                        ipd = ginPostingListDecode((GinPostingList *) ptr, &ndecoded);
 
176
                        if (nipd != ndecoded)
 
177
                                elog(ERROR, "number of items mismatch in GIN entry tuple, %d in tuple header, %d decoded",
 
178
                                         nipd, ndecoded);
 
179
                }
 
180
                else
 
181
                {
 
182
                        ipd = palloc(0);
 
183
                }
 
184
        }
 
185
        else
 
186
        {
 
187
                ipd = (ItemPointer) palloc(sizeof(ItemPointerData) * nipd);
 
188
                memcpy(ipd, ptr, sizeof(ItemPointerData) * nipd);
 
189
        }
 
190
        *nitems = nipd;
 
191
        return ipd;
 
192
}
 
193
 
 
194
/*
 
195
 * Form a non-leaf entry tuple by copying the key data from the given tuple,
 
196
 * which can be either a leaf or non-leaf entry tuple.
 
197
 *
 
198
 * Any posting list in the source tuple is not copied.  The specified child
 
199
 * block number is inserted into t_tid.
 
200
 */
 
201
static IndexTuple
 
202
GinFormInteriorTuple(IndexTuple itup, Page page, BlockNumber childblk)
 
203
{
 
204
        IndexTuple      nitup;
 
205
 
 
206
        if (GinPageIsLeaf(page) && !GinIsPostingTree(itup))
 
207
        {
 
208
                /* Tuple contains a posting list, just copy stuff before that */
 
209
                uint32          origsize = GinGetPostingOffset(itup);
 
210
 
 
211
                origsize = MAXALIGN(origsize);
 
212
                nitup = (IndexTuple) palloc(origsize);
 
213
                memcpy(nitup, itup, origsize);
 
214
                /* ... be sure to fix the size header field ... */
 
215
                nitup->t_info &= ~INDEX_SIZE_MASK;
 
216
                nitup->t_info |= origsize;
 
217
        }
 
218
        else
 
219
        {
 
220
                /* Copy the tuple as-is */
 
221
                nitup = (IndexTuple) palloc(IndexTupleSize(itup));
 
222
                memcpy(nitup, itup, IndexTupleSize(itup));
 
223
        }
 
224
 
 
225
        /* Now insert the correct downlink */
 
226
        GinSetDownlink(nitup, childblk);
 
227
 
 
228
        return nitup;
 
229
}
 
230
 
 
231
/*
 
232
 * Entry tree is a "static", ie tuple never deletes from it,
 
233
 * so we don't use right bound, we use rightmost key instead.
 
234
 */
 
235
static IndexTuple
 
236
getRightMostTuple(Page page)
 
237
{
 
238
        OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
 
239
 
 
240
        return (IndexTuple) PageGetItem(page, PageGetItemId(page, maxoff));
 
241
}
 
242
 
 
243
static bool
 
244
entryIsMoveRight(GinBtree btree, Page page)
 
245
{
 
246
        IndexTuple      itup;
 
247
        OffsetNumber attnum;
 
248
        Datum           key;
 
249
        GinNullCategory category;
 
250
 
 
251
        if (GinPageRightMost(page))
 
252
                return false;
 
253
 
 
254
        itup = getRightMostTuple(page);
 
255
        attnum = gintuple_get_attrnum(btree->ginstate, itup);
 
256
        key = gintuple_get_key(btree->ginstate, itup, &category);
 
257
 
 
258
        if (ginCompareAttEntries(btree->ginstate,
 
259
                                                         btree->entryAttnum, btree->entryKey, btree->entryCategory,
 
260
                                                         attnum, key, category) > 0)
 
261
                return true;
 
262
 
 
263
        return false;
 
264
}
 
265
 
 
266
/*
 
267
 * Find correct tuple in non-leaf page. It supposed that
 
268
 * page correctly chosen and searching value SHOULD be on page
 
269
 */
 
270
static BlockNumber
 
271
entryLocateEntry(GinBtree btree, GinBtreeStack *stack)
 
272
{
 
273
        OffsetNumber low,
 
274
                                high,
 
275
                                maxoff;
 
276
        IndexTuple      itup = NULL;
 
277
        int                     result;
 
278
        Page            page = BufferGetPage(stack->buffer);
 
279
 
 
280
        Assert(!GinPageIsLeaf(page));
 
281
        Assert(!GinPageIsData(page));
 
282
 
 
283
        if (btree->fullScan)
 
284
        {
 
285
                stack->off = FirstOffsetNumber;
 
286
                stack->predictNumber *= PageGetMaxOffsetNumber(page);
 
287
                return btree->getLeftMostChild(btree, page);
 
288
        }
 
289
 
 
290
        low = FirstOffsetNumber;
 
291
        maxoff = high = PageGetMaxOffsetNumber(page);
 
292
        Assert(high >= low);
 
293
 
 
294
        high++;
 
295
 
 
296
        while (high > low)
 
297
        {
 
298
                OffsetNumber mid = low + ((high - low) / 2);
 
299
 
 
300
                if (mid == maxoff && GinPageRightMost(page))
 
301
                {
 
302
                        /* Right infinity */
 
303
                        result = -1;
 
304
                }
 
305
                else
 
306
                {
 
307
                        OffsetNumber attnum;
 
308
                        Datum           key;
 
309
                        GinNullCategory category;
 
310
 
 
311
                        itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid));
 
312
                        attnum = gintuple_get_attrnum(btree->ginstate, itup);
 
313
                        key = gintuple_get_key(btree->ginstate, itup, &category);
 
314
                        result = ginCompareAttEntries(btree->ginstate,
 
315
                                                                                  btree->entryAttnum,
 
316
                                                                                  btree->entryKey,
 
317
                                                                                  btree->entryCategory,
 
318
                                                                                  attnum, key, category);
 
319
                }
 
320
 
 
321
                if (result == 0)
 
322
                {
 
323
                        stack->off = mid;
 
324
                        Assert(GinGetDownlink(itup) != GIN_ROOT_BLKNO);
 
325
                        return GinGetDownlink(itup);
 
326
                }
 
327
                else if (result > 0)
 
328
                        low = mid + 1;
 
329
                else
 
330
                        high = mid;
 
331
        }
 
332
 
 
333
        Assert(high >= FirstOffsetNumber && high <= maxoff);
 
334
 
 
335
        stack->off = high;
 
336
        itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, high));
 
337
        Assert(GinGetDownlink(itup) != GIN_ROOT_BLKNO);
 
338
        return GinGetDownlink(itup);
 
339
}
 
340
 
 
341
/*
 
342
 * Searches correct position for value on leaf page.
 
343
 * Page should be correctly chosen.
 
344
 * Returns true if value found on page.
 
345
 */
 
346
static bool
 
347
entryLocateLeafEntry(GinBtree btree, GinBtreeStack *stack)
 
348
{
 
349
        Page            page = BufferGetPage(stack->buffer);
 
350
        OffsetNumber low,
 
351
                                high;
 
352
 
 
353
        Assert(GinPageIsLeaf(page));
 
354
        Assert(!GinPageIsData(page));
 
355
 
 
356
        if (btree->fullScan)
 
357
        {
 
358
                stack->off = FirstOffsetNumber;
 
359
                return true;
 
360
        }
 
361
 
 
362
        low = FirstOffsetNumber;
 
363
        high = PageGetMaxOffsetNumber(page);
 
364
 
 
365
        if (high < low)
 
366
        {
 
367
                stack->off = FirstOffsetNumber;
 
368
                return false;
 
369
        }
 
370
 
 
371
        high++;
 
372
 
 
373
        while (high > low)
 
374
        {
 
375
                OffsetNumber mid = low + ((high - low) / 2);
 
376
                IndexTuple      itup;
 
377
                OffsetNumber attnum;
 
378
                Datum           key;
 
379
                GinNullCategory category;
 
380
                int                     result;
 
381
 
 
382
                itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid));
 
383
                attnum = gintuple_get_attrnum(btree->ginstate, itup);
 
384
                key = gintuple_get_key(btree->ginstate, itup, &category);
 
385
                result = ginCompareAttEntries(btree->ginstate,
 
386
                                                                          btree->entryAttnum,
 
387
                                                                          btree->entryKey,
 
388
                                                                          btree->entryCategory,
 
389
                                                                          attnum, key, category);
 
390
                if (result == 0)
 
391
                {
 
392
                        stack->off = mid;
 
393
                        return true;
 
394
                }
 
395
                else if (result > 0)
 
396
                        low = mid + 1;
 
397
                else
 
398
                        high = mid;
 
399
        }
 
400
 
 
401
        stack->off = high;
 
402
        return false;
 
403
}
 
404
 
 
405
static OffsetNumber
 
406
entryFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff)
 
407
{
 
408
        OffsetNumber i,
 
409
                                maxoff = PageGetMaxOffsetNumber(page);
 
410
        IndexTuple      itup;
 
411
 
 
412
        Assert(!GinPageIsLeaf(page));
 
413
        Assert(!GinPageIsData(page));
 
414
 
 
415
        /* if page isn't changed, we returns storedOff */
 
416
        if (storedOff >= FirstOffsetNumber && storedOff <= maxoff)
 
417
        {
 
418
                itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, storedOff));
 
419
                if (GinGetDownlink(itup) == blkno)
 
420
                        return storedOff;
 
421
 
 
422
                /*
 
423
                 * we hope, that needed pointer goes to right. It's true if there
 
424
                 * wasn't a deletion
 
425
                 */
 
426
                for (i = storedOff + 1; i <= maxoff; i++)
 
427
                {
 
428
                        itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
 
429
                        if (GinGetDownlink(itup) == blkno)
 
430
                                return i;
 
431
                }
 
432
                maxoff = storedOff - 1;
 
433
        }
 
434
 
 
435
        /* last chance */
 
436
        for (i = FirstOffsetNumber; i <= maxoff; i++)
 
437
        {
 
438
                itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
 
439
                if (GinGetDownlink(itup) == blkno)
 
440
                        return i;
 
441
        }
 
442
 
 
443
        return InvalidOffsetNumber;
 
444
}
 
445
 
 
446
static BlockNumber
 
447
entryGetLeftMostPage(GinBtree btree, Page page)
 
448
{
 
449
        IndexTuple      itup;
 
450
 
 
451
        Assert(!GinPageIsLeaf(page));
 
452
        Assert(!GinPageIsData(page));
 
453
        Assert(PageGetMaxOffsetNumber(page) >= FirstOffsetNumber);
 
454
 
 
455
        itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, FirstOffsetNumber));
 
456
        return GinGetDownlink(itup);
 
457
}
 
458
 
 
459
static bool
 
460
entryIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off,
 
461
                                   GinBtreeEntryInsertData *insertData)
 
462
{
 
463
        Size            releasedsz = 0;
 
464
        Size            addedsz;
 
465
        Page            page = BufferGetPage(buf);
 
466
 
 
467
        Assert(insertData->entry);
 
468
        Assert(!GinPageIsData(page));
 
469
 
 
470
        if (insertData->isDelete)
 
471
        {
 
472
                IndexTuple      itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
 
473
 
 
474
                releasedsz = MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData);
 
475
        }
 
476
 
 
477
        addedsz = MAXALIGN(IndexTupleSize(insertData->entry)) + sizeof(ItemIdData);
 
478
 
 
479
        if (PageGetFreeSpace(page) + releasedsz >= addedsz)
 
480
                return true;
 
481
 
 
482
        return false;
 
483
}
 
484
 
 
485
/*
 
486
 * Delete tuple on leaf page if tuples existed and we
 
487
 * should update it, update old child blkno to new right page
 
488
 * if child split occurred
 
489
 */
 
490
static void
 
491
entryPreparePage(GinBtree btree, Page page, OffsetNumber off,
 
492
                                 GinBtreeEntryInsertData *insertData, BlockNumber updateblkno)
 
493
{
 
494
        Assert(insertData->entry);
 
495
        Assert(!GinPageIsData(page));
 
496
 
 
497
        if (insertData->isDelete)
 
498
        {
 
499
                Assert(GinPageIsLeaf(page));
 
500
                PageIndexTupleDelete(page, off);
 
501
        }
 
502
 
 
503
        if (!GinPageIsLeaf(page) && updateblkno != InvalidBlockNumber)
 
504
        {
 
505
                IndexTuple      itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
 
506
 
 
507
                GinSetDownlink(itup, updateblkno);
 
508
        }
 
509
}
 
510
 
 
511
/*
 
512
 * Prepare to insert data on an entry page.
 
513
 *
 
514
 * If it will fit, return GPTP_INSERT after doing whatever setup is needed
 
515
 * before we enter the insertion critical section.  *ptp_workspace can be
 
516
 * set to pass information along to the execPlaceToPage function.
 
517
 *
 
518
 * If it won't fit, perform a page split and return two temporary page
 
519
 * images into *newlpage and *newrpage, with result GPTP_SPLIT.
 
520
 *
 
521
 * In neither case should the given page buffer be modified here.
 
522
 *
 
523
 * Note: on insertion to an internal node, in addition to inserting the given
 
524
 * item, the downlink of the existing item at stack->off will be updated to
 
525
 * point to updateblkno.
 
526
 */
 
527
static GinPlaceToPageRC
 
528
entryBeginPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
 
529
                                          void *insertPayload, BlockNumber updateblkno,
 
530
                                          void **ptp_workspace,
 
531
                                          Page *newlpage, Page *newrpage)
 
532
{
 
533
        GinBtreeEntryInsertData *insertData = insertPayload;
 
534
        OffsetNumber off = stack->off;
 
535
 
 
536
        /* If it doesn't fit, deal with split case */
 
537
        if (!entryIsEnoughSpace(btree, buf, off, insertData))
 
538
        {
 
539
                entrySplitPage(btree, buf, stack, insertData, updateblkno,
 
540
                                           newlpage, newrpage);
 
541
                return GPTP_SPLIT;
 
542
        }
 
543
 
 
544
        /* Else, we're ready to proceed with insertion */
 
545
        return GPTP_INSERT;
 
546
}
 
547
 
 
548
/*
 
549
 * Perform data insertion after beginPlaceToPage has decided it will fit.
 
550
 *
 
551
 * This is invoked within a critical section, and XLOG record creation (if
 
552
 * needed) is already started.  The target buffer is registered in slot 0.
 
553
 */
 
554
static void
 
555
entryExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
 
556
                                         void *insertPayload, BlockNumber updateblkno,
 
557
                                         void *ptp_workspace)
 
558
{
 
559
        GinBtreeEntryInsertData *insertData = insertPayload;
 
560
        Page            page = BufferGetPage(buf);
 
561
        OffsetNumber off = stack->off;
 
562
        OffsetNumber placed;
 
563
 
 
564
        entryPreparePage(btree, page, off, insertData, updateblkno);
 
565
 
 
566
        placed = PageAddItem(page,
 
567
                                                 (Item) insertData->entry,
 
568
                                                 IndexTupleSize(insertData->entry),
 
569
                                                 off, false, false);
 
570
        if (placed != off)
 
571
                elog(ERROR, "failed to add item to index page in \"%s\"",
 
572
                         RelationGetRelationName(btree->index));
 
573
 
 
574
        if (RelationNeedsWAL(btree->index))
 
575
        {
 
576
                /*
 
577
                 * This must be static, because it has to survive until XLogInsert,
 
578
                 * and we can't palloc here.  Ugly, but the XLogInsert infrastructure
 
579
                 * isn't reentrant anyway.
 
580
                 */
 
581
                static ginxlogInsertEntry data;
 
582
 
 
583
                data.isDelete = insertData->isDelete;
 
584
                data.offset = off;
 
585
 
 
586
                XLogRegisterBufData(0, (char *) &data,
 
587
                                                        offsetof(ginxlogInsertEntry, tuple));
 
588
                XLogRegisterBufData(0, (char *) insertData->entry,
 
589
                                                        IndexTupleSize(insertData->entry));
 
590
        }
 
591
}
 
592
 
 
593
/*
 
594
 * Split entry page and insert new data.
 
595
 *
 
596
 * Returns new temp pages to *newlpage and *newrpage.
 
597
 * The original buffer is left untouched.
 
598
 */
 
599
static void
 
600
entrySplitPage(GinBtree btree, Buffer origbuf,
 
601
                           GinBtreeStack *stack,
 
602
                           GinBtreeEntryInsertData *insertData,
 
603
                           BlockNumber updateblkno,
 
604
                           Page *newlpage, Page *newrpage)
 
605
{
 
606
        OffsetNumber off = stack->off;
 
607
        OffsetNumber i,
 
608
                                maxoff,
 
609
                                separator = InvalidOffsetNumber;
 
610
        Size            totalsize = 0;
 
611
        Size            lsize = 0,
 
612
                                size;
 
613
        char       *ptr;
 
614
        IndexTuple      itup;
 
615
        Page            page;
 
616
        Page            lpage = PageGetTempPageCopy(BufferGetPage(origbuf));
 
617
        Page            rpage = PageGetTempPageCopy(BufferGetPage(origbuf));
 
618
        Size            pageSize = PageGetPageSize(lpage);
 
619
        char            tupstore[2 * BLCKSZ];
 
620
 
 
621
        entryPreparePage(btree, lpage, off, insertData, updateblkno);
 
622
 
 
623
        /*
 
624
         * First, append all the existing tuples and the new tuple we're inserting
 
625
         * one after another in a temporary workspace.
 
626
         */
 
627
        maxoff = PageGetMaxOffsetNumber(lpage);
 
628
        ptr = tupstore;
 
629
        for (i = FirstOffsetNumber; i <= maxoff; i++)
 
630
        {
 
631
                if (i == off)
 
632
                {
 
633
                        size = MAXALIGN(IndexTupleSize(insertData->entry));
 
634
                        memcpy(ptr, insertData->entry, size);
 
635
                        ptr += size;
 
636
                        totalsize += size + sizeof(ItemIdData);
 
637
                }
 
638
 
 
639
                itup = (IndexTuple) PageGetItem(lpage, PageGetItemId(lpage, i));
 
640
                size = MAXALIGN(IndexTupleSize(itup));
 
641
                memcpy(ptr, itup, size);
 
642
                ptr += size;
 
643
                totalsize += size + sizeof(ItemIdData);
 
644
        }
 
645
 
 
646
        if (off == maxoff + 1)
 
647
        {
 
648
                size = MAXALIGN(IndexTupleSize(insertData->entry));
 
649
                memcpy(ptr, insertData->entry, size);
 
650
                ptr += size;
 
651
                totalsize += size + sizeof(ItemIdData);
 
652
        }
 
653
 
 
654
        /*
 
655
         * Initialize the left and right pages, and copy all the tuples back to
 
656
         * them.
 
657
         */
 
658
        GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
 
659
        GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize);
 
660
 
 
661
        ptr = tupstore;
 
662
        maxoff++;
 
663
        lsize = 0;
 
664
 
 
665
        page = lpage;
 
666
        for (i = FirstOffsetNumber; i <= maxoff; i++)
 
667
        {
 
668
                itup = (IndexTuple) ptr;
 
669
 
 
670
                /*
 
671
                 * Decide where to split.  We try to equalize the pages' total data
 
672
                 * size, not number of tuples.
 
673
                 */
 
674
                if (lsize > totalsize / 2)
 
675
                {
 
676
                        if (separator == InvalidOffsetNumber)
 
677
                                separator = i - 1;
 
678
                        page = rpage;
 
679
                }
 
680
                else
 
681
                {
 
682
                        lsize += MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData);
 
683
                }
 
684
 
 
685
                if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
 
686
                        elog(ERROR, "failed to add item to index page in \"%s\"",
 
687
                                 RelationGetRelationName(btree->index));
 
688
                ptr += MAXALIGN(IndexTupleSize(itup));
 
689
        }
 
690
 
 
691
        /* return temp pages to caller */
 
692
        *newlpage = lpage;
 
693
        *newrpage = rpage;
 
694
}
 
695
 
 
696
/*
 
697
 * Construct insertion payload for inserting the downlink for given buffer.
 
698
 */
 
699
static void *
 
700
entryPrepareDownlink(GinBtree btree, Buffer lbuf)
 
701
{
 
702
        GinBtreeEntryInsertData *insertData;
 
703
        Page            lpage = BufferGetPage(lbuf);
 
704
        BlockNumber lblkno = BufferGetBlockNumber(lbuf);
 
705
        IndexTuple      itup;
 
706
 
 
707
        itup = getRightMostTuple(lpage);
 
708
 
 
709
        insertData = palloc(sizeof(GinBtreeEntryInsertData));
 
710
        insertData->entry = GinFormInteriorTuple(itup, lpage, lblkno);
 
711
        insertData->isDelete = false;
 
712
 
 
713
        return insertData;
 
714
}
 
715
 
 
716
/*
 
717
 * Fills new root by rightest values from child.
 
718
 * Also called from ginxlog, should not use btree
 
719
 */
 
720
void
 
721
ginEntryFillRoot(GinBtree btree, Page root,
 
722
                                 BlockNumber lblkno, Page lpage,
 
723
                                 BlockNumber rblkno, Page rpage)
 
724
{
 
725
        IndexTuple      itup;
 
726
 
 
727
        itup = GinFormInteriorTuple(getRightMostTuple(lpage), lpage, lblkno);
 
728
        if (PageAddItem(root, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
 
729
                elog(ERROR, "failed to add item to index root page");
 
730
        pfree(itup);
 
731
 
 
732
        itup = GinFormInteriorTuple(getRightMostTuple(rpage), rpage, rblkno);
 
733
        if (PageAddItem(root, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
 
734
                elog(ERROR, "failed to add item to index root page");
 
735
        pfree(itup);
 
736
}
 
737
 
 
738
/*
 
739
 * Set up GinBtree for entry page access
 
740
 *
 
741
 * Note: during WAL recovery, there may be no valid data in ginstate
 
742
 * other than a faked-up Relation pointer; the key datum is bogus too.
 
743
 */
 
744
void
 
745
ginPrepareEntryScan(GinBtree btree, OffsetNumber attnum,
 
746
                                        Datum key, GinNullCategory category,
 
747
                                        GinState *ginstate)
 
748
{
 
749
        memset(btree, 0, sizeof(GinBtreeData));
 
750
 
 
751
        btree->index = ginstate->index;
 
752
        btree->rootBlkno = GIN_ROOT_BLKNO;
 
753
        btree->ginstate = ginstate;
 
754
 
 
755
        btree->findChildPage = entryLocateEntry;
 
756
        btree->getLeftMostChild = entryGetLeftMostPage;
 
757
        btree->isMoveRight = entryIsMoveRight;
 
758
        btree->findItem = entryLocateLeafEntry;
 
759
        btree->findChildPtr = entryFindChildPtr;
 
760
        btree->beginPlaceToPage = entryBeginPlaceToPage;
 
761
        btree->execPlaceToPage = entryExecPlaceToPage;
 
762
        btree->fillRoot = ginEntryFillRoot;
 
763
        btree->prepareDownlink = entryPrepareDownlink;
 
764
 
 
765
        btree->isData = false;
 
766
        btree->fullScan = false;
 
767
        btree->isBuild = false;
 
768
 
 
769
        btree->entryAttnum = attnum;
 
770
        btree->entryKey = key;
 
771
        btree->entryCategory = category;
 
772
}