1
/*-------------------------------------------------------------------------
4
* page utilities routines for the postgres inverted index access method.
7
* Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
8
* Portions Copyright (c) 1994, Regents of the University of California
11
* src/backend/access/gin/gindatapage.c
12
*-------------------------------------------------------------------------
17
#include "access/gin_private.h"
18
#include "storage/bufmgr.h"
19
#include "utils/rel.h"
22
ginCompareItemPointers(ItemPointer a, ItemPointer b)
24
BlockNumber ba = GinItemPointerGetBlockNumber(a);
25
BlockNumber bb = GinItemPointerGetBlockNumber(b);
29
OffsetNumber oa = GinItemPointerGetOffsetNumber(a);
30
OffsetNumber ob = GinItemPointerGetOffsetNumber(b);
34
return (oa > ob) ? 1 : -1;
37
return (ba > bb) ? 1 : -1;
41
* Merge two ordered arrays of itempointers, eliminating any duplicates.
42
* Returns the number of items in the result.
43
* Caller is responsible that there is enough space at *dst.
46
ginMergeItemPointers(ItemPointerData *dst,
47
ItemPointerData *a, uint32 na,
48
ItemPointerData *b, uint32 nb)
50
ItemPointerData *dptr = dst;
51
ItemPointerData *aptr = a,
54
while (aptr - a < na && bptr - b < nb)
56
int cmp = ginCompareItemPointers(aptr, bptr);
62
/* we want only one copy of the identical items */
80
* Checks, should we move to right link...
81
* Compares inserting itemp pointer with right bound of current page
84
dataIsMoveRight(GinBtree btree, Page page)
86
ItemPointer iptr = GinDataPageGetRightBound(page);
88
if (GinPageRightMost(page))
91
return (ginCompareItemPointers(btree->items + btree->curitem, iptr) > 0) ? TRUE : FALSE;
95
* Find correct PostingItem in non-leaf page. It supposed that page
96
* correctly chosen and searching value SHOULD be on page
99
dataLocateItem(GinBtree btree, GinBtreeStack *stack)
104
PostingItem *pitem = NULL;
106
Page page = BufferGetPage(stack->buffer);
108
Assert(!GinPageIsLeaf(page));
109
Assert(GinPageIsData(page));
113
stack->off = FirstOffsetNumber;
114
stack->predictNumber *= GinPageGetOpaque(page)->maxoff;
115
return btree->getLeftMostPage(btree, page);
118
low = FirstOffsetNumber;
119
maxoff = high = GinPageGetOpaque(page)->maxoff;
126
OffsetNumber mid = low + ((high - low) / 2);
128
pitem = (PostingItem *) GinDataPageGetItem(page, mid);
133
* Right infinity, page already correctly chosen with a help of
140
pitem = (PostingItem *) GinDataPageGetItem(page, mid);
141
result = ginCompareItemPointers(btree->items + btree->curitem, &(pitem->key));
147
return PostingItemGetBlockNumber(pitem);
155
Assert(high >= FirstOffsetNumber && high <= maxoff);
158
pitem = (PostingItem *) GinDataPageGetItem(page, high);
159
return PostingItemGetBlockNumber(pitem);
163
* Searches correct position for value on leaf page.
164
* Page should be correctly chosen.
165
* Returns true if value found on page.
168
dataLocateLeafItem(GinBtree btree, GinBtreeStack *stack)
170
Page page = BufferGetPage(stack->buffer);
175
Assert(GinPageIsLeaf(page));
176
Assert(GinPageIsData(page));
180
stack->off = FirstOffsetNumber;
184
low = FirstOffsetNumber;
185
high = GinPageGetOpaque(page)->maxoff;
189
stack->off = FirstOffsetNumber;
197
OffsetNumber mid = low + ((high - low) / 2);
199
result = ginCompareItemPointers(btree->items + btree->curitem, (ItemPointer) GinDataPageGetItem(page, mid));
217
* Finds links to blkno on non-leaf page, returns
218
* offset of PostingItem
221
dataFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff)
224
maxoff = GinPageGetOpaque(page)->maxoff;
227
Assert(!GinPageIsLeaf(page));
228
Assert(GinPageIsData(page));
230
/* if page isn't changed, we return storedOff */
231
if (storedOff >= FirstOffsetNumber && storedOff <= maxoff)
233
pitem = (PostingItem *) GinDataPageGetItem(page, storedOff);
234
if (PostingItemGetBlockNumber(pitem) == blkno)
238
* we hope, that needed pointer goes to right. It's true if there
241
for (i = storedOff + 1; i <= maxoff; i++)
243
pitem = (PostingItem *) GinDataPageGetItem(page, i);
244
if (PostingItemGetBlockNumber(pitem) == blkno)
248
maxoff = storedOff - 1;
252
for (i = FirstOffsetNumber; i <= maxoff; i++)
254
pitem = (PostingItem *) GinDataPageGetItem(page, i);
255
if (PostingItemGetBlockNumber(pitem) == blkno)
259
return InvalidOffsetNumber;
263
* returns blkno of leftmost child
266
dataGetLeftMostPage(GinBtree btree, Page page)
270
Assert(!GinPageIsLeaf(page));
271
Assert(GinPageIsData(page));
272
Assert(GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber);
274
pitem = (PostingItem *) GinDataPageGetItem(page, FirstOffsetNumber);
275
return PostingItemGetBlockNumber(pitem);
279
* add ItemPointer or PostingItem to page. data should point to
280
* correct value! depending on leaf or non-leaf page
283
GinDataPageAddItem(Page page, void *data, OffsetNumber offset)
285
OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
288
if (offset == InvalidOffsetNumber)
290
ptr = GinDataPageGetItem(page, maxoff + 1);
294
ptr = GinDataPageGetItem(page, offset);
295
if (maxoff + 1 - offset != 0)
296
memmove(ptr + GinSizeOfDataPageItem(page),
298
(maxoff - offset + 1) * GinSizeOfDataPageItem(page));
300
memcpy(ptr, data, GinSizeOfDataPageItem(page));
302
GinPageGetOpaque(page)->maxoff++;
306
* Deletes posting item from non-leaf page
309
GinPageDeletePostingItem(Page page, OffsetNumber offset)
311
OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
313
Assert(!GinPageIsLeaf(page));
314
Assert(offset >= FirstOffsetNumber && offset <= maxoff);
316
if (offset != maxoff)
317
memmove(GinDataPageGetItem(page, offset), GinDataPageGetItem(page, offset + 1),
318
sizeof(PostingItem) * (maxoff - offset));
320
GinPageGetOpaque(page)->maxoff--;
324
* checks space to install new value,
325
* item pointer never deletes!
328
dataIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off)
330
Page page = BufferGetPage(buf);
332
Assert(GinPageIsData(page));
333
Assert(!btree->isDelete);
335
if (GinPageIsLeaf(page))
337
if (GinPageRightMost(page) && off > GinPageGetOpaque(page)->maxoff)
339
if ((btree->nitem - btree->curitem) * sizeof(ItemPointerData) <= GinDataPageGetFreeSpace(page))
342
else if (sizeof(ItemPointerData) <= GinDataPageGetFreeSpace(page))
345
else if (sizeof(PostingItem) <= GinDataPageGetFreeSpace(page))
352
* In case of previous split update old child blkno to
354
* item pointer never deletes!
357
dataPrepareData(GinBtree btree, Page page, OffsetNumber off)
359
BlockNumber ret = InvalidBlockNumber;
361
Assert(GinPageIsData(page));
363
if (!GinPageIsLeaf(page) && btree->rightblkno != InvalidBlockNumber)
365
PostingItem *pitem = (PostingItem *) GinDataPageGetItem(page, off);
367
PostingItemSetBlockNumber(pitem, btree->rightblkno);
368
ret = btree->rightblkno;
371
btree->rightblkno = InvalidBlockNumber;
377
* Places keys to page and fills WAL record. In case leaf page and
378
* build mode puts all ItemPointers to page.
381
dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prdata)
383
Page page = BufferGetPage(buf);
384
int sizeofitem = GinSizeOfDataPageItem(page);
387
/* these must be static so they can be returned to caller */
388
static XLogRecData rdata[3];
389
static ginxlogInsert data;
392
Assert(GinPageIsData(page));
394
data.updateBlkno = dataPrepareData(btree, page, off);
396
data.node = btree->index->rd_node;
397
data.blkno = BufferGetBlockNumber(buf);
400
data.isDelete = FALSE;
402
data.isLeaf = GinPageIsLeaf(page) ? TRUE : FALSE;
405
* Prevent full page write if child's split occurs. That is needed to
406
* remove incomplete splits while replaying WAL
408
* data.updateBlkno contains new block number (of newly created right
409
* page) for recently splited page.
411
if (data.updateBlkno == InvalidBlockNumber)
413
rdata[0].buffer = buf;
414
rdata[0].buffer_std = FALSE;
415
rdata[0].data = NULL;
417
rdata[0].next = &rdata[1];
421
rdata[cnt].buffer = InvalidBuffer;
422
rdata[cnt].data = (char *) &data;
423
rdata[cnt].len = sizeof(ginxlogInsert);
424
rdata[cnt].next = &rdata[cnt + 1];
427
rdata[cnt].buffer = InvalidBuffer;
428
rdata[cnt].data = (GinPageIsLeaf(page)) ? ((char *) (btree->items + btree->curitem)) : ((char *) &(btree->pitem));
429
rdata[cnt].len = sizeofitem;
430
rdata[cnt].next = NULL;
432
if (GinPageIsLeaf(page))
434
if (GinPageRightMost(page) && off > GinPageGetOpaque(page)->maxoff)
436
/* usually, create index... */
437
uint32 savedPos = btree->curitem;
439
while (btree->curitem < btree->nitem)
441
GinDataPageAddItem(page, btree->items + btree->curitem, off);
445
data.nitem = btree->curitem - savedPos;
446
rdata[cnt].len = sizeofitem * data.nitem;
450
GinDataPageAddItem(page, btree->items + btree->curitem, off);
455
GinDataPageAddItem(page, &(btree->pitem), off);
459
* split page and fills WAL record. original buffer(lbuf) leaves untouched,
460
* returns shadow page of lbuf filled new data. In leaf page and build mode puts all
461
* ItemPointers to pages. Also, in build mode splits data by way to full fulled
465
dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRecData **prdata)
468
OffsetNumber separator;
470
Page lpage = PageGetTempPageCopy(BufferGetPage(lbuf));
471
ItemPointerData oldbound = *GinDataPageGetRightBound(lpage);
472
int sizeofitem = GinSizeOfDataPageItem(lpage);
473
OffsetNumber maxoff = GinPageGetOpaque(lpage)->maxoff;
474
Page rpage = BufferGetPage(rbuf);
475
Size pageSize = PageGetPageSize(lpage);
479
/* these must be static so they can be returned to caller */
480
static ginxlogSplit data;
481
static XLogRecData rdata[4];
482
static char vector[2 * BLCKSZ];
484
GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
485
freeSpace = GinDataPageGetFreeSpace(rpage);
488
data.leftChildBlkno = (GinPageIsLeaf(lpage)) ?
489
InvalidOffsetNumber : PostingItemGetBlockNumber(&(btree->pitem));
490
data.updateBlkno = dataPrepareData(btree, lpage, off);
492
memcpy(vector, GinDataPageGetItem(lpage, FirstOffsetNumber),
493
maxoff * sizeofitem);
495
if (GinPageIsLeaf(lpage) && GinPageRightMost(lpage) && off > GinPageGetOpaque(lpage)->maxoff)
498
while (btree->curitem < btree->nitem &&
499
maxoff * sizeof(ItemPointerData) < 2 * (freeSpace - sizeof(ItemPointerData)))
501
memcpy(vector + maxoff * sizeof(ItemPointerData),
502
btree->items + btree->curitem,
503
sizeof(ItemPointerData));
511
ptr = vector + (off - 1) * sizeofitem;
512
if (maxoff + 1 - off != 0)
513
memmove(ptr + sizeofitem, ptr, (maxoff - off + 1) * sizeofitem);
514
if (GinPageIsLeaf(lpage))
516
memcpy(ptr, btree->items + btree->curitem, sizeofitem);
520
memcpy(ptr, &(btree->pitem), sizeofitem);
526
* we suppose that during index creation table scaned from begin to end,
527
* so ItemPointers are monotonically increased..
529
if (btree->isBuild && GinPageRightMost(lpage))
530
separator = freeSpace / sizeofitem;
532
separator = maxoff / 2;
534
GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
535
GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize);
537
memcpy(GinDataPageGetItem(lpage, FirstOffsetNumber), vector, separator * sizeofitem);
538
GinPageGetOpaque(lpage)->maxoff = separator;
539
memcpy(GinDataPageGetItem(rpage, FirstOffsetNumber),
540
vector + separator * sizeofitem, (maxoff - separator) * sizeofitem);
541
GinPageGetOpaque(rpage)->maxoff = maxoff - separator;
543
PostingItemSetBlockNumber(&(btree->pitem), BufferGetBlockNumber(lbuf));
544
if (GinPageIsLeaf(lpage))
545
btree->pitem.key = *(ItemPointerData *) GinDataPageGetItem(lpage,
546
GinPageGetOpaque(lpage)->maxoff);
548
btree->pitem.key = ((PostingItem *) GinDataPageGetItem(lpage,
549
GinPageGetOpaque(lpage)->maxoff))->key;
550
btree->rightblkno = BufferGetBlockNumber(rbuf);
552
/* set up right bound for left page */
553
bound = GinDataPageGetRightBound(lpage);
554
*bound = btree->pitem.key;
556
/* set up right bound for right page */
557
bound = GinDataPageGetRightBound(rpage);
560
data.node = btree->index->rd_node;
561
data.rootBlkno = InvalidBlockNumber;
562
data.lblkno = BufferGetBlockNumber(lbuf);
563
data.rblkno = BufferGetBlockNumber(rbuf);
564
data.separator = separator;
567
data.isLeaf = GinPageIsLeaf(lpage) ? TRUE : FALSE;
568
data.isRootSplit = FALSE;
569
data.rightbound = oldbound;
571
rdata[0].buffer = InvalidBuffer;
572
rdata[0].data = (char *) &data;
573
rdata[0].len = sizeof(ginxlogSplit);
574
rdata[0].next = &rdata[1];
576
rdata[1].buffer = InvalidBuffer;
577
rdata[1].data = vector;
578
rdata[1].len = MAXALIGN(maxoff * sizeofitem);
579
rdata[1].next = NULL;
585
* Fills new root by right bound values from child.
586
* Also called from ginxlog, should not use btree
589
ginDataFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf)
591
Page page = BufferGetPage(root),
592
lpage = BufferGetPage(lbuf),
593
rpage = BufferGetPage(rbuf);
597
li.key = *GinDataPageGetRightBound(lpage);
598
PostingItemSetBlockNumber(&li, BufferGetBlockNumber(lbuf));
599
GinDataPageAddItem(page, &li, InvalidOffsetNumber);
601
ri.key = *GinDataPageGetRightBound(rpage);
602
PostingItemSetBlockNumber(&ri, BufferGetBlockNumber(rbuf));
603
GinDataPageAddItem(page, &ri, InvalidOffsetNumber);
607
ginPrepareDataScan(GinBtree btree, Relation index)
609
memset(btree, 0, sizeof(GinBtreeData));
611
btree->index = index;
613
btree->findChildPage = dataLocateItem;
614
btree->isMoveRight = dataIsMoveRight;
615
btree->findItem = dataLocateLeafItem;
616
btree->findChildPtr = dataFindChildPtr;
617
btree->getLeftMostPage = dataGetLeftMostPage;
618
btree->isEnoughSpace = dataIsEnoughSpace;
619
btree->placeToPage = dataPlaceToPage;
620
btree->splitPage = dataSplitPage;
621
btree->fillRoot = ginDataFillRoot;
623
btree->isData = TRUE;
624
btree->searchMode = FALSE;
625
btree->isDelete = FALSE;
626
btree->fullScan = FALSE;
627
btree->isBuild = FALSE;
631
ginPrepareScanPostingTree(Relation index, BlockNumber rootBlkno, bool searchMode)
633
GinPostingTreeScan *gdi = (GinPostingTreeScan *) palloc0(sizeof(GinPostingTreeScan));
635
ginPrepareDataScan(&gdi->btree, index);
637
gdi->btree.searchMode = searchMode;
638
gdi->btree.fullScan = searchMode;
640
gdi->stack = ginPrepareFindLeafPage(&gdi->btree, rootBlkno);
646
* Inserts array of item pointers, may execute several tree scan (very rare)
649
ginInsertItemPointers(GinPostingTreeScan *gdi,
650
ItemPointerData *items, uint32 nitem,
651
GinStatsData *buildStats)
653
BlockNumber rootBlkno = gdi->stack->blkno;
655
gdi->btree.items = items;
656
gdi->btree.nitem = nitem;
657
gdi->btree.curitem = 0;
659
while (gdi->btree.curitem < gdi->btree.nitem)
662
gdi->stack = ginPrepareFindLeafPage(&gdi->btree, rootBlkno);
664
gdi->stack = ginFindLeafPage(&gdi->btree, gdi->stack);
666
if (gdi->btree.findItem(&(gdi->btree), gdi->stack))
669
* gdi->btree.items[gdi->btree.curitem] already exists in index
671
gdi->btree.curitem++;
672
LockBuffer(gdi->stack->buffer, GIN_UNLOCK);
673
freeGinBtreeStack(gdi->stack);
676
ginInsertValue(&(gdi->btree), gdi->stack, buildStats);
683
ginScanBeginPostingTree(GinPostingTreeScan *gdi)
685
gdi->stack = ginFindLeafPage(&gdi->btree, gdi->stack);
686
return gdi->stack->buffer;