/*-------------------------------------------------------------------------
 *
 * pruneheap.c
 *	  heap page pruning and HOT-chain management code
 *
 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *-------------------------------------------------------------------------
 */
#include "access/heapam.h"
18
#include "access/htup.h"
19
#include "access/transam.h"
20
#include "miscadmin.h"
22
#include "storage/bufmgr.h"
23
#include "storage/off.h"
24
#include "utils/inval.h"
25
#include "utils/rel.h"
26
#include "utils/tqual.h"
29
/* Working data for heap_page_prune and subroutines */
32
TransactionId new_prune_xid; /* new prune hint value for page */
33
int nredirected; /* numbers of entries in arrays below */
36
/* arrays that accumulate indexes of items to be changed */
37
OffsetNumber redirected[MaxHeapTuplesPerPage * 2];
38
OffsetNumber nowdead[MaxHeapTuplesPerPage];
39
OffsetNumber nowunused[MaxHeapTuplesPerPage];
40
/* marked[i] is TRUE if item i is entered in one of the above arrays */
41
bool marked[MaxHeapTuplesPerPage + 1];
45
static int heap_prune_chain(Relation relation, Buffer buffer,
46
OffsetNumber rootoffnum,
47
TransactionId OldestXmin,
50
static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid);
51
static void heap_prune_record_redirect(PruneState *prstate,
52
OffsetNumber offnum, OffsetNumber rdoffnum);
53
static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum);
54
static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum);
58
* Optionally prune and repair fragmentation in the specified page.
60
* This is an opportunistic function. It will perform housekeeping
61
* only if the page heuristically looks like a candidate for pruning and we
62
* can acquire buffer cleanup lock without blocking.
64
* Note: this is called quite often. It's important that it fall out quickly
65
* if there's not any use in pruning.
67
* Caller must have pin on the buffer, and must *not* have a lock on it.
69
* OldestXmin is the cutoff XID used to distinguish whether tuples are DEAD
70
* or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
73
heap_page_prune_opt(Relation relation, Buffer buffer, TransactionId OldestXmin)
75
Page page = BufferGetPage(buffer);
79
* Let's see if we really need pruning.
81
* Forget it if page is not hinted to contain something prunable that's
82
* older than OldestXmin.
84
if (!PageIsPrunable(page, OldestXmin))
88
* We prune when a previous UPDATE failed to find enough space on the page
89
* for a new tuple version, or when free space falls below the relation's
90
* fill-factor target (but not less than 10%).
92
* Checking free space here is questionable since we aren't holding any
93
* lock on the buffer; in the worst case we could get a bogus answer. It's
94
* unlikely to be *seriously* wrong, though, since reading either pd_lower
95
* or pd_upper is probably atomic. Avoiding taking a lock seems more
96
* important than sometimes getting a wrong answer in what is after all
97
* just a heuristic estimate.
99
minfree = RelationGetTargetPageFreeSpace(relation,
100
HEAP_DEFAULT_FILLFACTOR);
101
minfree = Max(minfree, BLCKSZ / 10);
103
if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
105
/* OK, try to get exclusive buffer lock */
106
if (!ConditionalLockBufferForCleanup(buffer))
110
* Now that we have buffer lock, get accurate information about the
111
* page's free space, and recheck the heuristic about whether to
112
* prune. (We needn't recheck PageIsPrunable, since no one else could
113
* have pruned while we hold pin.)
115
if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
117
/* OK to prune (though not to remove redirects) */
118
(void) heap_page_prune(relation, buffer, OldestXmin, false, true);
121
/* And release buffer lock */
122
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
128
* Prune and repair fragmentation in the specified page.
130
* Caller must have pin and buffer cleanup lock on the page.
132
* OldestXmin is the cutoff XID used to distinguish whether tuples are DEAD
133
* or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
135
* If redirect_move is set, we remove redirecting line pointers by
136
* updating the root line pointer to point directly to the first non-dead
137
* tuple in the chain. NOTE: eliminating the redirect changes the first
138
* tuple's effective CTID, and is therefore unsafe except within VACUUM FULL.
139
* The only reason we support this capability at all is that by using it,
140
* VACUUM FULL need not cope with LP_REDIRECT items at all; which seems a
141
* good thing since VACUUM FULL is overly complicated already.
143
* If report_stats is true then we send the number of reclaimed heap-only
144
* tuples to pgstats. (This must be FALSE during vacuum, since vacuum will
145
* send its own new total to pgstats, and we don't want this delta applied
148
* Returns the number of tuples deleted from the page.
151
heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin,
152
bool redirect_move, bool report_stats)
155
Page page = BufferGetPage(buffer);
161
* Our strategy is to scan the page and make lists of items to change,
162
* then apply the changes within a critical section. This keeps as
163
* much logic as possible out of the critical section, and also ensures
164
* that WAL replay will work the same as the normal case.
166
* First, inform inval.c that upcoming CacheInvalidateHeapTuple calls
167
* are nontransactional.
170
BeginNonTransactionalInvalidation();
173
* Initialize the new pd_prune_xid value to zero (indicating no
174
* prunable tuples). If we find any tuples which may soon become
175
* prunable, we will save the lowest relevant XID in new_prune_xid.
176
* Also initialize the rest of our working state.
178
prstate.new_prune_xid = InvalidTransactionId;
179
prstate.nredirected = prstate.ndead = prstate.nunused = 0;
180
memset(prstate.marked, 0, sizeof(prstate.marked));
183
maxoff = PageGetMaxOffsetNumber(page);
184
for (offnum = FirstOffsetNumber;
186
offnum = OffsetNumberNext(offnum))
190
/* Ignore items already processed as part of an earlier chain */
191
if (prstate.marked[offnum])
194
/* Nothing to do if slot is empty or already dead */
195
itemid = PageGetItemId(page, offnum);
196
if (!ItemIdIsUsed(itemid) || ItemIdIsDead(itemid))
199
/* Process this item or chain of items */
200
ndeleted += heap_prune_chain(relation, buffer, offnum,
207
* Send invalidation messages for any tuples we are about to move.
208
* It is safe to do this now, even though we could theoretically still
209
* fail before making the actual page update, because a useless cache
210
* invalidation doesn't hurt anything. Also, no one else can reload the
211
* tuples while we have exclusive buffer lock, so it's not too early to
212
* send the invals. This avoids sending the invals while inside the
213
* critical section, which is a good thing for robustness.
216
EndNonTransactionalInvalidation();
218
/* Any error while applying the changes is critical */
219
START_CRIT_SECTION();
221
/* Have we found any prunable items? */
222
if (prstate.nredirected > 0 || prstate.ndead > 0 || prstate.nunused > 0)
225
* Apply the planned item changes, then repair page fragmentation,
226
* and update the page's hint bit about whether it has free line
229
heap_page_prune_execute(buffer,
230
prstate.redirected, prstate.nredirected,
231
prstate.nowdead, prstate.ndead,
232
prstate.nowunused, prstate.nunused,
236
* Update the page's pd_prune_xid field to either zero, or the lowest
237
* XID of any soon-prunable tuple.
239
((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
242
* Also clear the "page is full" flag, since there's no point in
243
* repeating the prune/defrag process until something else happens to
248
MarkBufferDirty(buffer);
251
* Emit a WAL HEAP_CLEAN or HEAP_CLEAN_MOVE record showing what we did
253
if (!relation->rd_istemp)
257
recptr = log_heap_clean(relation, buffer,
258
prstate.redirected, prstate.nredirected,
259
prstate.nowdead, prstate.ndead,
260
prstate.nowunused, prstate.nunused,
263
PageSetLSN(BufferGetPage(buffer), recptr);
264
PageSetTLI(BufferGetPage(buffer), ThisTimeLineID);
270
* If we didn't prune anything, but have found a new value for the
271
* pd_prune_xid field, update it and mark the buffer dirty.
272
* This is treated as a non-WAL-logged hint.
274
* Also clear the "page is full" flag if it is set, since there's no
275
* point in repeating the prune/defrag process until something else
276
* happens to the page.
278
if (((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
281
((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
283
SetBufferCommitInfoNeedsSave(buffer);
290
* If requested, report the number of tuples reclaimed to pgstats. This is
291
* ndeleted minus ndead, because we don't want to count a now-DEAD root
292
* item as a deletion for this purpose.
294
if (report_stats && ndeleted > prstate.ndead)
295
pgstat_update_heap_dead_tuples(relation, ndeleted - prstate.ndead);
298
* XXX Should we update the FSM information of this page ?
300
* There are two schools of thought here. We may not want to update FSM
301
* information so that the page is not used for unrelated UPDATEs/INSERTs
302
* and any free space in this page will remain available for further
303
* UPDATEs in *this* page, thus improving chances for doing HOT updates.
305
* But for a large table and where a page does not receive further UPDATEs
306
* for a long time, we might waste this space by not updating the FSM
307
* information. The relation may get extended and fragmented further.
309
* One possibility is to leave "fillfactor" worth of space in this page
310
* and update FSM with the remaining space.
312
* In any case, the current FSM implementation doesn't accept
313
* one-page-at-a-time updates, so this is all academic for now.
321
* Prune specified item pointer or a HOT chain originating at that item.
323
* If the item is an index-referenced tuple (i.e. not a heap-only tuple),
324
* the HOT chain is pruned by removing all DEAD tuples at the start of the HOT
325
* chain. We also prune any RECENTLY_DEAD tuples preceding a DEAD tuple.
326
* This is OK because a RECENTLY_DEAD tuple preceding a DEAD tuple is really
327
* DEAD, the OldestXmin test is just too coarse to detect it.
329
* The root line pointer is redirected to the tuple immediately after the
330
* latest DEAD tuple. If all tuples in the chain are DEAD, the root line
331
* pointer is marked LP_DEAD. (This includes the case of a DEAD simple
332
* tuple, which we treat as a chain of length 1.)
334
* OldestXmin is the cutoff XID used to identify dead tuples.
336
* We don't actually change the page here, except perhaps for hint-bit updates
337
* caused by HeapTupleSatisfiesVacuum. We just add entries to the arrays in
338
* prstate showing the changes to be made. Items to be redirected are added
339
* to the redirected[] array (two entries per redirection); items to be set to
340
* LP_DEAD state are added to nowdead[]; and items to be set to LP_UNUSED
341
* state are added to nowunused[].
343
* If redirect_move is true, we intend to get rid of redirecting line pointers,
344
* not just make redirection entries.
346
* Returns the number of tuples (to be) deleted from the page.
349
heap_prune_chain(Relation relation, Buffer buffer, OffsetNumber rootoffnum,
350
TransactionId OldestXmin,
355
Page dp = (Page) BufferGetPage(buffer);
356
TransactionId priorXmax = InvalidTransactionId;
358
HeapTupleHeader htup;
359
OffsetNumber latestdead = InvalidOffsetNumber,
360
redirect_target = InvalidOffsetNumber,
361
maxoff = PageGetMaxOffsetNumber(dp),
363
OffsetNumber chainitems[MaxHeapTuplesPerPage];
367
rootlp = PageGetItemId(dp, rootoffnum);
370
* If it's a heap-only tuple, then it is not the start of a HOT chain.
372
if (ItemIdIsNormal(rootlp))
374
htup = (HeapTupleHeader) PageGetItem(dp, rootlp);
375
if (HeapTupleHeaderIsHeapOnly(htup))
378
* If the tuple is DEAD and doesn't chain to anything else, mark
379
* it unused immediately. (If it does chain, we can only remove
380
* it as part of pruning its chain.)
382
* We need this primarily to handle aborted HOT updates, that is,
383
* XMIN_INVALID heap-only tuples. Those might not be linked to by
384
* any chain, since the parent tuple might be re-updated before
385
* any pruning occurs. So we have to be able to reap them
386
* separately from chain-pruning. (Note that
387
* HeapTupleHeaderIsHotUpdated will never return true for an
388
* XMIN_INVALID tuple, so this code will work even when there were
389
* sequential updates within the aborted transaction.)
391
* Note that we might first arrive at a dead heap-only tuple
392
* either here or while following a chain below. Whichever path
393
* gets there first will mark the tuple unused.
395
if (HeapTupleSatisfiesVacuum(htup, OldestXmin, buffer)
396
== HEAPTUPLE_DEAD && !HeapTupleHeaderIsHotUpdated(htup))
398
heap_prune_record_unused(prstate, rootoffnum);
402
/* Nothing more to do */
407
/* Start from the root tuple */
410
/* while not end of the chain */
417
/* Some sanity checks */
418
if (offnum < FirstOffsetNumber || offnum > maxoff)
421
/* If item is already processed, stop --- it must not be same chain */
422
if (prstate->marked[offnum])
425
lp = PageGetItemId(dp, offnum);
427
/* Unused item obviously isn't part of the chain */
428
if (!ItemIdIsUsed(lp))
432
* If we are looking at the redirected root line pointer, jump to the
433
* first normal tuple in the chain. If we find a redirect somewhere
434
* else, stop --- it must not be same chain.
436
if (ItemIdIsRedirected(lp))
439
break; /* not at start of chain */
440
chainitems[nchain++] = offnum;
441
offnum = ItemIdGetRedirect(rootlp);
446
* Likewise, a dead item pointer can't be part of the chain. (We
447
* already eliminated the case of dead root tuple outside this
450
if (ItemIdIsDead(lp))
453
Assert(ItemIdIsNormal(lp));
454
htup = (HeapTupleHeader) PageGetItem(dp, lp);
457
* Check the tuple XMIN against prior XMAX, if any
459
if (TransactionIdIsValid(priorXmax) &&
460
!TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
464
* OK, this tuple is indeed a member of the chain.
466
chainitems[nchain++] = offnum;
469
* Check tuple's visibility status.
471
tupdead = recent_dead = false;
473
switch (HeapTupleSatisfiesVacuum(htup, OldestXmin, buffer))
479
case HEAPTUPLE_RECENTLY_DEAD:
483
* This tuple may soon become DEAD. Update the hint field so
484
* that the page is reconsidered for pruning in future.
486
heap_prune_record_prunable(prstate,
487
HeapTupleHeaderGetXmax(htup));
490
case HEAPTUPLE_DELETE_IN_PROGRESS:
493
* This tuple may soon become DEAD. Update the hint field so
494
* that the page is reconsidered for pruning in future.
496
heap_prune_record_prunable(prstate,
497
HeapTupleHeaderGetXmax(htup));
501
case HEAPTUPLE_INSERT_IN_PROGRESS:
504
* If we wanted to optimize for aborts, we might consider
505
* marking the page prunable when we see INSERT_IN_PROGRESS.
506
* But we don't. See related decisions about when to mark the
507
* page prunable in heapam.c.
512
elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
517
* Remember the last DEAD tuple seen. We will advance past
518
* RECENTLY_DEAD tuples just in case there's a DEAD one after them;
519
* but we can't advance past anything else. (XXX is it really worth
520
* continuing to scan beyond RECENTLY_DEAD? The case where we will
521
* find another DEAD tuple is a fairly unusual corner case.)
525
else if (!recent_dead)
529
* If the tuple is not HOT-updated, then we are at the end of this
532
if (!HeapTupleHeaderIsHotUpdated(htup))
536
* Advance to next chain member.
538
Assert(ItemPointerGetBlockNumber(&htup->t_ctid) ==
539
BufferGetBlockNumber(buffer));
540
offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
541
priorXmax = HeapTupleHeaderGetXmax(htup);
545
* If we found a DEAD tuple in the chain, adjust the HOT chain so that all
546
* the DEAD tuples at the start of the chain are removed and the root line
547
* pointer is appropriately redirected.
549
if (OffsetNumberIsValid(latestdead))
552
* Mark as unused each intermediate item that we are able to remove
555
* When the previous item is the last dead tuple seen, we are at the
556
* right candidate for redirection.
558
for (i = 1; (i < nchain) && (chainitems[i - 1] != latestdead); i++)
560
heap_prune_record_unused(prstate, chainitems[i]);
565
* If the root entry had been a normal tuple, we are deleting it, so
566
* count it in the result. But changing a redirect (even to DEAD
567
* state) doesn't count.
569
if (ItemIdIsNormal(rootlp))
573
* If the DEAD tuple is at the end of the chain, the entire chain is
574
* dead and the root line pointer can be marked dead. Otherwise just
575
* redirect the root to the correct chain member.
578
heap_prune_record_dead(prstate, rootoffnum);
581
heap_prune_record_redirect(prstate, rootoffnum, chainitems[i]);
582
/* If the redirection will be a move, need more processing */
584
redirect_target = chainitems[i];
587
else if (nchain < 2 && ItemIdIsRedirected(rootlp))
590
* We found a redirect item that doesn't point to a valid follow-on
591
* item. This can happen if the loop in heap_page_prune caused us to
592
* visit the dead successor of a redirect item before visiting the
593
* redirect item. We can clean up by setting the redirect item to
596
heap_prune_record_dead(prstate, rootoffnum);
598
else if (redirect_move && ItemIdIsRedirected(rootlp))
601
* If we desire to eliminate LP_REDIRECT items by moving tuples,
602
* make a redirection entry for each redirected root item; this
603
* will cause heap_page_prune_execute to actually do the move.
604
* (We get here only when there are no DEAD tuples in the chain;
605
* otherwise the redirection entry was made above.)
607
heap_prune_record_redirect(prstate, rootoffnum, chainitems[1]);
608
redirect_target = chainitems[1];
612
* If we are going to implement a redirect by moving tuples, we have
613
* to issue a cache invalidation against the redirection target tuple,
614
* because its CTID will be effectively changed by the move. Note that
615
* CacheInvalidateHeapTuple only queues the request, it doesn't send it;
616
* if we fail before reaching EndNonTransactionalInvalidation, nothing
617
* happens and no harm is done.
619
if (OffsetNumberIsValid(redirect_target))
621
ItemId firstlp = PageGetItemId(dp, redirect_target);
622
HeapTupleData firsttup;
624
Assert(ItemIdIsNormal(firstlp));
625
/* Set up firsttup to reference the tuple at its existing CTID */
626
firsttup.t_data = (HeapTupleHeader) PageGetItem(dp, firstlp);
627
firsttup.t_len = ItemIdGetLength(firstlp);
628
ItemPointerSet(&firsttup.t_self,
629
BufferGetBlockNumber(buffer),
631
firsttup.t_tableOid = RelationGetRelid(relation);
632
CacheInvalidateHeapTuple(relation, &firsttup);
638
/* Record lowest soon-prunable XID */
640
heap_prune_record_prunable(PruneState *prstate, TransactionId xid)
643
* This should exactly match the PageSetPrunable macro. We can't store
644
* directly into the page header yet, so we update working state.
646
Assert(TransactionIdIsNormal(xid));
647
if (!TransactionIdIsValid(prstate->new_prune_xid) ||
648
TransactionIdPrecedes(xid, prstate->new_prune_xid))
649
prstate->new_prune_xid = xid;
652
/* Record item pointer to be redirected */
654
heap_prune_record_redirect(PruneState *prstate,
655
OffsetNumber offnum, OffsetNumber rdoffnum)
657
Assert(prstate->nredirected < MaxHeapTuplesPerPage);
658
prstate->redirected[prstate->nredirected * 2] = offnum;
659
prstate->redirected[prstate->nredirected * 2 + 1] = rdoffnum;
660
prstate->nredirected++;
661
Assert(!prstate->marked[offnum]);
662
prstate->marked[offnum] = true;
663
Assert(!prstate->marked[rdoffnum]);
664
prstate->marked[rdoffnum] = true;
667
/* Record item pointer to be marked dead */
669
heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum)
671
Assert(prstate->ndead < MaxHeapTuplesPerPage);
672
prstate->nowdead[prstate->ndead] = offnum;
674
Assert(!prstate->marked[offnum]);
675
prstate->marked[offnum] = true;
678
/* Record item pointer to be marked unused */
680
heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum)
682
Assert(prstate->nunused < MaxHeapTuplesPerPage);
683
prstate->nowunused[prstate->nunused] = offnum;
685
Assert(!prstate->marked[offnum]);
686
prstate->marked[offnum] = true;
691
* Perform the actual page changes needed by heap_page_prune.
692
* It is expected that the caller has suitable pin and lock on the
693
* buffer, and is inside a critical section.
695
* This is split out because it is also used by heap_xlog_clean()
696
* to replay the WAL record when needed after a crash. Note that the
697
* arguments are identical to those of log_heap_clean().
700
heap_page_prune_execute(Buffer buffer,
701
OffsetNumber *redirected, int nredirected,
702
OffsetNumber *nowdead, int ndead,
703
OffsetNumber *nowunused, int nunused,
706
Page page = (Page) BufferGetPage(buffer);
707
OffsetNumber *offnum;
710
/* Update all redirected or moved line pointers */
712
for (i = 0; i < nredirected; i++)
714
OffsetNumber fromoff = *offnum++;
715
OffsetNumber tooff = *offnum++;
716
ItemId fromlp = PageGetItemId(page, fromoff);
720
/* Physically move the "to" item to the "from" slot */
721
ItemId tolp = PageGetItemId(page, tooff);
722
HeapTupleHeader htup;
725
ItemIdSetUnused(tolp);
728
* Change heap-only status of the tuple because after the line
729
* pointer manipulation, it's no longer a heap-only tuple, but is
730
* directly pointed to by index entries.
732
Assert(ItemIdIsNormal(fromlp));
733
htup = (HeapTupleHeader) PageGetItem(page, fromlp);
734
Assert(HeapTupleHeaderIsHeapOnly(htup));
735
HeapTupleHeaderClearHeapOnly(htup);
739
/* Just insert a REDIRECT link at fromoff */
740
ItemIdSetRedirect(fromlp, tooff);
744
/* Update all now-dead line pointers */
746
for (i = 0; i < ndead; i++)
748
OffsetNumber off = *offnum++;
749
ItemId lp = PageGetItemId(page, off);
754
/* Update all now-unused line pointers */
756
for (i = 0; i < nunused; i++)
758
OffsetNumber off = *offnum++;
759
ItemId lp = PageGetItemId(page, off);
765
* Finally, repair any fragmentation, and update the page's hint bit about
766
* whether it has free pointers.
768
PageRepairFragmentation(page);
773
* For all items in this page, find their respective root line pointers.
774
* If item k is part of a HOT-chain with root at item j, then we set
775
* root_offsets[k - 1] = j.
777
* The passed-in root_offsets array must have MaxHeapTuplesPerPage entries.
778
* We zero out all unused entries.
780
* The function must be called with at least share lock on the buffer, to
781
* prevent concurrent prune operations.
783
* Note: The information collected here is valid only as long as the caller
784
* holds a pin on the buffer. Once pin is released, a tuple might be pruned
785
* and reused by a completely unrelated tuple.
788
heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
793
MemSet(root_offsets, 0, MaxHeapTuplesPerPage * sizeof(OffsetNumber));
795
maxoff = PageGetMaxOffsetNumber(page);
796
for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
798
ItemId lp = PageGetItemId(page, offnum);
799
HeapTupleHeader htup;
800
OffsetNumber nextoffnum;
801
TransactionId priorXmax;
803
/* skip unused and dead items */
804
if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
807
if (ItemIdIsNormal(lp))
809
htup = (HeapTupleHeader) PageGetItem(page, lp);
812
* Check if this tuple is part of a HOT-chain rooted at some other
813
* tuple. If so, skip it for now; we'll process it when we find
816
if (HeapTupleHeaderIsHeapOnly(htup))
820
* This is either a plain tuple or the root of a HOT-chain.
821
* Remember it in the mapping.
823
root_offsets[offnum - 1] = offnum;
825
/* If it's not the start of a HOT-chain, we're done with it */
826
if (!HeapTupleHeaderIsHotUpdated(htup))
829
/* Set up to scan the HOT-chain */
830
nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
831
priorXmax = HeapTupleHeaderGetXmax(htup);
835
/* Must be a redirect item. We do not set its root_offsets entry */
836
Assert(ItemIdIsRedirected(lp));
837
/* Set up to scan the HOT-chain */
838
nextoffnum = ItemIdGetRedirect(lp);
839
priorXmax = InvalidTransactionId;
843
* Now follow the HOT-chain and collect other tuples in the chain.
845
* Note: Even though this is a nested loop, the complexity of the
846
* function is O(N) because a tuple in the page should be visited not
847
* more than twice, once in the outer loop and once in HOT-chain
852
lp = PageGetItemId(page, nextoffnum);
854
/* Check for broken chains */
855
if (!ItemIdIsNormal(lp))
858
htup = (HeapTupleHeader) PageGetItem(page, lp);
860
if (TransactionIdIsValid(priorXmax) &&
861
!TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup)))
864
/* Remember the root line pointer for this item */
865
root_offsets[nextoffnum - 1] = offnum;
867
/* Advance to next chain member, if any */
868
if (!HeapTupleHeaderIsHotUpdated(htup))
871
nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
872
priorXmax = HeapTupleHeaderGetXmax(htup);