/*-------------------------------------------------------------------------
 *
 * vacuumlazy.c
 *	  Concurrent ("lazy") vacuuming.
 *
 *
 * The major space usage for LAZY VACUUM is storage for the array of dead
 * tuple TIDs, with the next biggest need being storage for per-disk-page
 * free space info.  We want to ensure we can vacuum even the very largest
 * relations with finite memory space usage.  To do that, we set upper bounds
 * on the number of tuples and pages we will keep track of at once.
 *
 * We are willing to use at most maintenance_work_mem memory space to keep
 * track of dead tuples.  We initially allocate an array of TIDs of that size.
 * If the array threatens to overflow, we suspend the heap scan phase and
 * perform a pass of index cleanup and page compaction, then resume the heap
 * scan with an empty TID array.
 *
 * We can limit the storage for page free space to MaxFSMPages entries,
 * since that's the most the free space map will be willing to remember
 * anyway.  If the relation has fewer than that many pages with free space,
 * life is easy: just build an array of per-page info.  If it has more,
 * we store the free space info as a heap ordered by amount of free space,
 * so that we can discard the pages with least free space to ensure we never
 * have more than MaxFSMPages entries in all.  The surviving page entries
 * are passed to the free space map at conclusion of the scan.
 *
 *
 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.50.4.2 2005-05-07 21:32:53 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/genam.h"
#include "access/heapam.h"
#include "access/xlog.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "storage/freespace.h"
#include "storage/sinval.h"
#include "storage/smgr.h"
#include "utils/lsyscache.h"
/*
 * Space/time tradeoff parameters: do these need to be user-tunable?
 *
 * To consider truncating the relation, we want there to be at least
 * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
 * is less) potentially-freeable pages.
 */
#define REL_TRUNCATE_MINIMUM	1000
#define REL_TRUNCATE_FRACTION	16

/* MAX_TUPLES_PER_PAGE can be a conservative upper limit */
#define MAX_TUPLES_PER_PAGE		((int) (BLCKSZ / sizeof(HeapTupleHeaderData)))
typedef struct LVRelStats
69
/* Overall statistics about rel */
70
BlockNumber rel_pages;
72
BlockNumber pages_removed;
73
double tuples_deleted;
74
BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
75
Size threshold; /* minimum interesting free space */
76
/* List of TIDs of tuples we intend to delete */
77
/* NB: this list is ordered by TID address */
78
int num_dead_tuples; /* current # of entries */
79
int max_dead_tuples; /* # slots allocated in array */
80
ItemPointer dead_tuples; /* array of ItemPointerData */
81
/* Array or heap of per-page info about free space */
82
/* We use a simple array until it fills up, then convert to heap */
83
bool fs_is_heap; /* are we using heap organization? */
84
int num_free_pages; /* current # of entries */
85
int max_free_pages; /* # slots allocated in array */
86
PageFreeSpaceInfo *free_pages; /* array or heap of blkno/avail */
90
static int elevel = -1;
92
static TransactionId OldestXmin;
93
static TransactionId FreezeLimit;
96
/* non-export function prototypes */
97
static void lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
98
Relation *Irel, int nindexes);
99
static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats);
100
static void lazy_scan_index(Relation indrel, LVRelStats *vacrelstats);
101
static void lazy_vacuum_index(Relation indrel,
102
double *index_tups_vacuumed,
103
BlockNumber *index_pages_removed,
104
LVRelStats *vacrelstats);
105
static int lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
106
int tupindex, LVRelStats *vacrelstats);
107
static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats);
108
static BlockNumber count_nondeletable_pages(Relation onerel,
109
LVRelStats *vacrelstats);
110
static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks);
111
static void lazy_record_dead_tuple(LVRelStats *vacrelstats,
112
ItemPointer itemptr);
113
static void lazy_record_free_space(LVRelStats *vacrelstats,
114
BlockNumber page, Size avail);
115
static bool lazy_tid_reaped(ItemPointer itemptr, void *state);
116
static bool dummy_tid_reaped(ItemPointer itemptr, void *state);
117
static void lazy_update_fsm(Relation onerel, LVRelStats *vacrelstats);
118
static int vac_cmp_itemptr(const void *left, const void *right);
119
static int vac_cmp_page_spaces(const void *left, const void *right);
123
* lazy_vacuum_rel() -- perform LAZY VACUUM for one heap relation
125
* This routine vacuums a single heap, cleans out its indexes, and
126
* updates its num_pages and num_tuples statistics.
128
* At entry, we have already established a transaction and opened
129
* and locked the relation.
132
lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
134
LVRelStats *vacrelstats;
138
BlockNumber possibly_freeable;
140
if (vacstmt->verbose)
145
vacuum_set_xid_limits(vacstmt, onerel->rd_rel->relisshared,
146
&OldestXmin, &FreezeLimit);
148
vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));
150
/* Set threshold for interesting free space = average request size */
151
/* XXX should we scale it up or down? Adjust vacuum.c too, if so */
152
vacrelstats->threshold = GetAvgFSMRequestSize(&onerel->rd_node);
154
/* Open all indexes of the relation */
155
vac_open_indexes(onerel, ShareUpdateExclusiveLock, &nindexes, &Irel);
156
hasindex = (nindexes > 0);
158
/* Do the vacuuming */
159
lazy_scan_heap(onerel, vacrelstats, Irel, nindexes);
161
/* Done with indexes */
162
vac_close_indexes(nindexes, Irel, NoLock);
165
* Optionally truncate the relation.
167
* Don't even think about it unless we have a shot at releasing a goodly
168
* number of pages. Otherwise, the time taken isn't worth it.
170
possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
171
if (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
172
possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION)
173
lazy_truncate_heap(onerel, vacrelstats);
175
/* Update shared free space map with final free space info */
176
lazy_update_fsm(onerel, vacrelstats);
178
/* Update statistics in pg_class */
179
vac_update_relstats(RelationGetRelid(onerel),
180
vacrelstats->rel_pages,
181
vacrelstats->rel_tuples,
187
* lazy_scan_heap() -- scan an open heap relation
189
* This routine sets commit status bits, builds lists of dead tuples
190
* and pages with free space, and calculates statistics on the number
191
* of live tuples in the heap. When done, or when we run low on space
192
* for dead-tuple TIDs, invoke vacuuming of indexes and heap.
195
lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
196
Relation *Irel, int nindexes)
202
BlockNumber empty_pages;
207
double *index_tups_vacuumed;
208
BlockNumber *index_pages_removed;
209
bool did_vacuum_index = false;
213
vac_init_rusage(&ru0);
215
relname = RelationGetRelationName(onerel);
217
(errmsg("vacuuming \"%s.%s\"",
218
get_namespace_name(RelationGetNamespace(onerel)),
222
num_tuples = tups_vacuumed = nkeep = nunused = 0;
225
* Because index vacuuming is done in multiple passes, we have to keep
226
* track of the total number of rows and pages removed from each index.
227
* index_tups_vacuumed[i] is the number removed so far from the i'th
228
* index. (For partial indexes this could well be different from
229
* tups_vacuumed.) Likewise for index_pages_removed[i].
231
index_tups_vacuumed = (double *) palloc0(nindexes * sizeof(double));
232
index_pages_removed = (BlockNumber *) palloc0(nindexes * sizeof(BlockNumber));
234
nblocks = RelationGetNumberOfBlocks(onerel);
235
vacrelstats->rel_pages = nblocks;
236
vacrelstats->nonempty_pages = 0;
238
lazy_space_alloc(vacrelstats, nblocks);
240
for (blkno = 0; blkno < nblocks; blkno++)
251
vacuum_delay_point();
254
* If we are close to overrunning the available space for
255
* dead-tuple TIDs, pause and do a cycle of vacuuming before we
258
if ((vacrelstats->max_dead_tuples - vacrelstats->num_dead_tuples) < MAX_TUPLES_PER_PAGE &&
259
vacrelstats->num_dead_tuples > 0)
261
/* Remove index entries */
262
for (i = 0; i < nindexes; i++)
263
lazy_vacuum_index(Irel[i],
264
&index_tups_vacuumed[i],
265
&index_pages_removed[i],
267
did_vacuum_index = true;
268
/* Remove tuples from heap */
269
lazy_vacuum_heap(onerel, vacrelstats);
270
/* Forget the now-vacuumed tuples, and press on */
271
vacrelstats->num_dead_tuples = 0;
274
buf = ReadBuffer(onerel, blkno);
276
/* In this phase we only need shared access to the buffer */
277
LockBuffer(buf, BUFFER_LOCK_SHARE);
279
page = BufferGetPage(buf);
284
* An all-zeroes page could be left over if a backend extends
285
* the relation but crashes before initializing the page.
286
* Reclaim such pages for use.
288
* We have to be careful here because we could be looking at
289
* a page that someone has just added to the relation and not
290
* yet been able to initialize (see RelationGetBufferForTuple).
291
* To interlock against that, release the buffer read lock
292
* (which we must do anyway) and grab the relation extension
293
* lock before re-locking in exclusive mode. If the page is
294
* still uninitialized by then, it must be left over from a
295
* crashed backend, and we can initialize it.
297
* We don't really need the relation lock when this is a new
298
* or temp relation, but it's probably not worth the code space
299
* to check that, since this surely isn't a critical path.
301
* Note: the comparable code in vacuum.c need not do all this
302
* because it's got exclusive lock on the whole relation.
304
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
305
LockPage(onerel, 0, ExclusiveLock);
306
UnlockPage(onerel, 0, ExclusiveLock);
307
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
311
(errmsg("relation \"%s\" page %u is uninitialized --- fixing",
313
PageInit(page, BufferGetPageSize(buf), 0);
315
lazy_record_free_space(vacrelstats, blkno,
316
PageGetFreeSpace(page));
318
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
323
if (PageIsEmpty(page))
326
lazy_record_free_space(vacrelstats, blkno,
327
PageGetFreeSpace(page));
328
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
335
prev_dead_count = vacrelstats->num_dead_tuples;
336
maxoff = PageGetMaxOffsetNumber(page);
337
for (offnum = FirstOffsetNumber;
339
offnum = OffsetNumberNext(offnum))
343
itemid = PageGetItemId(page, offnum);
345
if (!ItemIdIsUsed(itemid))
351
tuple.t_datamcxt = NULL;
352
tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
353
tuple.t_len = ItemIdGetLength(itemid);
354
ItemPointerSet(&(tuple.t_self), blkno, offnum);
358
switch (HeapTupleSatisfiesVacuum(tuple.t_data, OldestXmin, buf))
361
tupgone = true; /* we can delete the tuple */
366
* Tuple is good. Consider whether to replace its
367
* xmin value with FrozenTransactionId.
369
* NB: Since we hold only a shared buffer lock here, we
370
* are assuming that TransactionId read/write is
371
* atomic. This is not the only place that makes such
372
* an assumption. It'd be possible to avoid the
373
* assumption by momentarily acquiring exclusive lock,
374
* but for the moment I see no need to.
376
if (TransactionIdIsNormal(HeapTupleHeaderGetXmin(tuple.t_data)) &&
377
TransactionIdPrecedes(HeapTupleHeaderGetXmin(tuple.t_data),
380
HeapTupleHeaderSetXmin(tuple.t_data, FrozenTransactionId);
381
/* infomask should be okay already */
382
Assert(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED);
389
if (onerel->rd_rel->relhasoids &&
390
!OidIsValid(HeapTupleGetOid(&tuple)))
391
elog(WARNING, "relation \"%s\" TID %u/%u: OID is invalid",
392
relname, blkno, offnum);
394
case HEAPTUPLE_RECENTLY_DEAD:
397
* If tuple is recently deleted then we must not
398
* remove it from relation.
402
case HEAPTUPLE_INSERT_IN_PROGRESS:
403
/* This is an expected case during concurrent vacuum */
405
case HEAPTUPLE_DELETE_IN_PROGRESS:
406
/* This is an expected case during concurrent vacuum */
409
elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
415
lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
423
} /* scan along page */
426
* If we remembered any tuples for deletion, then the page will be
427
* visited again by lazy_vacuum_heap, which will compute and
428
* record its post-compaction free space. If not, then we're done
429
* with this page, so remember its free space as-is.
431
if (vacrelstats->num_dead_tuples == prev_dead_count)
433
lazy_record_free_space(vacrelstats, blkno,
434
PageGetFreeSpace(page));
437
/* Remember the location of the last page with nonremovable tuples */
439
vacrelstats->nonempty_pages = blkno + 1;
441
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
449
/* save stats for use later */
450
vacrelstats->rel_tuples = num_tuples;
451
vacrelstats->tuples_deleted = tups_vacuumed;
453
/* If any tuples need to be deleted, perform final vacuum cycle */
454
/* XXX put a threshold on min number of tuples here? */
455
if (vacrelstats->num_dead_tuples > 0)
457
/* Remove index entries */
458
for (i = 0; i < nindexes; i++)
459
lazy_vacuum_index(Irel[i],
460
&index_tups_vacuumed[i],
461
&index_pages_removed[i],
463
/* Remove tuples from heap */
464
lazy_vacuum_heap(onerel, vacrelstats);
466
else if (!did_vacuum_index)
468
/* Must do post-vacuum cleanup and statistics update anyway */
469
for (i = 0; i < nindexes; i++)
470
lazy_scan_index(Irel[i], vacrelstats);
474
(errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u pages",
475
RelationGetRelationName(onerel),
476
tups_vacuumed, num_tuples, nblocks),
477
errdetail("%.0f dead row versions cannot be removed yet.\n"
478
"There were %.0f unused item pointers.\n"
479
"%u pages are entirely empty.\n"
484
vac_show_rusage(&ru0))));
489
* lazy_vacuum_heap() -- second pass over the heap
491
* This routine marks dead tuples as unused and compacts out free
492
* space on their pages. Pages not having dead tuples recorded from
493
* lazy_scan_heap are not visited at all.
495
* Note: the reason for doing this as a second pass is we cannot remove
496
* the tuples until we've removed their index entries, and we want to
497
* process index entry removal in batches as large as possible.
500
lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
506
vac_init_rusage(&ru0);
510
while (tupindex < vacrelstats->num_dead_tuples)
516
vacuum_delay_point();
518
tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
519
buf = ReadBuffer(onerel, tblk);
520
LockBufferForCleanup(buf);
521
tupindex = lazy_vacuum_page(onerel, tblk, buf, tupindex, vacrelstats);
522
/* Now that we've compacted the page, record its available space */
523
page = BufferGetPage(buf);
524
lazy_record_free_space(vacrelstats, tblk,
525
PageGetFreeSpace(page));
526
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
532
(errmsg("\"%s\": removed %d row versions in %d pages",
533
RelationGetRelationName(onerel),
536
vac_show_rusage(&ru0))));
540
* lazy_vacuum_page() -- free dead tuples on a page
541
* and repair its fragmentation.
543
* Caller is expected to handle reading, locking, and writing the buffer.
545
* tupindex is the index in vacrelstats->dead_tuples of the first dead
546
* tuple for this page. We assume the rest follow sequentially.
547
* The return value is the first tupindex after the tuples of this page.
550
lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
551
int tupindex, LVRelStats *vacrelstats)
553
OffsetNumber unused[BLCKSZ / sizeof(OffsetNumber)];
555
Page page = BufferGetPage(buffer);
558
START_CRIT_SECTION();
559
for (; tupindex < vacrelstats->num_dead_tuples; tupindex++)
564
tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
566
break; /* past end of tuples for this block */
567
toff = ItemPointerGetOffsetNumber(&vacrelstats->dead_tuples[tupindex]);
568
itemid = PageGetItemId(page, toff);
569
itemid->lp_flags &= ~LP_USED;
572
uncnt = PageRepairFragmentation(page, unused);
575
if (!onerel->rd_istemp)
579
recptr = log_heap_clean(onerel, buffer, unused, uncnt);
580
PageSetLSN(page, recptr);
581
PageSetTLI(page, ThisTimeLineID);
585
/* No XLOG record, but still need to flag that XID exists on disk */
586
MyXactMadeTempRelUpdate = true;
595
* lazy_scan_index() -- scan one index relation to update pg_class statistic.
597
* We use this when we have no deletions to do.
600
lazy_scan_index(Relation indrel, LVRelStats *vacrelstats)
602
IndexBulkDeleteResult *stats;
603
IndexVacuumCleanupInfo vcinfo;
606
vac_init_rusage(&ru0);
609
* Acquire appropriate type of lock on index: must be exclusive if
610
* index AM isn't concurrent-safe.
612
if (indrel->rd_am->amconcurrent)
613
LockRelation(indrel, RowExclusiveLock);
615
LockRelation(indrel, AccessExclusiveLock);
618
* Even though we're not planning to delete anything, we use the
619
* ambulkdelete call, because (a) the scan happens within the index AM
620
* for more speed, and (b) it may want to pass private statistics to
621
* the amvacuumcleanup call.
623
stats = index_bulk_delete(indrel, dummy_tid_reaped, NULL);
625
/* Do post-VACUUM cleanup, even though we deleted nothing */
626
vcinfo.vacuum_full = false;
627
vcinfo.message_level = elevel;
629
stats = index_vacuum_cleanup(indrel, &vcinfo, stats);
632
* Release lock acquired above.
634
if (indrel->rd_am->amconcurrent)
635
UnlockRelation(indrel, RowExclusiveLock);
637
UnlockRelation(indrel, AccessExclusiveLock);
642
/* now update statistics in pg_class */
643
vac_update_relstats(RelationGetRelid(indrel),
645
stats->num_index_tuples,
649
(errmsg("index \"%s\" now contains %.0f row versions in %u pages",
650
RelationGetRelationName(indrel),
651
stats->num_index_tuples,
653
errdetail("%u index pages have been deleted, %u are currently reusable.\n"
655
stats->pages_deleted, stats->pages_free,
656
vac_show_rusage(&ru0))));
662
* lazy_vacuum_index() -- vacuum one index relation.
664
* Delete all the index entries pointing to tuples listed in
665
* vacrelstats->dead_tuples.
667
* Increment *index_tups_vacuumed by the number of index entries
668
* removed, and *index_pages_removed by the number of pages removed.
670
* Finally, we arrange to update the index relation's statistics in
674
lazy_vacuum_index(Relation indrel,
675
double *index_tups_vacuumed,
676
BlockNumber *index_pages_removed,
677
LVRelStats *vacrelstats)
679
IndexBulkDeleteResult *stats;
680
IndexVacuumCleanupInfo vcinfo;
683
vac_init_rusage(&ru0);
686
* Acquire appropriate type of lock on index: must be exclusive if
687
* index AM isn't concurrent-safe.
689
if (indrel->rd_am->amconcurrent)
690
LockRelation(indrel, RowExclusiveLock);
692
LockRelation(indrel, AccessExclusiveLock);
694
/* Do bulk deletion */
695
stats = index_bulk_delete(indrel, lazy_tid_reaped, (void *) vacrelstats);
697
/* Do post-VACUUM cleanup */
698
vcinfo.vacuum_full = false;
699
vcinfo.message_level = elevel;
701
stats = index_vacuum_cleanup(indrel, &vcinfo, stats);
704
* Release lock acquired above.
706
if (indrel->rd_am->amconcurrent)
707
UnlockRelation(indrel, RowExclusiveLock);
709
UnlockRelation(indrel, AccessExclusiveLock);
714
/* accumulate total removed over multiple index-cleaning cycles */
715
*index_tups_vacuumed += stats->tuples_removed;
716
*index_pages_removed += stats->pages_removed;
718
/* now update statistics in pg_class */
719
vac_update_relstats(RelationGetRelid(indrel),
721
stats->num_index_tuples,
725
(errmsg("index \"%s\" now contains %.0f row versions in %u pages",
726
RelationGetRelationName(indrel),
727
stats->num_index_tuples,
729
errdetail("%.0f index row versions were removed.\n"
730
"%u index pages have been deleted, %u are currently reusable.\n"
732
stats->tuples_removed,
733
stats->pages_deleted, stats->pages_free,
734
vac_show_rusage(&ru0))));
740
* lazy_truncate_heap - try to truncate off any empty pages at the end
743
lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats)
745
BlockNumber old_rel_pages = vacrelstats->rel_pages;
746
BlockNumber new_rel_pages;
747
PageFreeSpaceInfo *pageSpaces;
753
vac_init_rusage(&ru0);
756
* We need full exclusive lock on the relation in order to do
757
* truncation. If we can't get it, give up rather than waiting --- we
758
* don't want to block other backends, and we don't want to deadlock
759
* (which is quite possible considering we already hold a lower-grade
762
if (!ConditionalLockRelation(onerel, AccessExclusiveLock))
766
* Now that we have exclusive lock, look to see if the rel has grown
767
* whilst we were vacuuming with non-exclusive lock. If so, give up;
768
* the newly added pages presumably contain non-deletable tuples.
770
new_rel_pages = RelationGetNumberOfBlocks(onerel);
771
if (new_rel_pages != old_rel_pages)
773
/* might as well use the latest news when we update pg_class stats */
774
vacrelstats->rel_pages = new_rel_pages;
775
UnlockRelation(onerel, AccessExclusiveLock);
780
* Scan backwards from the end to verify that the end pages actually
781
* contain nothing we need to keep. This is *necessary*, not
782
* optional, because other backends could have added tuples to these
783
* pages whilst we were vacuuming.
785
new_rel_pages = count_nondeletable_pages(onerel, vacrelstats);
787
if (new_rel_pages >= old_rel_pages)
789
/* can't do anything after all */
790
UnlockRelation(onerel, AccessExclusiveLock);
797
* First, flush any shared buffers for the blocks we intend to delete.
798
* FlushRelationBuffers is a bit more than we need for this, since it
799
* will also write out dirty buffers for blocks we aren't deleting,
800
* but it's the closest thing in bufmgr's API.
802
FlushRelationBuffers(onerel, new_rel_pages);
805
* Do the physical truncation.
807
RelationTruncate(onerel, new_rel_pages);
810
* Drop free-space info for removed blocks; these must not get entered
813
pageSpaces = vacrelstats->free_pages;
814
n = vacrelstats->num_free_pages;
816
for (i = 0; i < n; i++)
818
if (pageSpaces[i].blkno < new_rel_pages)
820
pageSpaces[j] = pageSpaces[i];
824
vacrelstats->num_free_pages = j;
825
/* We destroyed the heap ordering, so mark array unordered */
826
vacrelstats->fs_is_heap = false;
828
/* update statistics */
829
vacrelstats->rel_pages = new_rel_pages;
830
vacrelstats->pages_removed = old_rel_pages - new_rel_pages;
833
* We keep the exclusive lock until commit (perhaps not necessary)?
837
(errmsg("\"%s\": truncated %u to %u pages",
838
RelationGetRelationName(onerel),
839
old_rel_pages, new_rel_pages),
841
vac_show_rusage(&ru0))));
845
* Rescan end pages to verify that they are (still) empty of needed tuples.
847
* Returns number of nondeletable pages (last nonempty page + 1).
850
count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
855
/* Strange coding of loop control is needed because blkno is unsigned */
856
blkno = vacrelstats->rel_pages;
857
while (blkno > vacrelstats->nonempty_pages)
866
vacuum_delay_point();
870
buf = ReadBuffer(onerel, blkno);
872
/* In this phase we only need shared access to the buffer */
873
LockBuffer(buf, BUFFER_LOCK_SHARE);
875
page = BufferGetPage(buf);
877
if (PageIsNew(page) || PageIsEmpty(page))
879
/* PageIsNew probably shouldn't happen... */
880
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
886
maxoff = PageGetMaxOffsetNumber(page);
887
for (offnum = FirstOffsetNumber;
889
offnum = OffsetNumberNext(offnum))
893
itemid = PageGetItemId(page, offnum);
895
if (!ItemIdIsUsed(itemid))
898
tuple.t_datamcxt = NULL;
899
tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
900
tuple.t_len = ItemIdGetLength(itemid);
901
ItemPointerSet(&(tuple.t_self), blkno, offnum);
905
switch (HeapTupleSatisfiesVacuum(tuple.t_data, OldestXmin, buf))
908
tupgone = true; /* we can delete the tuple */
911
/* Shouldn't be necessary to re-freeze anything */
913
case HEAPTUPLE_RECENTLY_DEAD:
916
* If tuple is recently deleted then we must not
917
* remove it from relation.
920
case HEAPTUPLE_INSERT_IN_PROGRESS:
921
/* This is an expected case during concurrent vacuum */
923
case HEAPTUPLE_DELETE_IN_PROGRESS:
924
/* This is an expected case during concurrent vacuum */
927
elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
934
break; /* can stop scanning */
936
} /* scan along page */
938
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
942
/* Done scanning if we found a tuple here */
948
* If we fall out of the loop, all the previously-thought-to-be-empty
949
* pages really are; we need not bother to look at the last
950
* known-nonempty page.
952
return vacrelstats->nonempty_pages;
956
* lazy_space_alloc - space allocation decisions for lazy vacuum
958
* See the comments at the head of this file for rationale.
961
lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks)
966
maxtuples = (int) ((maintenance_work_mem * 1024L) / sizeof(ItemPointerData));
967
/* stay sane if small maintenance_work_mem */
968
if (maxtuples < MAX_TUPLES_PER_PAGE)
969
maxtuples = MAX_TUPLES_PER_PAGE;
971
vacrelstats->num_dead_tuples = 0;
972
vacrelstats->max_dead_tuples = maxtuples;
973
vacrelstats->dead_tuples = (ItemPointer)
974
palloc(maxtuples * sizeof(ItemPointerData));
976
maxpages = MaxFSMPages;
977
/* No need to allocate more pages than the relation has blocks */
978
if (relblocks < (BlockNumber) maxpages)
979
maxpages = (int) relblocks;
981
vacrelstats->fs_is_heap = false;
982
vacrelstats->num_free_pages = 0;
983
vacrelstats->max_free_pages = maxpages;
984
vacrelstats->free_pages = (PageFreeSpaceInfo *)
985
palloc(maxpages * sizeof(PageFreeSpaceInfo));
989
* lazy_record_dead_tuple - remember one deletable tuple
992
lazy_record_dead_tuple(LVRelStats *vacrelstats,
996
* The array shouldn't overflow under normal behavior, but perhaps it
997
* could if we are given a really small maintenance_work_mem. In that
998
* case, just forget the last few tuples.
1000
if (vacrelstats->num_dead_tuples < vacrelstats->max_dead_tuples)
1002
vacrelstats->dead_tuples[vacrelstats->num_dead_tuples] = *itemptr;
1003
vacrelstats->num_dead_tuples++;
1008
* lazy_record_free_space - remember free space on one page
1011
lazy_record_free_space(LVRelStats *vacrelstats,
1015
PageFreeSpaceInfo *pageSpaces;
1019
* A page with less than stats->threshold free space will be forgotten
1020
* immediately, and never passed to the free space map. Removing the
1021
* uselessly small entries early saves cycles, and in particular
1022
* reduces the amount of time we spend holding the FSM lock when we
1023
* finally call RecordRelationFreeSpace. Since the FSM will probably
1024
* drop pages with little free space anyway, there's no point in
1025
* making this really small.
1027
* XXX Is it worth trying to measure average tuple size, and using that
1028
* to adjust the threshold? Would be worthwhile if FSM has no stats
1029
* yet for this relation. But changing the threshold as we scan the
1030
* rel might lead to bizarre behavior, too. Also, it's probably
1031
* better if vacuum.c has the same thresholding behavior as we do
1034
if (avail < vacrelstats->threshold)
1037
/* Copy pointers to local variables for notational simplicity */
1038
pageSpaces = vacrelstats->free_pages;
1039
n = vacrelstats->max_free_pages;
1041
/* If we haven't filled the array yet, just keep adding entries */
1042
if (vacrelstats->num_free_pages < n)
1044
pageSpaces[vacrelstats->num_free_pages].blkno = page;
1045
pageSpaces[vacrelstats->num_free_pages].avail = avail;
1046
vacrelstats->num_free_pages++;
1051
* The rest of this routine works with "heap" organization of the
1052
* free space arrays, wherein we maintain the heap property
1053
* avail[(j-1) div 2] <= avail[j] for 0 < j < n.
1054
* In particular, the zero'th element always has the smallest available
1055
* space and can be discarded to make room for a new page with more space.
1056
* See Knuth's discussion of heap-based priority queues, sec 5.2.3;
1057
* but note he uses 1-origin array subscripts, not 0-origin.
1061
/* If we haven't yet converted the array to heap organization, do it */
1062
if (!vacrelstats->fs_is_heap)
1065
* Scan backwards through the array, "sift-up" each value into its
1066
* correct position. We can start the scan at n/2-1 since each
1067
* entry above that position has no children to worry about.
1073
BlockNumber R = pageSpaces[l].blkno;
1074
Size K = pageSpaces[l].avail;
1075
int i; /* i is where the "hole" is */
1084
if (j + 1 < n && pageSpaces[j].avail > pageSpaces[j + 1].avail)
1086
if (K <= pageSpaces[j].avail)
1088
pageSpaces[i] = pageSpaces[j];
1091
pageSpaces[i].blkno = R;
1092
pageSpaces[i].avail = K;
1095
vacrelstats->fs_is_heap = true;
1098
/* If new page has more than zero'th entry, insert it into heap */
1099
if (avail > pageSpaces[0].avail)
1102
* Notionally, we replace the zero'th entry with the new data, and
1103
* then sift-up to maintain the heap property. Physically, the
1104
* new data doesn't get stored into the arrays until we find the
1105
* right location for it.
1107
int i = 0; /* i is where the "hole" is */
1115
if (j + 1 < n && pageSpaces[j].avail > pageSpaces[j + 1].avail)
1117
if (avail <= pageSpaces[j].avail)
1119
pageSpaces[i] = pageSpaces[j];
1122
pageSpaces[i].blkno = page;
1123
pageSpaces[i].avail = avail;
1128
* lazy_tid_reaped() -- is a particular tid deletable?
1130
* This has the right signature to be an IndexBulkDeleteCallback.
1132
* Assumes dead_tuples array is in sorted order.
1135
lazy_tid_reaped(ItemPointer itemptr, void *state)
1137
LVRelStats *vacrelstats = (LVRelStats *) state;
1140
res = (ItemPointer) bsearch((void *) itemptr,
1141
(void *) vacrelstats->dead_tuples,
1142
vacrelstats->num_dead_tuples,
1143
sizeof(ItemPointerData),
1146
return (res != NULL);
1150
* Dummy version for lazy_scan_index.
1153
dummy_tid_reaped(ItemPointer itemptr, void *state)
1159
* Update the shared Free Space Map with the info we now have about
1160
* free space in the relation, discarding any old info the map may have.
1163
lazy_update_fsm(Relation onerel, LVRelStats *vacrelstats)
1165
PageFreeSpaceInfo *pageSpaces = vacrelstats->free_pages;
1166
int nPages = vacrelstats->num_free_pages;
1169
* Sort data into order, as required by RecordRelationFreeSpace.
1172
qsort(pageSpaces, nPages, sizeof(PageFreeSpaceInfo),
1173
vac_cmp_page_spaces);
1175
RecordRelationFreeSpace(&onerel->rd_node, nPages, pageSpaces);
1179
* Comparator routines for use with qsort() and bsearch().
1182
vac_cmp_itemptr(const void *left, const void *right)
1189
lblk = ItemPointerGetBlockNumber((ItemPointer) left);
1190
rblk = ItemPointerGetBlockNumber((ItemPointer) right);
1197
loff = ItemPointerGetOffsetNumber((ItemPointer) left);
1198
roff = ItemPointerGetOffsetNumber((ItemPointer) right);
1209
vac_cmp_page_spaces(const void *left, const void *right)
1211
PageFreeSpaceInfo *linfo = (PageFreeSpaceInfo *) left;
1212
PageFreeSpaceInfo *rinfo = (PageFreeSpaceInfo *) right;
1214
if (linfo->blkno < rinfo->blkno)
1216
else if (linfo->blkno > rinfo->blkno)