~vcs-imports/mammoth-replicator/trunk

Viewing changes to src/backend/commands/vacuumlazy.c

  • Committer: alvherre
  • Date: 2005-12-16 21:24:52 UTC
  • Revision ID: svn-v4:db760fc0-0f08-0410-9d63-cc6633f64896:trunk:1
Initial import of the REL8_0_3 sources from the Pgsql CVS repository.

 
/*-------------------------------------------------------------------------
 *
 * vacuumlazy.c
 *    Concurrent ("lazy") vacuuming.
 *
 *
 * The major space usage for LAZY VACUUM is storage for the array of dead
 * tuple TIDs, with the next biggest need being storage for per-disk-page
 * free space info.  We want to ensure we can vacuum even the very largest
 * relations with finite memory space usage.  To do that, we set upper bounds
 * on the number of tuples and pages we will keep track of at once.
 *
 * We are willing to use at most maintenance_work_mem memory space to keep
 * track of dead tuples.  We initially allocate an array of TIDs of that size.
 * If the array threatens to overflow, we suspend the heap scan phase and
 * perform a pass of index cleanup and page compaction, then resume the heap
 * scan with an empty TID array.
 *
 * We can limit the storage for page free space to MaxFSMPages entries,
 * since that's the most the free space map will be willing to remember
 * anyway.  If the relation has fewer than that many pages with free space,
 * life is easy: just build an array of per-page info.  If it has more,
 * we store the free space info as a heap ordered by amount of free space,
 * so that we can discard the pages with least free space to ensure we never
 * have more than MaxFSMPages entries in all.  The surviving page entries
 * are passed to the free space map at conclusion of the scan.
 *
 *
 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.50.4.2 2005-05-07 21:32:53 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
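/*
 * For a concrete sense of the memory bound (an illustration only, not part
 * of the algorithm; it assumes the stock 16 MB maintenance_work_mem setting
 * and the 6-byte ItemPointerData used for dead-tuple TIDs):
 *
 *     16384 kB * 1024 / 6 bytes  ~=  2.8 million dead-tuple TIDs
 *
 * so roughly that many dead tuples can be collected before the heap scan
 * has to pause for a round of index cleanup; see lazy_space_alloc() below.
 */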
 
#include "postgres.h"

#include <math.h>

#include "access/genam.h"
#include "access/heapam.h"
#include "access/xlog.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "storage/freespace.h"
#include "storage/sinval.h"
#include "storage/smgr.h"
#include "utils/lsyscache.h"


/*
 * Space/time tradeoff parameters: do these need to be user-tunable?
 *
 * To consider truncating the relation, we want there to be at least
 * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
 * is less) potentially-freeable pages.
 */
#define REL_TRUNCATE_MINIMUM    1000
#define REL_TRUNCATE_FRACTION   16
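/*
 * Worked example (illustrative numbers only): the test in lazy_vacuum_rel()
 * amounts to requiring Min(REL_TRUNCATE_MINIMUM, relsize / REL_TRUNCATE_FRACTION)
 * potentially-freeable pages.  For a 100,000-page relation that is
 * Min(1000, 100000/16 = 6250) = 1000 pages; for an 8,000-page relation it is
 * Min(1000, 8000/16 = 500) = 500 pages.
 */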
 
/* MAX_TUPLES_PER_PAGE can be a conservative upper limit */
#define MAX_TUPLES_PER_PAGE     ((int) (BLCKSZ / sizeof(HeapTupleHeaderData)))
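/*
 * Rough scale (assuming the default 8 kB BLCKSZ and a tuple header of about
 * 30 bytes in this release): MAX_TUPLES_PER_PAGE comes out to a few hundred
 * slots, comfortably more than a real page can hold once line pointers and
 * tuple data are counted -- hence "conservative upper limit".
 */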
 
typedef struct LVRelStats
{
    /* Overall statistics about rel */
    BlockNumber rel_pages;
    double      rel_tuples;
    BlockNumber pages_removed;
    double      tuples_deleted;
    BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
    Size        threshold;      /* minimum interesting free space */
    /* List of TIDs of tuples we intend to delete */
    /* NB: this list is ordered by TID address */
    int         num_dead_tuples;    /* current # of entries */
    int         max_dead_tuples;    /* # slots allocated in array */
    ItemPointer dead_tuples;        /* array of ItemPointerData */
    /* Array or heap of per-page info about free space */
    /* We use a simple array until it fills up, then convert to heap */
    bool        fs_is_heap;     /* are we using heap organization? */
    int         num_free_pages; /* current # of entries */
    int         max_free_pages; /* # slots allocated in array */
    PageFreeSpaceInfo *free_pages;  /* array or heap of blkno/avail */
} LVRelStats;


static int  elevel = -1;

static TransactionId OldestXmin;
static TransactionId FreezeLimit;


/* non-export function prototypes */
static void lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
               Relation *Irel, int nindexes);
static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats);
static void lazy_scan_index(Relation indrel, LVRelStats *vacrelstats);
static void lazy_vacuum_index(Relation indrel,
                  double *index_tups_vacuumed,
                  BlockNumber *index_pages_removed,
                  LVRelStats *vacrelstats);
static int lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
                 int tupindex, LVRelStats *vacrelstats);
static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats);
static BlockNumber count_nondeletable_pages(Relation onerel,
                         LVRelStats *vacrelstats);
static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks);
static void lazy_record_dead_tuple(LVRelStats *vacrelstats,
                       ItemPointer itemptr);
static void lazy_record_free_space(LVRelStats *vacrelstats,
                       BlockNumber page, Size avail);
static bool lazy_tid_reaped(ItemPointer itemptr, void *state);
static bool dummy_tid_reaped(ItemPointer itemptr, void *state);
static void lazy_update_fsm(Relation onerel, LVRelStats *vacrelstats);
static int  vac_cmp_itemptr(const void *left, const void *right);
static int  vac_cmp_page_spaces(const void *left, const void *right);


/*
 *  lazy_vacuum_rel() -- perform LAZY VACUUM for one heap relation
 *
 *      This routine vacuums a single heap, cleans out its indexes, and
 *      updates its num_pages and num_tuples statistics.
 *
 *      At entry, we have already established a transaction and opened
 *      and locked the relation.
 */
void
lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
{
    LVRelStats *vacrelstats;
    Relation   *Irel;
    int         nindexes;
    bool        hasindex;
    BlockNumber possibly_freeable;

    if (vacstmt->verbose)
        elevel = INFO;
    else
        elevel = DEBUG2;

    vacuum_set_xid_limits(vacstmt, onerel->rd_rel->relisshared,
                          &OldestXmin, &FreezeLimit);

    vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));

    /* Set threshold for interesting free space = average request size */
    /* XXX should we scale it up or down?  Adjust vacuum.c too, if so */
    vacrelstats->threshold = GetAvgFSMRequestSize(&onerel->rd_node);

    /* Open all indexes of the relation */
    vac_open_indexes(onerel, ShareUpdateExclusiveLock, &nindexes, &Irel);
    hasindex = (nindexes > 0);

    /* Do the vacuuming */
    lazy_scan_heap(onerel, vacrelstats, Irel, nindexes);

    /* Done with indexes */
    vac_close_indexes(nindexes, Irel, NoLock);

    /*
     * Optionally truncate the relation.
     *
     * Don't even think about it unless we have a shot at releasing a goodly
     * number of pages.  Otherwise, the time taken isn't worth it.
     */
    possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
    if (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
        possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION)
        lazy_truncate_heap(onerel, vacrelstats);

    /* Update shared free space map with final free space info */
    lazy_update_fsm(onerel, vacrelstats);

    /* Update statistics in pg_class */
    vac_update_relstats(RelationGetRelid(onerel),
                        vacrelstats->rel_pages,
                        vacrelstats->rel_tuples,
                        hasindex);
}
 
/*
 *  lazy_scan_heap() -- scan an open heap relation
 *
 *      This routine sets commit status bits, builds lists of dead tuples
 *      and pages with free space, and calculates statistics on the number
 *      of live tuples in the heap.  When done, or when we run low on space
 *      for dead-tuple TIDs, invoke vacuuming of indexes and heap.
 */
static void
lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
               Relation *Irel, int nindexes)
{
    BlockNumber nblocks,
                blkno;
    HeapTupleData tuple;
    char       *relname;
    BlockNumber empty_pages;
    double      num_tuples,
                tups_vacuumed,
                nkeep,
                nunused;
    double     *index_tups_vacuumed;
    BlockNumber *index_pages_removed;
    bool        did_vacuum_index = false;
    int         i;
    VacRUsage   ru0;

    vac_init_rusage(&ru0);

    relname = RelationGetRelationName(onerel);
    ereport(elevel,
            (errmsg("vacuuming \"%s.%s\"",
                    get_namespace_name(RelationGetNamespace(onerel)),
                    relname)));

    empty_pages = 0;
    num_tuples = tups_vacuumed = nkeep = nunused = 0;

    /*
     * Because index vacuuming is done in multiple passes, we have to keep
     * track of the total number of rows and pages removed from each index.
     * index_tups_vacuumed[i] is the number removed so far from the i'th
     * index.  (For partial indexes this could well be different from
     * tups_vacuumed.)  Likewise for index_pages_removed[i].
     */
    index_tups_vacuumed = (double *) palloc0(nindexes * sizeof(double));
    index_pages_removed = (BlockNumber *) palloc0(nindexes * sizeof(BlockNumber));

    nblocks = RelationGetNumberOfBlocks(onerel);
    vacrelstats->rel_pages = nblocks;
    vacrelstats->nonempty_pages = 0;

    lazy_space_alloc(vacrelstats, nblocks);

    for (blkno = 0; blkno < nblocks; blkno++)
    {
        Buffer      buf;
        Page        page;
        OffsetNumber offnum,
                    maxoff;
        bool        pgchanged,
                    tupgone,
                    hastup;
        int         prev_dead_count;

        vacuum_delay_point();

        /*
         * If we are close to overrunning the available space for
         * dead-tuple TIDs, pause and do a cycle of vacuuming before we
         * tackle this page.
         */
        if ((vacrelstats->max_dead_tuples - vacrelstats->num_dead_tuples) < MAX_TUPLES_PER_PAGE &&
            vacrelstats->num_dead_tuples > 0)
        {
            /* Remove index entries */
            for (i = 0; i < nindexes; i++)
                lazy_vacuum_index(Irel[i],
                                  &index_tups_vacuumed[i],
                                  &index_pages_removed[i],
                                  vacrelstats);
            did_vacuum_index = true;
            /* Remove tuples from heap */
            lazy_vacuum_heap(onerel, vacrelstats);
            /* Forget the now-vacuumed tuples, and press on */
            vacrelstats->num_dead_tuples = 0;
        }

        buf = ReadBuffer(onerel, blkno);

        /* In this phase we only need shared access to the buffer */
        LockBuffer(buf, BUFFER_LOCK_SHARE);

        page = BufferGetPage(buf);

        if (PageIsNew(page))
        {
            /*
             * An all-zeroes page could be left over if a backend extends
             * the relation but crashes before initializing the page.
             * Reclaim such pages for use.
             *
             * We have to be careful here because we could be looking at
             * a page that someone has just added to the relation and not
             * yet been able to initialize (see RelationGetBufferForTuple).
             * To interlock against that, release the buffer read lock
             * (which we must do anyway) and grab the relation extension
             * lock before re-locking in exclusive mode.  If the page is
             * still uninitialized by then, it must be left over from a
             * crashed backend, and we can initialize it.
             *
             * We don't really need the relation lock when this is a new
             * or temp relation, but it's probably not worth the code space
             * to check that, since this surely isn't a critical path.
             *
             * Note: the comparable code in vacuum.c need not do all this
             * because it's got exclusive lock on the whole relation.
             */
            LockBuffer(buf, BUFFER_LOCK_UNLOCK);
            LockPage(onerel, 0, ExclusiveLock);
            UnlockPage(onerel, 0, ExclusiveLock);
            LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
            if (PageIsNew(page))
            {
                ereport(WARNING,
                        (errmsg("relation \"%s\" page %u is uninitialized --- fixing",
                                relname, blkno)));
                PageInit(page, BufferGetPageSize(buf), 0);
                empty_pages++;
                lazy_record_free_space(vacrelstats, blkno,
                                       PageGetFreeSpace(page));
            }
            LockBuffer(buf, BUFFER_LOCK_UNLOCK);
            WriteBuffer(buf);
            continue;
        }

        if (PageIsEmpty(page))
        {
            empty_pages++;
            lazy_record_free_space(vacrelstats, blkno,
                                   PageGetFreeSpace(page));
            LockBuffer(buf, BUFFER_LOCK_UNLOCK);
            ReleaseBuffer(buf);
            continue;
        }

        pgchanged = false;
        hastup = false;
        prev_dead_count = vacrelstats->num_dead_tuples;
        maxoff = PageGetMaxOffsetNumber(page);
        for (offnum = FirstOffsetNumber;
             offnum <= maxoff;
             offnum = OffsetNumberNext(offnum))
        {
            ItemId      itemid;

            itemid = PageGetItemId(page, offnum);

            if (!ItemIdIsUsed(itemid))
            {
                nunused += 1;
                continue;
            }

            tuple.t_datamcxt = NULL;
            tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
            tuple.t_len = ItemIdGetLength(itemid);
            ItemPointerSet(&(tuple.t_self), blkno, offnum);

            tupgone = false;

            switch (HeapTupleSatisfiesVacuum(tuple.t_data, OldestXmin, buf))
            {
                case HEAPTUPLE_DEAD:
                    tupgone = true;     /* we can delete the tuple */
                    break;
                case HEAPTUPLE_LIVE:

                    /*
                     * Tuple is good.  Consider whether to replace its
                     * xmin value with FrozenTransactionId.
                     *
                     * NB: Since we hold only a shared buffer lock here, we
                     * are assuming that TransactionId read/write is
                     * atomic.  This is not the only place that makes such
                     * an assumption.  It'd be possible to avoid the
                     * assumption by momentarily acquiring exclusive lock,
                     * but for the moment I see no need to.
                     */
                    if (TransactionIdIsNormal(HeapTupleHeaderGetXmin(tuple.t_data)) &&
                        TransactionIdPrecedes(HeapTupleHeaderGetXmin(tuple.t_data),
                                              FreezeLimit))
                    {
                        HeapTupleHeaderSetXmin(tuple.t_data, FrozenTransactionId);
                        /* infomask should be okay already */
                        Assert(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED);
                        pgchanged = true;
                    }

                    /*
                     * Other checks...
                     */
                    if (onerel->rd_rel->relhasoids &&
                        !OidIsValid(HeapTupleGetOid(&tuple)))
                        elog(WARNING, "relation \"%s\" TID %u/%u: OID is invalid",
                             relname, blkno, offnum);
                    break;
                case HEAPTUPLE_RECENTLY_DEAD:

                    /*
                     * If tuple is recently deleted then we must not
                     * remove it from relation.
                     */
                    nkeep += 1;
                    break;
                case HEAPTUPLE_INSERT_IN_PROGRESS:
                    /* This is an expected case during concurrent vacuum */
                    break;
                case HEAPTUPLE_DELETE_IN_PROGRESS:
                    /* This is an expected case during concurrent vacuum */
                    break;
                default:
                    elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
                    break;
            }

            if (tupgone)
            {
                lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
                tups_vacuumed += 1;
            }
            else
            {
                num_tuples += 1;
                hastup = true;
            }
        }                       /* scan along page */

        /*
         * If we remembered any tuples for deletion, then the page will be
         * visited again by lazy_vacuum_heap, which will compute and
         * record its post-compaction free space.  If not, then we're done
         * with this page, so remember its free space as-is.
         */
        if (vacrelstats->num_dead_tuples == prev_dead_count)
        {
            lazy_record_free_space(vacrelstats, blkno,
                                   PageGetFreeSpace(page));
        }

        /* Remember the location of the last page with nonremovable tuples */
        if (hastup)
            vacrelstats->nonempty_pages = blkno + 1;

        LockBuffer(buf, BUFFER_LOCK_UNLOCK);

        if (pgchanged)
            WriteBuffer(buf);
        else
            ReleaseBuffer(buf);
    }

    /* save stats for use later */
    vacrelstats->rel_tuples = num_tuples;
    vacrelstats->tuples_deleted = tups_vacuumed;

    /* If any tuples need to be deleted, perform final vacuum cycle */
    /* XXX put a threshold on min number of tuples here? */
    if (vacrelstats->num_dead_tuples > 0)
    {
        /* Remove index entries */
        for (i = 0; i < nindexes; i++)
            lazy_vacuum_index(Irel[i],
                              &index_tups_vacuumed[i],
                              &index_pages_removed[i],
                              vacrelstats);
        /* Remove tuples from heap */
        lazy_vacuum_heap(onerel, vacrelstats);
    }
    else if (!did_vacuum_index)
    {
        /* Must do post-vacuum cleanup and statistics update anyway */
        for (i = 0; i < nindexes; i++)
            lazy_scan_index(Irel[i], vacrelstats);
    }

    ereport(elevel,
            (errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u pages",
                    RelationGetRelationName(onerel),
                    tups_vacuumed, num_tuples, nblocks),
             errdetail("%.0f dead row versions cannot be removed yet.\n"
                       "There were %.0f unused item pointers.\n"
                       "%u pages are entirely empty.\n"
                       "%s",
                       nkeep,
                       nunused,
                       empty_pages,
                       vac_show_rusage(&ru0))));
}
 
/*
 *  lazy_vacuum_heap() -- second pass over the heap
 *
 *      This routine marks dead tuples as unused and compacts out free
 *      space on their pages.  Pages not having dead tuples recorded from
 *      lazy_scan_heap are not visited at all.
 *
 * Note: the reason for doing this as a second pass is we cannot remove
 * the tuples until we've removed their index entries, and we want to
 * process index entry removal in batches as large as possible.
 */
static void
lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
{
    int         tupindex;
    int         npages;
    VacRUsage   ru0;

    vac_init_rusage(&ru0);
    npages = 0;

    tupindex = 0;
    while (tupindex < vacrelstats->num_dead_tuples)
    {
        BlockNumber tblk;
        Buffer      buf;
        Page        page;

        vacuum_delay_point();

        tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
        buf = ReadBuffer(onerel, tblk);
        LockBufferForCleanup(buf);
        tupindex = lazy_vacuum_page(onerel, tblk, buf, tupindex, vacrelstats);
        /* Now that we've compacted the page, record its available space */
        page = BufferGetPage(buf);
        lazy_record_free_space(vacrelstats, tblk,
                               PageGetFreeSpace(page));
        LockBuffer(buf, BUFFER_LOCK_UNLOCK);
        WriteBuffer(buf);
        npages++;
    }

    ereport(elevel,
            (errmsg("\"%s\": removed %d row versions in %d pages",
                    RelationGetRelationName(onerel),
                    tupindex, npages),
             errdetail("%s",
                       vac_show_rusage(&ru0))));
}

/*
 *  lazy_vacuum_page() -- free dead tuples on a page
 *                        and repair its fragmentation.
 *
 * Caller is expected to handle reading, locking, and writing the buffer.
 *
 * tupindex is the index in vacrelstats->dead_tuples of the first dead
 * tuple for this page.  We assume the rest follow sequentially.
 * The return value is the first tupindex after the tuples of this page.
 */
static int
lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
                 int tupindex, LVRelStats *vacrelstats)
{
    OffsetNumber unused[BLCKSZ / sizeof(OffsetNumber)];
    int         uncnt;
    Page        page = BufferGetPage(buffer);
    ItemId      itemid;

    START_CRIT_SECTION();
    for (; tupindex < vacrelstats->num_dead_tuples; tupindex++)
    {
        BlockNumber tblk;
        OffsetNumber toff;

        tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
        if (tblk != blkno)
            break;              /* past end of tuples for this block */
        toff = ItemPointerGetOffsetNumber(&vacrelstats->dead_tuples[tupindex]);
        itemid = PageGetItemId(page, toff);
        itemid->lp_flags &= ~LP_USED;
    }

    uncnt = PageRepairFragmentation(page, unused);

    /* XLOG stuff */
    if (!onerel->rd_istemp)
    {
        XLogRecPtr  recptr;

        recptr = log_heap_clean(onerel, buffer, unused, uncnt);
        PageSetLSN(page, recptr);
        PageSetTLI(page, ThisTimeLineID);
    }
    else
    {
        /* No XLOG record, but still need to flag that XID exists on disk */
        MyXactMadeTempRelUpdate = true;
    }

    END_CRIT_SECTION();

    return tupindex;
}
 
/*
 *  lazy_scan_index() -- scan one index relation to update pg_class statistic.
 *
 * We use this when we have no deletions to do.
 */
static void
lazy_scan_index(Relation indrel, LVRelStats *vacrelstats)
{
    IndexBulkDeleteResult *stats;
    IndexVacuumCleanupInfo vcinfo;
    VacRUsage   ru0;

    vac_init_rusage(&ru0);

    /*
     * Acquire appropriate type of lock on index: must be exclusive if
     * index AM isn't concurrent-safe.
     */
    if (indrel->rd_am->amconcurrent)
        LockRelation(indrel, RowExclusiveLock);
    else
        LockRelation(indrel, AccessExclusiveLock);

    /*
     * Even though we're not planning to delete anything, we use the
     * ambulkdelete call, because (a) the scan happens within the index AM
     * for more speed, and (b) it may want to pass private statistics to
     * the amvacuumcleanup call.
     */
    stats = index_bulk_delete(indrel, dummy_tid_reaped, NULL);

    /* Do post-VACUUM cleanup, even though we deleted nothing */
    vcinfo.vacuum_full = false;
    vcinfo.message_level = elevel;

    stats = index_vacuum_cleanup(indrel, &vcinfo, stats);

    /*
     * Release lock acquired above.
     */
    if (indrel->rd_am->amconcurrent)
        UnlockRelation(indrel, RowExclusiveLock);
    else
        UnlockRelation(indrel, AccessExclusiveLock);

    if (!stats)
        return;

    /* now update statistics in pg_class */
    vac_update_relstats(RelationGetRelid(indrel),
                        stats->num_pages,
                        stats->num_index_tuples,
                        false);

    ereport(elevel,
            (errmsg("index \"%s\" now contains %.0f row versions in %u pages",
                    RelationGetRelationName(indrel),
                    stats->num_index_tuples,
                    stats->num_pages),
             errdetail("%u index pages have been deleted, %u are currently reusable.\n"
                       "%s",
                       stats->pages_deleted, stats->pages_free,
                       vac_show_rusage(&ru0))));

    pfree(stats);
}

/*
 *  lazy_vacuum_index() -- vacuum one index relation.
 *
 *      Delete all the index entries pointing to tuples listed in
 *      vacrelstats->dead_tuples.
 *
 *      Increment *index_tups_vacuumed by the number of index entries
 *      removed, and *index_pages_removed by the number of pages removed.
 *
 *      Finally, we arrange to update the index relation's statistics in
 *      pg_class.
 */
static void
lazy_vacuum_index(Relation indrel,
                  double *index_tups_vacuumed,
                  BlockNumber *index_pages_removed,
                  LVRelStats *vacrelstats)
{
    IndexBulkDeleteResult *stats;
    IndexVacuumCleanupInfo vcinfo;
    VacRUsage   ru0;

    vac_init_rusage(&ru0);

    /*
     * Acquire appropriate type of lock on index: must be exclusive if
     * index AM isn't concurrent-safe.
     */
    if (indrel->rd_am->amconcurrent)
        LockRelation(indrel, RowExclusiveLock);
    else
        LockRelation(indrel, AccessExclusiveLock);

    /* Do bulk deletion */
    stats = index_bulk_delete(indrel, lazy_tid_reaped, (void *) vacrelstats);

    /* Do post-VACUUM cleanup */
    vcinfo.vacuum_full = false;
    vcinfo.message_level = elevel;

    stats = index_vacuum_cleanup(indrel, &vcinfo, stats);

    /*
     * Release lock acquired above.
     */
    if (indrel->rd_am->amconcurrent)
        UnlockRelation(indrel, RowExclusiveLock);
    else
        UnlockRelation(indrel, AccessExclusiveLock);

    if (!stats)
        return;

    /* accumulate total removed over multiple index-cleaning cycles */
    *index_tups_vacuumed += stats->tuples_removed;
    *index_pages_removed += stats->pages_removed;

    /* now update statistics in pg_class */
    vac_update_relstats(RelationGetRelid(indrel),
                        stats->num_pages,
                        stats->num_index_tuples,
                        false);

    ereport(elevel,
            (errmsg("index \"%s\" now contains %.0f row versions in %u pages",
                    RelationGetRelationName(indrel),
                    stats->num_index_tuples,
                    stats->num_pages),
             errdetail("%.0f index row versions were removed.\n"
                       "%u index pages have been deleted, %u are currently reusable.\n"
                       "%s",
                       stats->tuples_removed,
                       stats->pages_deleted, stats->pages_free,
                       vac_show_rusage(&ru0))));

    pfree(stats);
}
 
/*
 * lazy_truncate_heap - try to truncate off any empty pages at the end
 */
static void
lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats)
{
    BlockNumber old_rel_pages = vacrelstats->rel_pages;
    BlockNumber new_rel_pages;
    PageFreeSpaceInfo *pageSpaces;
    int         n;
    int         i,
                j;
    VacRUsage   ru0;

    vac_init_rusage(&ru0);

    /*
     * We need full exclusive lock on the relation in order to do
     * truncation. If we can't get it, give up rather than waiting --- we
     * don't want to block other backends, and we don't want to deadlock
     * (which is quite possible considering we already hold a lower-grade
     * lock).
     */
    if (!ConditionalLockRelation(onerel, AccessExclusiveLock))
        return;

    /*
     * Now that we have exclusive lock, look to see if the rel has grown
     * whilst we were vacuuming with non-exclusive lock.  If so, give up;
     * the newly added pages presumably contain non-deletable tuples.
     */
    new_rel_pages = RelationGetNumberOfBlocks(onerel);
    if (new_rel_pages != old_rel_pages)
    {
        /* might as well use the latest news when we update pg_class stats */
        vacrelstats->rel_pages = new_rel_pages;
        UnlockRelation(onerel, AccessExclusiveLock);
        return;
    }

    /*
     * Scan backwards from the end to verify that the end pages actually
     * contain nothing we need to keep.  This is *necessary*, not
     * optional, because other backends could have added tuples to these
     * pages whilst we were vacuuming.
     */
    new_rel_pages = count_nondeletable_pages(onerel, vacrelstats);

    if (new_rel_pages >= old_rel_pages)
    {
        /* can't do anything after all */
        UnlockRelation(onerel, AccessExclusiveLock);
        return;
    }

    /*
     * Okay to truncate.
     *
     * First, flush any shared buffers for the blocks we intend to delete.
     * FlushRelationBuffers is a bit more than we need for this, since it
     * will also write out dirty buffers for blocks we aren't deleting,
     * but it's the closest thing in bufmgr's API.
     */
    FlushRelationBuffers(onerel, new_rel_pages);

    /*
     * Do the physical truncation.
     */
    RelationTruncate(onerel, new_rel_pages);

    /*
     * Drop free-space info for removed blocks; these must not get entered
     * into the FSM!
     */
    pageSpaces = vacrelstats->free_pages;
    n = vacrelstats->num_free_pages;
    j = 0;
    for (i = 0; i < n; i++)
    {
        if (pageSpaces[i].blkno < new_rel_pages)
        {
            pageSpaces[j] = pageSpaces[i];
            j++;
        }
    }
    vacrelstats->num_free_pages = j;
    /* We destroyed the heap ordering, so mark array unordered */
    vacrelstats->fs_is_heap = false;

    /* update statistics */
    vacrelstats->rel_pages = new_rel_pages;
    vacrelstats->pages_removed = old_rel_pages - new_rel_pages;

    /*
     * We keep the exclusive lock until commit (perhaps not necessary)?
     */

    ereport(elevel,
            (errmsg("\"%s\": truncated %u to %u pages",
                    RelationGetRelationName(onerel),
                    old_rel_pages, new_rel_pages),
             errdetail("%s",
                       vac_show_rusage(&ru0))));
}

/*
 * Rescan end pages to verify that they are (still) empty of needed tuples.
 *
 * Returns number of nondeletable pages (last nonempty page + 1).
 */
static BlockNumber
count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
{
    BlockNumber blkno;
    HeapTupleData tuple;

    /* Strange coding of loop control is needed because blkno is unsigned */
    blkno = vacrelstats->rel_pages;
    while (blkno > vacrelstats->nonempty_pages)
    {
        Buffer      buf;
        Page        page;
        OffsetNumber offnum,
                    maxoff;
        bool        tupgone,
                    hastup;

        vacuum_delay_point();

        blkno--;

        buf = ReadBuffer(onerel, blkno);

        /* In this phase we only need shared access to the buffer */
        LockBuffer(buf, BUFFER_LOCK_SHARE);

        page = BufferGetPage(buf);

        if (PageIsNew(page) || PageIsEmpty(page))
        {
            /* PageIsNew probably shouldn't happen... */
            LockBuffer(buf, BUFFER_LOCK_UNLOCK);
            ReleaseBuffer(buf);
            continue;
        }

        hastup = false;
        maxoff = PageGetMaxOffsetNumber(page);
        for (offnum = FirstOffsetNumber;
             offnum <= maxoff;
             offnum = OffsetNumberNext(offnum))
        {
            ItemId      itemid;

            itemid = PageGetItemId(page, offnum);

            if (!ItemIdIsUsed(itemid))
                continue;

            tuple.t_datamcxt = NULL;
            tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
            tuple.t_len = ItemIdGetLength(itemid);
            ItemPointerSet(&(tuple.t_self), blkno, offnum);

            tupgone = false;

            switch (HeapTupleSatisfiesVacuum(tuple.t_data, OldestXmin, buf))
            {
                case HEAPTUPLE_DEAD:
                    tupgone = true;     /* we can delete the tuple */
                    break;
                case HEAPTUPLE_LIVE:
                    /* Shouldn't be necessary to re-freeze anything */
                    break;
                case HEAPTUPLE_RECENTLY_DEAD:

                    /*
                     * If tuple is recently deleted then we must not
                     * remove it from relation.
                     */
                    break;
                case HEAPTUPLE_INSERT_IN_PROGRESS:
                    /* This is an expected case during concurrent vacuum */
                    break;
                case HEAPTUPLE_DELETE_IN_PROGRESS:
                    /* This is an expected case during concurrent vacuum */
                    break;
                default:
                    elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
                    break;
            }

            if (!tupgone)
            {
                hastup = true;
                break;          /* can stop scanning */
            }
        }                       /* scan along page */

        LockBuffer(buf, BUFFER_LOCK_UNLOCK);

        ReleaseBuffer(buf);

        /* Done scanning if we found a tuple here */
        if (hastup)
            return blkno + 1;
    }

    /*
     * If we fall out of the loop, all the previously-thought-to-be-empty
     * pages really are; we need not bother to look at the last
     * known-nonempty page.
     */
    return vacrelstats->nonempty_pages;
}
 
/*
 * lazy_space_alloc - space allocation decisions for lazy vacuum
 *
 * See the comments at the head of this file for rationale.
 */
static void
lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks)
{
    int         maxtuples;
    int         maxpages;

    maxtuples = (int) ((maintenance_work_mem * 1024L) / sizeof(ItemPointerData));
    /* stay sane if small maintenance_work_mem */
    if (maxtuples < MAX_TUPLES_PER_PAGE)
        maxtuples = MAX_TUPLES_PER_PAGE;

    vacrelstats->num_dead_tuples = 0;
    vacrelstats->max_dead_tuples = maxtuples;
    vacrelstats->dead_tuples = (ItemPointer)
        palloc(maxtuples * sizeof(ItemPointerData));

    maxpages = MaxFSMPages;
    /* No need to allocate more pages than the relation has blocks */
    if (relblocks < (BlockNumber) maxpages)
        maxpages = (int) relblocks;

    vacrelstats->fs_is_heap = false;
    vacrelstats->num_free_pages = 0;
    vacrelstats->max_free_pages = maxpages;
    vacrelstats->free_pages = (PageFreeSpaceInfo *)
        palloc(maxpages * sizeof(PageFreeSpaceInfo));
}

/*
 * lazy_record_dead_tuple - remember one deletable tuple
 */
static void
lazy_record_dead_tuple(LVRelStats *vacrelstats,
                       ItemPointer itemptr)
{
    /*
     * The array shouldn't overflow under normal behavior, but perhaps it
     * could if we are given a really small maintenance_work_mem. In that
     * case, just forget the last few tuples.
     */
    if (vacrelstats->num_dead_tuples < vacrelstats->max_dead_tuples)
    {
        vacrelstats->dead_tuples[vacrelstats->num_dead_tuples] = *itemptr;
        vacrelstats->num_dead_tuples++;
    }
}

/*
 * lazy_record_free_space - remember free space on one page
 */
static void
lazy_record_free_space(LVRelStats *vacrelstats,
                       BlockNumber page,
                       Size avail)
{
    PageFreeSpaceInfo *pageSpaces;
    int         n;

    /*
     * A page with less than stats->threshold free space will be forgotten
     * immediately, and never passed to the free space map.  Removing the
     * uselessly small entries early saves cycles, and in particular
     * reduces the amount of time we spend holding the FSM lock when we
     * finally call RecordRelationFreeSpace.  Since the FSM will probably
     * drop pages with little free space anyway, there's no point in
     * making this really small.
     *
     * XXX Is it worth trying to measure average tuple size, and using that
     * to adjust the threshold?  Would be worthwhile if FSM has no stats
     * yet for this relation.  But changing the threshold as we scan the
     * rel might lead to bizarre behavior, too.  Also, it's probably
     * better if vacuum.c has the same thresholding behavior as we do
     * here.
     */
    if (avail < vacrelstats->threshold)
        return;

    /* Copy pointers to local variables for notational simplicity */
    pageSpaces = vacrelstats->free_pages;
    n = vacrelstats->max_free_pages;

    /* If we haven't filled the array yet, just keep adding entries */
    if (vacrelstats->num_free_pages < n)
    {
        pageSpaces[vacrelstats->num_free_pages].blkno = page;
        pageSpaces[vacrelstats->num_free_pages].avail = avail;
        vacrelstats->num_free_pages++;
        return;
    }

    /*----------
     * The rest of this routine works with "heap" organization of the
     * free space arrays, wherein we maintain the heap property
     *          avail[(j-1) div 2] <= avail[j]  for 0 < j < n.
     * In particular, the zero'th element always has the smallest available
     * space and can be discarded to make room for a new page with more space.
     * See Knuth's discussion of heap-based priority queues, sec 5.2.3;
     * but note he uses 1-origin array subscripts, not 0-origin.
     *----------
     */
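    /*
     * Worked example (hypothetical values, for illustration): suppose n = 3
     * and the avail heap is {10, 40, 20}, which satisfies the property above
     * (10 <= 40 and 10 <= 20).  Offering a page with avail = 30 evicts the
     * root (10): the sift loop below compares the children 40 and 20, moves
     * 20 up into the root, and stores the new entry in the vacated slot,
     * leaving {20, 40, 30} -- still a valid heap with only n entries kept.
     */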
 
    /* If we haven't yet converted the array to heap organization, do it */
    if (!vacrelstats->fs_is_heap)
    {
        /*
         * Scan backwards through the array, "sift-up" each value into its
         * correct position.  We can start the scan at n/2-1 since each
         * entry above that position has no children to worry about.
         */
        int         l = n / 2;

        while (--l >= 0)
        {
            BlockNumber R = pageSpaces[l].blkno;
            Size        K = pageSpaces[l].avail;
            int         i;      /* i is where the "hole" is */

            i = l;
            for (;;)
            {
                int         j = 2 * i + 1;

                if (j >= n)
                    break;
                if (j + 1 < n && pageSpaces[j].avail > pageSpaces[j + 1].avail)
                    j++;
                if (K <= pageSpaces[j].avail)
                    break;
                pageSpaces[i] = pageSpaces[j];
                i = j;
            }
            pageSpaces[i].blkno = R;
            pageSpaces[i].avail = K;
        }

        vacrelstats->fs_is_heap = true;
    }

    /* If new page has more than zero'th entry, insert it into heap */
    if (avail > pageSpaces[0].avail)
    {
        /*
         * Notionally, we replace the zero'th entry with the new data, and
         * then sift-up to maintain the heap property.  Physically, the
         * new data doesn't get stored into the arrays until we find the
         * right location for it.
         */
        int         i = 0;      /* i is where the "hole" is */

        for (;;)
        {
            int         j = 2 * i + 1;

            if (j >= n)
                break;
            if (j + 1 < n && pageSpaces[j].avail > pageSpaces[j + 1].avail)
                j++;
            if (avail <= pageSpaces[j].avail)
                break;
            pageSpaces[i] = pageSpaces[j];
            i = j;
        }
        pageSpaces[i].blkno = page;
        pageSpaces[i].avail = avail;
    }
}

/*
 *  lazy_tid_reaped() -- is a particular tid deletable?
 *
 *      This has the right signature to be an IndexBulkDeleteCallback.
 *
 *      Assumes dead_tuples array is in sorted order.
 */
static bool
lazy_tid_reaped(ItemPointer itemptr, void *state)
{
    LVRelStats *vacrelstats = (LVRelStats *) state;
    ItemPointer res;

    res = (ItemPointer) bsearch((void *) itemptr,
                                (void *) vacrelstats->dead_tuples,
                                vacrelstats->num_dead_tuples,
                                sizeof(ItemPointerData),
                                vac_cmp_itemptr);

    return (res != NULL);
}

/*
 * Dummy version for lazy_scan_index.
 */
static bool
dummy_tid_reaped(ItemPointer itemptr, void *state)
{
    return false;
}

/*
 * Update the shared Free Space Map with the info we now have about
 * free space in the relation, discarding any old info the map may have.
 */
static void
lazy_update_fsm(Relation onerel, LVRelStats *vacrelstats)
{
    PageFreeSpaceInfo *pageSpaces = vacrelstats->free_pages;
    int         nPages = vacrelstats->num_free_pages;

    /*
     * Sort data into order, as required by RecordRelationFreeSpace.
     */
    if (nPages > 1)
        qsort(pageSpaces, nPages, sizeof(PageFreeSpaceInfo),
              vac_cmp_page_spaces);

    RecordRelationFreeSpace(&onerel->rd_node, nPages, pageSpaces);
}

/*
 * Comparator routines for use with qsort() and bsearch().
 */
static int
vac_cmp_itemptr(const void *left, const void *right)
{
    BlockNumber lblk,
                rblk;
    OffsetNumber loff,
                roff;

    lblk = ItemPointerGetBlockNumber((ItemPointer) left);
    rblk = ItemPointerGetBlockNumber((ItemPointer) right);

    if (lblk < rblk)
        return -1;
    if (lblk > rblk)
        return 1;

    loff = ItemPointerGetOffsetNumber((ItemPointer) left);
    roff = ItemPointerGetOffsetNumber((ItemPointer) right);

    if (loff < roff)
        return -1;
    if (loff > roff)
        return 1;

    return 0;
}

static int
vac_cmp_page_spaces(const void *left, const void *right)
{
    PageFreeSpaceInfo *linfo = (PageFreeSpaceInfo *) left;
    PageFreeSpaceInfo *rinfo = (PageFreeSpaceInfo *) right;

    if (linfo->blkno < rinfo->blkno)
        return -1;
    else if (linfo->blkno > rinfo->blkno)
        return 1;
    return 0;
}