~ubuntu-branches/ubuntu/hardy/postgresql-8.4/hardy-backports

Viewing changes to src/backend/commands/vacuumlazy.c

  • Committer: Package Import Robot
  • Author(s): Ubuntu Archive Auto-Backport
  • Date: 2011-10-27 06:13:09 UTC
  • mfrom: (5.3.14 sid)
  • Revision ID: package-import@ubuntu.com-20111027061309-zc27cjc6hu8yp0z0
  • Tags: 8.4.9-1~hardy1
Automated backport upload; no source changes.

@@ -78,17 +78,19 @@
  * Before we consider skipping a page that's marked as clean in
  * visibility map, we must've seen at least this many clean pages.
  */
-#define SKIP_PAGES_THRESHOLD    32
+#define SKIP_PAGES_THRESHOLD    ((BlockNumber) 32)
 
 typedef struct LVRelStats
 {
     /* hasindex = true means two-pass strategy; false means one-pass */
     bool        hasindex;
-    bool        scanned_all;    /* have we scanned all pages (this far)? */
     /* Overall statistics about rel */
-    BlockNumber rel_pages;
+    BlockNumber old_rel_pages;  /* previous value of pg_class.relpages */
+    BlockNumber rel_pages;      /* total number of pages */
+    BlockNumber scanned_pages;  /* number of pages we examined */
+    double      scanned_tuples; /* counts only tuples on scanned pages */
     double      old_rel_tuples; /* previous value of pg_class.reltuples */
-    double      rel_tuples;     /* counts only tuples on scanned pages */
+    double      new_rel_tuples; /* new estimated total # of tuples */
     BlockNumber pages_removed;
     double      tuples_deleted;
     BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
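The heart of the backported change is visible in this struct hunk: the single scanned_all boolean gives way to explicit page counters, and "did we scan everything?" becomes a derived property. A minimal sketch of that derivation, with a trimmed stand-in struct (the helper name is illustrative, not part of the patch); the same comparison appears later in the diff when filling ivinfo.estimated_count:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t BlockNumber;        /* matches PostgreSQL's block.h */

    /* Trimmed stand-in for the revised LVRelStats */
    typedef struct
    {
        BlockNumber rel_pages;           /* total number of pages */
        BlockNumber scanned_pages;       /* number of pages we examined */
    } LVStatsSketch;

    /* Hypothetical helper: the old scanned_all flag, now derived. */
    static inline bool
    scanned_whole_rel(const LVStatsSketch *stats)
    {
        return stats->scanned_pages >= stats->rel_pages;
    }

    int
    main(void)
    {
        LVStatsSketch s = { 100, 64 };   /* scanned 64 of 100 pages */
        printf("scanned all? %s\n", scanned_whole_rel(&s) ? "yes" : "no");
        return 0;
    }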
@@ -146,7 +148,7 @@
  */
 bool
 lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
-                BufferAccessStrategy bstrategy, bool *scanned_all)
+                BufferAccessStrategy bstrategy)
 {
     LVRelStats *vacrelstats;
     Relation   *Irel;
@@ -156,6 +158,9 @@
     TimestampTz starttime = 0;
     bool        scan_all;
     TransactionId freezeTableLimit;
+    BlockNumber new_rel_pages;
+    double      new_rel_tuples;
+    TransactionId new_frozen_xid;
     bool        heldoff = false;
 
     pg_rusage_init(&ru0);
@@ -179,7 +184,7 @@
 
     vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));
 
-    vacrelstats->scanned_all = true;    /* will be cleared if we skip a page */
+    vacrelstats->old_rel_pages = onerel->rd_rel->relpages;
     vacrelstats->old_rel_tuples = onerel->rd_rel->reltuples;
     vacrelstats->num_index_scans = 0;
 
@@ -219,24 +224,40 @@
     FreeSpaceMapVacuum(onerel);
 
     /*
-     * Update statistics in pg_class.  But only if we didn't skip any pages;
-     * the tuple count only includes tuples from the pages we've visited, and
-     * we haven't frozen tuples in unvisited pages either.  The page count is
-     * accurate in any case, but because we use the reltuples / relpages ratio
-     * in the planner, it's better to not update relpages either if we can't
-     * update reltuples.
+     * Update statistics in pg_class.
+     *
+     * A corner case here is that if we scanned no pages at all because every
+     * page is all-visible, we should not update relpages/reltuples, because
+     * we have no new information to contribute.  In particular this keeps
+     * us from replacing relpages=reltuples=0 (which means "unknown tuple
+     * density") with nonzero relpages and reltuples=0 (which means "zero
+     * tuple density") unless there's some actual evidence for the latter.
+     *
+     * Also, don't change relfrozenxid if we skipped any pages, since then
+     * we don't know for certain that all tuples have a newer xmin.
      */
-    if (vacrelstats->scanned_all)
-        vac_update_relstats(onerel,
-                            vacrelstats->rel_pages, vacrelstats->rel_tuples,
-                            vacrelstats->hasindex,
-                            FreezeLimit);
+    new_rel_pages = vacrelstats->rel_pages;
+    new_rel_tuples = vacrelstats->new_rel_tuples;
+    if (vacrelstats->scanned_pages == 0 && new_rel_pages > 0)
+    {
+        new_rel_pages = vacrelstats->old_rel_pages;
+        new_rel_tuples = vacrelstats->old_rel_tuples;
+    }
+
+    new_frozen_xid = FreezeLimit;
+    if (vacrelstats->scanned_pages < vacrelstats->rel_pages)
+        new_frozen_xid = InvalidTransactionId;
+
+    vac_update_relstats(onerel,
+                        new_rel_pages, new_rel_tuples,
+                        vacrelstats->hasindex,
+                        new_frozen_xid);
 
     /* report results to the stats collector, too */
     pgstat_report_vacuum(RelationGetRelid(onerel),
                          onerel->rd_rel->relisshared,
-                         vacrelstats->scanned_all,
-                         vacstmt->analyze, vacrelstats->rel_tuples);
+                         vacstmt->analyze,
+                         new_rel_tuples);
 
     /* and log the action if appropriate */
     if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
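The new logic in this hunk reads as a small decision procedure: keep the old pg_class numbers when no pages were scanned (nothing new was learned), and only let relfrozenxid advance when every page was visited. A standalone distillation of that decision, with illustrative names (InvalidTransactionId here just means "leave relfrozenxid alone"):

    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t BlockNumber;
    typedef uint32_t TransactionId;
    #define InvalidTransactionId ((TransactionId) 0)

    typedef struct
    {
        BlockNumber new_rel_pages;
        double      new_rel_tuples;
        TransactionId new_frozen_xid;
    } RelStatsUpdate;

    /* Hypothetical distillation of the hunk above */
    RelStatsUpdate
    decide_relstats(BlockNumber rel_pages, BlockNumber scanned_pages,
                    BlockNumber old_rel_pages, double old_rel_tuples,
                    double est_rel_tuples, TransactionId freeze_limit)
    {
        RelStatsUpdate u;

        u.new_rel_pages = rel_pages;
        u.new_rel_tuples = est_rel_tuples;
        if (scanned_pages == 0 && rel_pages > 0)
        {
            /* every page was all-visible: no new evidence, keep old stats */
            u.new_rel_pages = old_rel_pages;
            u.new_rel_tuples = old_rel_tuples;
        }

        /* relfrozenxid may only advance if no page was skipped */
        u.new_frozen_xid = (scanned_pages < rel_pages) ? InvalidTransactionId
                                                       : freeze_limit;
        return u;
    }

    int
    main(void)
    {
        /* skipped some pages: stats update, but frozen xid held back */
        RelStatsUpdate u = decide_relstats(100, 60, 100, 10000.0, 9800.0, 1234);
        printf("pages=%u tuples=%.0f frozen_xid=%u\n",
               (unsigned) u.new_rel_pages, u.new_rel_tuples,
               (unsigned) u.new_frozen_xid);
        return 0;
    }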
@@ -253,14 +274,13 @@
                                         get_namespace_name(RelationGetNamespace(onerel)),
                                         RelationGetRelationName(onerel),
                                         vacrelstats->num_index_scans,
-                                      vacrelstats->pages_removed, vacrelstats->rel_pages,
-                                    vacrelstats->tuples_deleted, vacrelstats->rel_tuples,
+                                        vacrelstats->pages_removed,
+                                        vacrelstats->rel_pages,
+                                        vacrelstats->tuples_deleted,
+                                        new_rel_tuples,
                                         pg_rusage_show(&ru0))));
     }
 
-    if (scanned_all)
-        *scanned_all = vacrelstats->scanned_all;
-
     return heldoff;
 }
 
@@ -285,7 +305,6 @@
     HeapTupleData tuple;
     char       *relname;
     BlockNumber empty_pages,
-                scanned_pages,
                 vacuumed_pages;
     double      num_tuples,
                 tups_vacuumed,
@@ -295,7 +314,8 @@
     int         i;
     PGRUsage    ru0;
     Buffer      vmbuffer = InvalidBuffer;
-    BlockNumber all_visible_streak;
+    BlockNumber next_not_all_visible_block;
+    bool        skipping_all_visible_blocks;
 
     pg_rusage_init(&ru0);
 
@@ -305,7 +325,7 @@
                     get_namespace_name(RelationGetNamespace(onerel)),
                     relname)));
 
-    empty_pages = vacuumed_pages = scanned_pages = 0;
+    empty_pages = vacuumed_pages = 0;
     num_tuples = tups_vacuumed = nkeep = nunused = 0;
 
     indstats = (IndexBulkDeleteResult **)
@@ -313,11 +333,46 @@
 
     nblocks = RelationGetNumberOfBlocks(onerel);
     vacrelstats->rel_pages = nblocks;
+    vacrelstats->scanned_pages = 0;
     vacrelstats->nonempty_pages = 0;
 
     lazy_space_alloc(vacrelstats, nblocks);
 
-    all_visible_streak = 0;
+    /*
+     * We want to skip pages that don't require vacuuming according to the
+     * visibility map, but only when we can skip at least SKIP_PAGES_THRESHOLD
+     * consecutive pages.  Since we're reading sequentially, the OS should be
+     * doing readahead for us, so there's no gain in skipping a page now and
+     * then; that's likely to disable readahead and so be counterproductive.
+     * Also, skipping even a single page means that we can't update
+     * relfrozenxid, so we only want to do it if we can skip a goodly number
+     * of pages.
+     *
+     * Before entering the main loop, establish the invariant that
+     * next_not_all_visible_block is the next block number >= blkno that's
+     * not all-visible according to the visibility map, or nblocks if there's
+     * no such block.  Also, we set up the skipping_all_visible_blocks flag,
+     * which is needed because we need hysteresis in the decision: once we've
+     * started skipping blocks, we may as well skip everything up to the next
+     * not-all-visible block.
+     *
+     * Note: if scan_all is true, we won't actually skip any pages; but we
+     * maintain next_not_all_visible_block anyway, so as to set up the
+     * all_visible_according_to_vm flag correctly for each page.
+     */
+    for (next_not_all_visible_block = 0;
+         next_not_all_visible_block < nblocks;
+         next_not_all_visible_block++)
+    {
+        if (!visibilitymap_test(onerel, next_not_all_visible_block, &vmbuffer))
+            break;
+        vacuum_delay_point();
+    }
+    if (next_not_all_visible_block >= SKIP_PAGES_THRESHOLD)
+        skipping_all_visible_blocks = true;
+    else
+        skipping_all_visible_blocks = false;
+
     for (blkno = 0; blkno < nblocks; blkno++)
     {
         Buffer      buf;
@@ -330,40 +385,45 @@
         OffsetNumber frozen[MaxOffsetNumber];
         int         nfrozen;
         Size        freespace;
-        bool        all_visible_according_to_vm = false;
+        bool        all_visible_according_to_vm;
         bool        all_visible;
+        bool        has_dead_tuples;
 
-        /*
-         * Skip pages that don't require vacuuming according to the visibility
-         * map. But only if we've seen a streak of at least
-         * SKIP_PAGES_THRESHOLD pages marked as clean. Since we're reading
-         * sequentially, the OS should be doing readahead for us and there's
-         * no gain in skipping a page now and then. You need a longer run of
-         * consecutive skipped pages before it's worthwhile. Also, skipping
-         * even a single page means that we can't update relfrozenxid or
-         * reltuples, so we only want to do it if there's a good chance to
-         * skip a goodly number of pages.
-         */
-        if (!scan_all)
+        if (blkno == next_not_all_visible_block)
         {
-            all_visible_according_to_vm =
-                visibilitymap_test(onerel, blkno, &vmbuffer);
-            if (all_visible_according_to_vm)
+            /* Time to advance next_not_all_visible_block */
+            for (next_not_all_visible_block++;
+                 next_not_all_visible_block < nblocks;
+                 next_not_all_visible_block++)
             {
-                all_visible_streak++;
-                if (all_visible_streak >= SKIP_PAGES_THRESHOLD)
-                {
-                    vacrelstats->scanned_all = false;
-                    continue;
-                }
+                if (!visibilitymap_test(onerel, next_not_all_visible_block,
+                                        &vmbuffer))
+                    break;
+                vacuum_delay_point();
             }
+
+            /*
+             * We know we can't skip the current block.  But set up
+             * skipping_all_visible_blocks to do the right thing at the
+             * following blocks.
+             */
+            if (next_not_all_visible_block - blkno > SKIP_PAGES_THRESHOLD)
+                skipping_all_visible_blocks = true;
             else
-                all_visible_streak = 0;
+                skipping_all_visible_blocks = false;
+            all_visible_according_to_vm = false;
+        }
+        else
+        {
+            /* Current block is all-visible */
+            if (skipping_all_visible_blocks && !scan_all)
+                continue;
+            all_visible_according_to_vm = true;
         }
 
         vacuum_delay_point();
 
-        scanned_pages++;
+        vacrelstats->scanned_pages++;
 
         /*
         * If we are close to overrunning the available space for dead-tuple
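Taken together, the pre-loop scan and the in-loop advance maintain one invariant: next_not_all_visible_block is always the first block >= blkno that the visibility map reports as not all-visible, and skipping only begins when the run of clean blocks exceeds SKIP_PAGES_THRESHOLD. A self-contained simulation of that decision over an in-memory bitmap (the array-based stand-in for visibilitymap_test is an assumption for illustration):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t BlockNumber;
    #define SKIP_PAGES_THRESHOLD ((BlockNumber) 32)

    /* Stand-in for visibilitymap_test(): consult a plain bool array. */
    static bool
    vm_test(const bool *all_visible, BlockNumber blk)
    {
        return all_visible[blk];
    }

    static void
    simulate_scan(const bool *vm, BlockNumber nblocks, bool scan_all)
    {
        BlockNumber blkno;
        BlockNumber next_not_all_visible_block;
        bool        skipping_all_visible_blocks;

        /* Establish the invariant before the main loop. */
        for (next_not_all_visible_block = 0;
             next_not_all_visible_block < nblocks;
             next_not_all_visible_block++)
            if (!vm_test(vm, next_not_all_visible_block))
                break;
        skipping_all_visible_blocks =
            (next_not_all_visible_block >= SKIP_PAGES_THRESHOLD);

        for (blkno = 0; blkno < nblocks; blkno++)
        {
            if (blkno == next_not_all_visible_block)
            {
                /* Advance to the next not-all-visible block. */
                for (next_not_all_visible_block++;
                     next_not_all_visible_block < nblocks;
                     next_not_all_visible_block++)
                    if (!vm_test(vm, next_not_all_visible_block))
                        break;
                /* Hysteresis: only start skipping on a long enough run. */
                skipping_all_visible_blocks =
                    (next_not_all_visible_block - blkno > SKIP_PAGES_THRESHOLD);
            }
            else if (skipping_all_visible_blocks && !scan_all)
                continue;               /* page skipped entirely */
            printf("scan block %u\n", (unsigned) blkno);
        }
    }

    int
    main(void)
    {
        bool        vm[100];
        BlockNumber i;

        /* blocks 0..69 all-visible, the rest not: the run triggers skipping */
        for (i = 0; i < 100; i++)
            vm[i] = (i < 70);
        simulate_scan(vm, 100, false);
        return 0;
    }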
@@ -475,6 +535,7 @@
          * requiring freezing.
          */
         all_visible = true;
+        has_dead_tuples = false;
         nfrozen = 0;
         hastup = false;
         prev_dead_count = vacrelstats->num_dead_tuples;
@@ -613,6 +674,7 @@
             {
                 lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
                 tups_vacuumed += 1;
+                has_dead_tuples = true;
             }
             else
             {
@@ -671,9 +733,22 @@
             PageSetAllVisible(page);
             SetBufferCommitInfoNeedsSave(buf);
         }
-        else if (PageIsAllVisible(page) && !all_visible)
+        /*
+         * It's possible for the value returned by GetOldestXmin() to move
+         * backwards, so it's not wrong for us to see tuples that appear to
+         * not be visible to everyone yet, while PD_ALL_VISIBLE is already
+         * set. The real safe xmin value never moves backwards, but
+         * GetOldestXmin() is conservative and sometimes returns a value
+         * that's unnecessarily small, so if we see that contradiction it
+         * just means that the tuples that we think are not visible to
+         * everyone yet actually are, and the PD_ALL_VISIBLE flag is correct.
+         *
+         * There should never be dead tuples on a page with PD_ALL_VISIBLE
+         * set, however.
+         */
+        else if (PageIsAllVisible(page) && has_dead_tuples)
         {
-            elog(WARNING, "PD_ALL_VISIBLE flag was incorrectly set in relation \"%s\" page %u",
+            elog(WARNING, "page containing dead tuples is marked as all-visible in relation \"%s\" page %u",
                  relname, blkno);
             PageClearAllVisible(page);
             SetBufferCommitInfoNeedsSave(buf);
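The two conditions here encode different severities of disagreement: the page flag claiming all-visible while the scan saw a not-yet-visible-to-all tuple is tolerated (GetOldestXmin() can move backwards), but an all-visible page holding dead tuples is a real inconsistency, so the flag is cleared with a warning. In sketch form, with hypothetical names and the page flag modeled as a plain bool:

    #include <stdbool.h>
    #include <stdio.h>

    /*
     * Hypothetical distillation of the post-scan checks on one page.
     * page_all_visible models PD_ALL_VISIBLE; the other two flags come
     * from examining every tuple on the page.  Returns the corrected
     * PD_ALL_VISIBLE value.
     */
    bool
    check_page_flags(bool page_all_visible, bool all_visible_per_scan,
                     bool has_dead_tuples, unsigned blkno)
    {
        if (!page_all_visible && all_visible_per_scan)
            return true;            /* safe to set the flag now */

        if (page_all_visible && has_dead_tuples)
        {
            /* genuinely inconsistent: dead tuples under PD_ALL_VISIBLE */
            fprintf(stderr,
                    "WARNING: page %u marked all-visible but has dead tuples\n",
                    blkno);
            return false;           /* clear the flag */
        }

        /*
         * page_all_visible && !all_visible_per_scan is deliberately left
         * alone: GetOldestXmin() may have moved backwards, so the flag can
         * legitimately be "ahead" of this scan's visibility judgement.
         */
        return page_all_visible;
    }

    int
    main(void)
    {
        /* the warning case: flag set, but a dead tuple was found */
        printf("corrected flag: %d\n", check_page_flags(true, false, true, 7));
        return 0;
    }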
@@ -716,9 +791,15 @@
     }
 
     /* save stats for use later */
-    vacrelstats->rel_tuples = num_tuples;
+    vacrelstats->scanned_tuples = num_tuples;
     vacrelstats->tuples_deleted = tups_vacuumed;
 
+    /* now we can compute the new value for pg_class.reltuples */
+    vacrelstats->new_rel_tuples = vac_estimate_reltuples(onerel, false,
+                                                         nblocks,
+                                                         vacrelstats->scanned_pages,
+                                                         num_tuples);
+
     /* If any tuples need to be deleted, perform final vacuum cycle */
     /* XXX put a threshold on min number of tuples here? */
     if (vacrelstats->num_dead_tuples > 0)
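vac_estimate_reltuples extrapolates a whole-table tuple count from the pages actually scanned, instead of reporting the raw per-scan count as before. The idea is to blend the old density (reltuples/relpages) with the freshly observed density, in proportion to how much of the table was seen. The exact smoothing in the real function may differ, so treat this as a model rather than the implementation:

    #include <math.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t BlockNumber;

    /* Model of the incremental reltuples estimate (not the exact source). */
    double
    estimate_reltuples(BlockNumber total_pages, BlockNumber scanned_pages,
                       double scanned_tuples,
                       BlockNumber old_rel_pages, double old_rel_tuples)
    {
        double old_density, new_density, coverage;

        if (scanned_pages >= total_pages)
            return scanned_tuples;      /* full scan: count is exact */
        if (scanned_pages == 0)
            return old_rel_tuples;      /* nothing new learned */
        if (old_rel_pages == 0)         /* no prior density to blend with */
            return floor(scanned_tuples / scanned_pages * total_pages + 0.5);

        old_density = old_rel_tuples / old_rel_pages;
        new_density = scanned_tuples / scanned_pages;
        coverage = (double) scanned_pages / (double) total_pages;

        /* Trust the new observation in proportion to how much we saw. */
        return floor((old_density + (new_density - old_density) * coverage)
                     * total_pages + 0.5);
    }

    int
    main(void)
    {
        /* old: 100 pages / 10000 tuples; scanned 30 of 120 pages, 2700 tuples */
        printf("estimate: %.0f\n",          /* prints 11700 */
               estimate_reltuples(120, 30, 2700.0, 100, 10000.0));
        return 0;
    }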
@@ -754,7 +835,8 @@
     ereport(elevel,
             (errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u out of %u pages",
                     RelationGetRelationName(onerel),
-                    tups_vacuumed, num_tuples, scanned_pages, nblocks),
+                    tups_vacuumed, num_tuples,
+                    vacrelstats->scanned_pages, nblocks),
              errdetail("%.0f dead row versions cannot be removed yet.\n"
                        "There were %.0f unused item pointers.\n"
                        "%u pages are entirely empty.\n"
@@ -928,10 +1010,9 @@
     ivinfo.index = indrel;
     ivinfo.vacuum_full = false;
     ivinfo.analyze_only = false;
-    ivinfo.estimated_count = !vacrelstats->scanned_all;
+    ivinfo.estimated_count = (vacrelstats->scanned_pages < vacrelstats->rel_pages);
     ivinfo.message_level = elevel;
-    /* use rel_tuples only if we scanned all pages, else fall back */
-    ivinfo.num_heap_tuples = vacrelstats->scanned_all ? vacrelstats->rel_tuples : vacrelstats->old_rel_tuples;
+    ivinfo.num_heap_tuples = vacrelstats->new_rel_tuples;
     ivinfo.strategy = vac_strategy;
 
     stats = index_vacuum_cleanup(&ivinfo, stats);
@@ -992,8 +1073,13 @@
     new_rel_pages = RelationGetNumberOfBlocks(onerel);
     if (new_rel_pages != old_rel_pages)
     {
-        /* might as well use the latest news when we update pg_class stats */
-        vacrelstats->rel_pages = new_rel_pages;
+        /*
+         * Note: we intentionally don't update vacrelstats->rel_pages with
+         * the new rel size here.  If we did, it would amount to assuming that
+         * the new pages are empty, which is unlikely.  Leaving the numbers
+         * alone amounts to assuming that the new pages have the same tuple
+         * density as existing ones, which is less unlikely.
+         */
         UnlockRelation(onerel, AccessExclusiveLock);
         return;
     }
@@ -1028,6 +1114,10 @@
      * can safely access the table again.
      */
 
-    /* update statistics */
+    /*
+     * Update statistics.  Here, it *is* correct to adjust rel_pages without
+     * also touching reltuples, since the tuple count wasn't changed by the
+     * truncation.
+     */
     vacrelstats->rel_pages = new_rel_pages;
     vacrelstats->pages_removed = old_rel_pages - new_rel_pages;
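The new comments in the last two hunks are mirror images of one assumption about tuple density. When the relation has grown mid-vacuum, leaving rel_pages alone treats the new pages as having the table's existing density, whereas bumping the page count without new tuple information would claim they are empty. After truncation, by contrast, shrinking rel_pages without touching the tuple count is exact, because only empty tail pages were cut. A small numeric check of both cases (the numbers are made up for illustration):

    #include <stdio.h>

    int
    main(void)
    {
        /* say the scan saw 100 pages holding 10000 tuples: 100 tuples/page */
        double      tuples = 10000.0;
        unsigned    pages = 100;

        /*
         * Growth case: relation extended to 110 pages mid-vacuum.  Keeping
         * pages = 100 implicitly assumes the 10 new pages match the existing
         * density; bumping pages to 110 with tuples unchanged would instead
         * report ~90.9 tuples/page, i.e. claim the new pages are empty.
         */
        printf("density if counters untouched: %.1f\n", tuples / pages);
        printf("implied density if pages bumped: %.1f\n", tuples / 110);

        /*
         * Truncation case: 20 empty tail pages removed.  Adjusting pages
         * without touching tuples is exact, since no tuples were removed.
         */
        pages = 100 - 20;
        printf("density after truncation: %.1f\n", tuples / pages);
        return 0;
    }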