1
/*-------------------------------------------------------------------------
4
* POSTGRES free space map for quickly finding free space in relations
7
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
8
* Portions Copyright (c) 1994, Regents of the University of California
11
* $PostgreSQL: pgsql/src/backend/storage/freespace/freespace.c,v 1.37 2004-12-31 22:00:54 pgsql Exp $
16
* The only really interesting aspect of this code is the heuristics for
17
* deciding how much information we can afford to keep about each relation,
18
* given that we have a limited amount of workspace in shared memory.
19
* These currently work as follows:
21
* The number of distinct relations tracked is limited by a configuration
22
* variable (MaxFSMRelations). When this would be exceeded, we discard the
23
* least recently used relation. A doubly-linked list with move-to-front
24
* behavior keeps track of which relation is least recently used.
26
* For each known relation, we track the average request size given to
27
* GetPageWithFreeSpace() as well as the most recent number of pages given
28
* to RecordRelationFreeSpace(). The average request size is not directly
29
* used in this module, but we expect VACUUM to use it to filter out
30
* uninteresting amounts of space before calling RecordRelationFreeSpace().
31
* The sum of the RRFS page counts is thus the total number of "interesting"
32
* pages that we would like to track; this is called DesiredFSMPages.
34
* The number of pages actually tracked is limited by a configuration variable
35
* (MaxFSMPages). When this is less than DesiredFSMPages, each relation
36
* gets to keep a fraction MaxFSMPages/DesiredFSMPages of its free pages.
37
* We discard pages with less free space to reach this target.
39
* Actually, our space allocation is done in "chunks" of CHUNKPAGES pages,
40
* with each relation guaranteed at least one chunk. This reduces thrashing
41
* of the storage allocations when there are small changes in the RRFS page
42
* counts from one VACUUM to the next. (XXX it might also be worthwhile to
43
* impose some kind of moving-average smoothing on the RRFS page counts?)
45
* So the actual arithmetic is: for each relation compute myRequest as the
46
* number of chunks needed to hold its RRFS page count (not counting the
47
* first, guaranteed chunk); compute sumRequests as the sum of these values
48
* over all relations; then for each relation figure its target allocation
50
* 1 + round(spareChunks * myRequest / sumRequests)
51
* where spareChunks = totalChunks - numRels is the number of chunks we have
52
* a choice what to do with. We round off these numbers because truncating
53
* all of them would waste significant space. But because of roundoff, it's
54
* possible for the last few relations to get less space than they should;
55
* the target allocation must be checked against remaining available space.
57
*-------------------------------------------------------------------------
66
#include "miscadmin.h"
67
#include "storage/fd.h"
68
#include "storage/freespace.h"
69
#include "storage/itemptr.h"
70
#include "storage/lwlock.h"
71
#include "storage/shmem.h"
74
/* Initial value for average-request moving average */
75
#define INITIAL_AVERAGE ((Size) (BLCKSZ / 32))
78
* Number of pages and bytes per allocation chunk. Indexes can squeeze 50%
79
* more pages into the same space because they don't need to remember how much
80
* free space on each page. The nominal number of pages, CHUNKPAGES, is for
81
* regular rels, and INDEXCHUNKPAGES is for indexes. CHUNKPAGES should be
82
* even so that no space is wasted in the index case.
85
#define CHUNKBYTES (CHUNKPAGES * sizeof(FSMPageData))
86
#define INDEXCHUNKPAGES ((int) (CHUNKBYTES / sizeof(IndexFSMPageData)))
90
* Typedefs and macros for items in the page-storage arena. We use the
91
* existing ItemPointer and BlockId data structures, which are designed
92
* to pack well (they should be 6 and 4 bytes apiece regardless of machine
93
* alignment issues). Unfortunately we can't use the ItemPointer access
94
* macros, because they include Asserts insisting that ip_posid != 0.
96
typedef ItemPointerData FSMPageData;
97
typedef BlockIdData IndexFSMPageData;
99
#define FSMPageGetPageNum(ptr) \
100
BlockIdGetBlockNumber(&(ptr)->ip_blkid)
101
#define FSMPageGetSpace(ptr) \
102
((Size) (ptr)->ip_posid)
103
#define FSMPageSetPageNum(ptr, pg) \
104
BlockIdSet(&(ptr)->ip_blkid, pg)
105
#define FSMPageSetSpace(ptr, sz) \
106
((ptr)->ip_posid = (OffsetNumber) (sz))
107
#define IndexFSMPageGetPageNum(ptr) \
108
BlockIdGetBlockNumber(ptr)
109
#define IndexFSMPageSetPageNum(ptr, pg) \
113
* During database shutdown, we store the contents of FSM into a disk file,
114
* which is re-read during startup. This way we don't have a startup
115
* transient condition where FSM isn't really functioning.
117
* The file format is:
119
* endian constant 0x01020304 for detecting endianness problems
122
* -- for each rel, in *reverse* usage order:
128
* arena data array of storedPages FSMPageData or IndexFSMPageData
132
/* Name of FSM cache file (relative to $PGDATA) */
133
#define FSM_CACHE_FILENAME "global/pg_fsm.cache"
135
/* Fixed values in header */
136
#define FSM_CACHE_LABEL "FSM"
137
#define FSM_CACHE_ENDIAN 0x01020304
138
#define FSM_CACHE_VERSION 20030305
140
/* File header layout */
141
typedef struct FsmCacheFileHeader
147
} FsmCacheFileHeader;
149
/* Per-relation header */
150
typedef struct FsmCacheRelHeader
152
RelFileNode key; /* hash key (must be first) */
153
bool isIndex; /* if true, we store only page numbers */
154
uint32 avgRequest; /* moving average of space requests */
155
int32 lastPageCount; /* pages passed to RecordRelationFreeSpace */
156
int32 storedPages; /* # of pages stored in arena */
161
* Shared free-space-map objects
163
* The per-relation objects are indexed by a hash table, and are also members
164
* of two linked lists: one ordered by recency of usage (most recent first),
165
* and the other ordered by physical location of the associated storage in
166
* the page-info arena.
168
* Each relation owns one or more chunks of per-page storage in the "arena".
169
* The chunks for each relation are always consecutive, so that it can treat
170
* its page storage as a simple array. We further insist that its page data
171
* be ordered by block number, so that binary search is possible.
173
* Note: we handle pointers to these items as pointers, not as SHMEM_OFFSETs.
174
* This assumes that all processes accessing the map will have the shared
175
* memory segment mapped at the same place in their address space.
177
typedef struct FSMHeader FSMHeader;
178
typedef struct FSMRelation FSMRelation;
180
/* Header for whole map */
183
FSMRelation *usageList; /* FSMRelations in usage-recency order */
184
FSMRelation *usageListTail; /* tail of usage-recency list */
185
FSMRelation *firstRel; /* FSMRelations in arena storage order */
186
FSMRelation *lastRel; /* tail of storage-order list */
187
int numRels; /* number of FSMRelations now in use */
188
double sumRequests; /* sum of requested chunks over all rels */
189
char *arena; /* arena for page-info storage */
190
int totalChunks; /* total size of arena, in chunks */
191
int usedChunks; /* # of chunks assigned */
192
/* NB: there are totalChunks - usedChunks free chunks at end of arena */
196
* Per-relation struct --- this is an entry in the shared hash table.
197
* The hash key is the RelFileNode value (hence, we look at the physical
198
* relation ID, not the logical ID, which is appropriate).
202
RelFileNode key; /* hash key (must be first) */
203
FSMRelation *nextUsage; /* next rel in usage-recency order */
204
FSMRelation *priorUsage; /* prior rel in usage-recency order */
205
FSMRelation *nextPhysical; /* next rel in arena-storage order */
206
FSMRelation *priorPhysical; /* prior rel in arena-storage order */
207
bool isIndex; /* if true, we store only page numbers */
208
Size avgRequest; /* moving average of space requests */
209
int lastPageCount; /* pages passed to RecordRelationFreeSpace */
210
int firstChunk; /* chunk # of my first chunk in arena */
211
int storedPages; /* # of pages stored in arena */
212
int nextPage; /* index (from 0) to start next search at */
216
int MaxFSMRelations; /* these are set by guc.c */
219
static FSMHeader *FreeSpaceMap; /* points to FSMHeader in shared memory */
220
static HTAB *FreeSpaceMapRelHash; /* points to (what used to be)
221
* FSMHeader->relHash */
224
static FSMRelation *lookup_fsm_rel(RelFileNode *rel);
225
static FSMRelation *create_fsm_rel(RelFileNode *rel);
226
static void delete_fsm_rel(FSMRelation *fsmrel);
227
static int realloc_fsm_rel(FSMRelation *fsmrel, int nPages, bool isIndex);
228
static void link_fsm_rel_usage(FSMRelation *fsmrel);
229
static void unlink_fsm_rel_usage(FSMRelation *fsmrel);
230
static void link_fsm_rel_storage(FSMRelation *fsmrel);
231
static void unlink_fsm_rel_storage(FSMRelation *fsmrel);
232
static BlockNumber find_free_space(FSMRelation *fsmrel, Size spaceNeeded);
233
static BlockNumber find_index_free_space(FSMRelation *fsmrel);
234
static void fsm_record_free_space(FSMRelation *fsmrel, BlockNumber page,
236
static bool lookup_fsm_page_entry(FSMRelation *fsmrel, BlockNumber page,
238
static void compact_fsm_storage(void);
239
static void push_fsm_rels_after(FSMRelation *afterRel);
240
static void pack_incoming_pages(FSMPageData *newLocation, int newPages,
241
PageFreeSpaceInfo *pageSpaces, int nPages);
242
static void pack_existing_pages(FSMPageData *newLocation, int newPages,
243
FSMPageData *oldLocation, int oldPages);
244
static int fsm_calc_request(FSMRelation *fsmrel);
245
static int fsm_calc_target_allocation(int myRequest);
246
static int fsm_current_chunks(FSMRelation *fsmrel);
247
static int fsm_current_allocation(FSMRelation *fsmrel);
256
* InitFreeSpaceMap -- Initialize the freespace module.
258
* This must be called once during shared memory initialization.
259
* It builds the empty free space map table. FreeSpaceLock must also be
260
* initialized at some point, but is not touched here --- we assume there is
261
* no need for locking, since only the calling process can be accessing shared
265
InitFreeSpaceMap(void)
271
/* Create table header */
272
FreeSpaceMap = (FSMHeader *) ShmemInitStruct("Free Space Map Header", sizeof(FSMHeader), &found);
273
if (FreeSpaceMap == NULL)
275
(errcode(ERRCODE_OUT_OF_MEMORY),
276
errmsg("insufficient shared memory for free space map")));
278
MemSet(FreeSpaceMap, 0, sizeof(FSMHeader));
280
/* Create hashtable for FSMRelations */
281
info.keysize = sizeof(RelFileNode);
282
info.entrysize = sizeof(FSMRelation);
283
info.hash = tag_hash;
285
FreeSpaceMapRelHash = ShmemInitHash("Free Space Map Hash",
289
(HASH_ELEM | HASH_FUNCTION));
291
if (!FreeSpaceMapRelHash)
293
(errcode(ERRCODE_OUT_OF_MEMORY),
294
errmsg("insufficient shared memory for free space map")));
300
/* Allocate page-storage arena */
301
nchunks = (MaxFSMPages - 1) / CHUNKPAGES + 1;
302
/* This check ensures spareChunks will be greater than zero */
303
if (nchunks <= MaxFSMRelations)
305
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
306
errmsg("max_fsm_pages must exceed max_fsm_relations * %d",
309
FreeSpaceMap->arena = (char *) ShmemAlloc(nchunks * CHUNKBYTES);
310
if (FreeSpaceMap->arena == NULL)
312
(errcode(ERRCODE_OUT_OF_MEMORY),
313
errmsg("insufficient shared memory for free space map")));
315
FreeSpaceMap->totalChunks = nchunks;
316
FreeSpaceMap->usedChunks = 0;
317
FreeSpaceMap->sumRequests = 0;
321
* Estimate amount of shmem space needed for FSM.
324
FreeSpaceShmemSize(void)
330
size = MAXALIGN(sizeof(FSMHeader));
332
/* hash table, including the FSMRelation objects */
333
size += hash_estimate_size(MaxFSMRelations + 1, sizeof(FSMRelation));
335
/* page-storage arena */
336
nchunks = (MaxFSMPages - 1) / CHUNKPAGES + 1;
338
if (nchunks >= (INT_MAX / CHUNKBYTES))
340
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
341
errmsg("max_fsm_pages is too large")));
343
size += MAXALIGN(nchunks * CHUNKBYTES);
349
* GetPageWithFreeSpace - try to find a page in the given relation with
350
* at least the specified amount of free space.
352
* If successful, return the block number; if not, return InvalidBlockNumber.
354
* The caller must be prepared for the possibility that the returned page
355
* will turn out to have too little space available by the time the caller
356
* gets a lock on it. In that case, the caller should report the actual
357
* amount of free space available on that page and then try again (see
358
* RecordAndGetPageWithFreeSpace). If InvalidBlockNumber is returned,
359
* extend the relation.
362
GetPageWithFreeSpace(RelFileNode *rel, Size spaceNeeded)
365
BlockNumber freepage;
367
LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
370
* We always add a rel to the hashtable when it is inquired about.
372
fsmrel = create_fsm_rel(rel);
375
* Update the moving average of space requests. This code implements
376
* an exponential moving average with an equivalent period of about 63
377
* requests. Ignore silly requests, however, to ensure that the
378
* average stays sane.
380
if (spaceNeeded > 0 && spaceNeeded < BLCKSZ)
382
int cur_avg = (int) fsmrel->avgRequest;
384
cur_avg += ((int) spaceNeeded - cur_avg) / 32;
385
fsmrel->avgRequest = (Size) cur_avg;
387
freepage = find_free_space(fsmrel, spaceNeeded);
388
LWLockRelease(FreeSpaceLock);
393
* RecordAndGetPageWithFreeSpace - update info about a page and try again.
395
* We provide this combo form, instead of a separate Record operation,
396
* to save one lock and hash table lookup cycle.
399
RecordAndGetPageWithFreeSpace(RelFileNode *rel,
405
BlockNumber freepage;
407
/* Sanity check: ensure spaceAvail will fit into OffsetNumber */
408
AssertArg(oldSpaceAvail < BLCKSZ);
410
LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
413
* We always add a rel to the hashtable when it is inquired about.
415
fsmrel = create_fsm_rel(rel);
418
fsm_record_free_space(fsmrel, oldPage, oldSpaceAvail);
421
* Update the moving average of space requests, same as in
422
* GetPageWithFreeSpace.
424
if (spaceNeeded > 0 && spaceNeeded < BLCKSZ)
426
int cur_avg = (int) fsmrel->avgRequest;
428
cur_avg += ((int) spaceNeeded - cur_avg) / 32;
429
fsmrel->avgRequest = (Size) cur_avg;
432
freepage = find_free_space(fsmrel, spaceNeeded);
433
LWLockRelease(FreeSpaceLock);
438
* GetAvgFSMRequestSize - get average FSM request size for a relation.
440
* If the relation is not known to FSM, return a default value.
443
GetAvgFSMRequestSize(RelFileNode *rel)
448
LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
449
fsmrel = lookup_fsm_rel(rel);
451
result = fsmrel->avgRequest;
453
result = INITIAL_AVERAGE;
454
LWLockRelease(FreeSpaceLock);
459
* RecordRelationFreeSpace - record available-space info about a relation.
461
* Any pre-existing info about the relation is assumed obsolete and discarded.
463
* The given pageSpaces[] array must be sorted in order by blkno. Note that
464
* the FSM is at liberty to discard some or all of the data.
467
RecordRelationFreeSpace(RelFileNode *rel,
469
PageFreeSpaceInfo *pageSpaces)
473
/* Limit nPages to something sane */
476
else if (nPages > MaxFSMPages)
477
nPages = MaxFSMPages;
479
LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
482
* Note we don't record info about a relation unless there's already
483
* an FSM entry for it, implying someone has done GetPageWithFreeSpace
484
* for it. Inactive rels thus will not clutter the map simply by
487
fsmrel = lookup_fsm_rel(rel);
492
FSMPageData *newLocation;
494
curAlloc = realloc_fsm_rel(fsmrel, nPages, false);
495
curAllocPages = curAlloc * CHUNKPAGES;
498
* If the data fits in our current allocation, just copy it;
499
* otherwise must compress.
501
newLocation = (FSMPageData *)
502
(FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
503
if (nPages <= curAllocPages)
507
for (i = 0; i < nPages; i++)
509
BlockNumber page = pageSpaces[i].blkno;
510
Size avail = pageSpaces[i].avail;
512
/* Check caller provides sorted data */
513
if (i > 0 && page <= pageSpaces[i - 1].blkno)
514
elog(ERROR, "free-space data is not in page order");
515
FSMPageSetPageNum(newLocation, page);
516
FSMPageSetSpace(newLocation, avail);
519
fsmrel->storedPages = nPages;
523
pack_incoming_pages(newLocation, curAllocPages,
525
fsmrel->storedPages = curAllocPages;
528
LWLockRelease(FreeSpaceLock);
532
* GetFreeIndexPage - like GetPageWithFreeSpace, but for indexes
535
GetFreeIndexPage(RelFileNode *rel)
538
BlockNumber freepage;
540
LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
543
* We always add a rel to the hashtable when it is inquired about.
545
fsmrel = create_fsm_rel(rel);
547
freepage = find_index_free_space(fsmrel);
548
LWLockRelease(FreeSpaceLock);
553
* RecordIndexFreeSpace - like RecordRelationFreeSpace, but for indexes
556
RecordIndexFreeSpace(RelFileNode *rel,
562
/* Limit nPages to something sane */
565
else if (nPages > MaxFSMPages)
566
nPages = MaxFSMPages;
568
LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
571
* Note we don't record info about a relation unless there's already
572
* an FSM entry for it, implying someone has done GetFreeIndexPage for
573
* it. Inactive rels thus will not clutter the map simply by being
576
fsmrel = lookup_fsm_rel(rel);
582
IndexFSMPageData *newLocation;
584
curAlloc = realloc_fsm_rel(fsmrel, nPages, true);
585
curAllocPages = curAlloc * INDEXCHUNKPAGES;
588
* If the data fits in our current allocation, just copy it;
589
* otherwise must compress. But compression is easy: we merely
590
* forget extra pages.
592
newLocation = (IndexFSMPageData *)
593
(FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
594
if (nPages > curAllocPages)
595
nPages = curAllocPages;
597
for (i = 0; i < nPages; i++)
599
BlockNumber page = pages[i];
601
/* Check caller provides sorted data */
602
if (i > 0 && page <= pages[i - 1])
603
elog(ERROR, "free-space data is not in page order");
604
IndexFSMPageSetPageNum(newLocation, page);
607
fsmrel->storedPages = nPages;
609
LWLockRelease(FreeSpaceLock);
613
* FreeSpaceMapTruncateRel - adjust for truncation of a relation.
615
* We need to delete any stored data past the new relation length, so that
616
* we don't bogusly return removed block numbers.
619
FreeSpaceMapTruncateRel(RelFileNode *rel, BlockNumber nblocks)
623
LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
624
fsmrel = lookup_fsm_rel(rel);
629
/* Use lookup to locate first entry >= nblocks */
630
(void) lookup_fsm_page_entry(fsmrel, nblocks, &pageIndex);
631
/* Delete all such entries */
632
fsmrel->storedPages = pageIndex;
633
/* XXX should we adjust rel's lastPageCount and sumRequests? */
635
LWLockRelease(FreeSpaceLock);
639
* FreeSpaceMapForgetRel - forget all about a relation.
641
* This is called when a relation is deleted. Although we could just let
642
* the rel age out of the map, it's better to reclaim and reuse the space
646
FreeSpaceMapForgetRel(RelFileNode *rel)
650
LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
651
fsmrel = lookup_fsm_rel(rel);
653
delete_fsm_rel(fsmrel);
654
LWLockRelease(FreeSpaceLock);
658
* FreeSpaceMapForgetDatabase - forget all relations of a database.
660
* This is called during DROP DATABASE. As above, might as well reclaim
661
* map space sooner instead of later.
664
FreeSpaceMapForgetDatabase(Oid dbid)
669
LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
670
for (fsmrel = FreeSpaceMap->usageList; fsmrel; fsmrel = nextrel)
672
nextrel = fsmrel->nextUsage; /* in case we delete it */
673
if (fsmrel->key.dbNode == dbid)
674
delete_fsm_rel(fsmrel);
676
LWLockRelease(FreeSpaceLock);
680
* PrintFreeSpaceMapStatistics - print statistics about FSM contents
682
* The info is sent to ereport() with the specified message level. This is
683
* intended for use during VACUUM.
686
PrintFreeSpaceMapStatistics(int elevel)
694
LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
695
/* Count total space used --- tedious, but seems useful */
696
for (fsmrel = FreeSpaceMap->firstRel;
698
fsmrel = fsmrel->nextPhysical)
699
storedPages += fsmrel->storedPages;
700
/* Copy other stats before dropping lock */
701
numRels = FreeSpaceMap->numRels;
702
sumRequests = FreeSpaceMap->sumRequests;
703
LWLockRelease(FreeSpaceLock);
705
/* Convert stats to actual number of page slots needed */
706
needed = (sumRequests + numRels) * CHUNKPAGES;
709
(errmsg("free space map: %d relations, %d pages stored; %.0f total pages needed",
710
numRels, storedPages, needed),
711
errdetail("Allocated FSM size: %d relations + %d pages = %.0f kB shared memory.",
712
MaxFSMRelations, MaxFSMPages,
713
(double) FreeSpaceShmemSize() / 1024.0)));
717
* DumpFreeSpaceMap - dump contents of FSM into a disk file for later reload
719
* This is expected to be called during database shutdown, after updates to
720
* the FSM have stopped. We lock the FreeSpaceLock but that's purely pro
721
* forma --- if anyone else is still accessing FSM, there's a problem.
724
DumpFreeSpaceMap(int code, Datum arg)
727
char cachefilename[MAXPGPATH];
728
FsmCacheFileHeader header;
731
/* Try to create file */
732
snprintf(cachefilename, sizeof(cachefilename), "%s/%s",
733
DataDir, FSM_CACHE_FILENAME);
735
unlink(cachefilename); /* in case it exists w/wrong permissions */
737
fp = AllocateFile(cachefilename, PG_BINARY_W);
740
elog(LOG, "could not write \"%s\": %m", cachefilename);
744
LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
746
/* Write file header */
747
MemSet(&header, 0, sizeof(header));
748
strcpy(header.label, FSM_CACHE_LABEL);
749
header.endian = FSM_CACHE_ENDIAN;
750
header.version = FSM_CACHE_VERSION;
751
header.numRels = FreeSpaceMap->numRels;
752
if (fwrite(&header, 1, sizeof(header), fp) != sizeof(header))
755
/* For each relation, in order from least to most recently used... */
756
for (fsmrel = FreeSpaceMap->usageListTail;
758
fsmrel = fsmrel->priorUsage)
760
FsmCacheRelHeader relheader;
763
/* Write relation header */
764
MemSet(&relheader, 0, sizeof(relheader));
765
relheader.key = fsmrel->key;
766
relheader.isIndex = fsmrel->isIndex;
767
relheader.avgRequest = fsmrel->avgRequest;
768
relheader.lastPageCount = fsmrel->lastPageCount;
769
relheader.storedPages = fsmrel->storedPages;
770
if (fwrite(&relheader, 1, sizeof(relheader), fp) != sizeof(relheader))
773
/* Write the per-page data directly from the arena */
774
nPages = fsmrel->storedPages;
781
len = nPages * sizeof(IndexFSMPageData);
783
len = nPages * sizeof(FSMPageData);
785
(FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
786
if (fwrite(data, 1, len, fp) != len)
792
LWLockRelease(FreeSpaceLock);
796
elog(LOG, "could not write \"%s\": %m", cachefilename);
797
/* Remove busted cache file */
798
unlink(cachefilename);
804
elog(LOG, "could not write \"%s\": %m", cachefilename);
807
LWLockRelease(FreeSpaceLock);
811
/* Remove busted cache file */
812
unlink(cachefilename);
816
* LoadFreeSpaceMap - load contents of FSM from a disk file
818
* This is expected to be called during database startup, before any FSM
819
* updates begin. We lock the FreeSpaceLock but that's purely pro
820
* forma --- if anyone else is accessing FSM yet, there's a problem.
822
* Notes: no complaint is issued if no cache file is found. If the file is
823
* found, it is deleted after reading. Thus, if we crash without a clean
824
* shutdown, the next cycle of life starts with no FSM data. To do otherwise,
825
* we'd need to do significantly more validation in this routine, because of
826
* the likelihood that what is in the dump file would be out-of-date, eg
827
* there might be entries for deleted or truncated rels.
830
LoadFreeSpaceMap(void)
833
char cachefilename[MAXPGPATH];
834
FsmCacheFileHeader header;
837
/* Try to open file */
838
snprintf(cachefilename, sizeof(cachefilename), "%s/%s",
839
DataDir, FSM_CACHE_FILENAME);
841
fp = AllocateFile(cachefilename, PG_BINARY_R);
845
elog(LOG, "could not read \"%s\": %m", cachefilename);
849
LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
851
/* Read and verify file header */
852
if (fread(&header, 1, sizeof(header), fp) != sizeof(header) ||
853
strcmp(header.label, FSM_CACHE_LABEL) != 0 ||
854
header.endian != FSM_CACHE_ENDIAN ||
855
header.version != FSM_CACHE_VERSION ||
858
elog(LOG, "bogus file header in \"%s\"", cachefilename);
862
/* For each relation, in order from least to most recently used... */
863
for (relno = 0; relno < header.numRels; relno++)
865
FsmCacheRelHeader relheader;
873
/* Read and verify relation header, as best we can */
874
if (fread(&relheader, 1, sizeof(relheader), fp) != sizeof(relheader) ||
875
(relheader.isIndex != false && relheader.isIndex != true) ||
876
relheader.avgRequest >= BLCKSZ ||
877
relheader.lastPageCount < 0 ||
878
relheader.storedPages < 0)
880
elog(LOG, "bogus rel header in \"%s\"", cachefilename);
884
/* Make sure lastPageCount doesn't exceed current MaxFSMPages */
885
if (relheader.lastPageCount > MaxFSMPages)
886
relheader.lastPageCount = MaxFSMPages;
888
/* Read the per-page data */
889
nPages = relheader.storedPages;
890
if (relheader.isIndex)
891
len = nPages * sizeof(IndexFSMPageData);
893
len = nPages * sizeof(FSMPageData);
894
data = (char *) palloc(len);
895
if (fread(data, 1, len, fp) != len)
897
elog(LOG, "premature EOF in \"%s\"", cachefilename);
903
* Okay, create the FSM entry and insert data into it. Since the
904
* rels were stored in reverse usage order, at the end of the loop
905
* they will be correctly usage-ordered in memory; and if
906
* MaxFSMRelations is less than it used to be, we will correctly
907
* drop the least recently used ones.
909
fsmrel = create_fsm_rel(&relheader.key);
910
fsmrel->avgRequest = relheader.avgRequest;
912
curAlloc = realloc_fsm_rel(fsmrel, relheader.lastPageCount,
914
if (relheader.isIndex)
916
IndexFSMPageData *newLocation;
918
curAllocPages = curAlloc * INDEXCHUNKPAGES;
921
* If the data fits in our current allocation, just copy it;
922
* otherwise must compress. But compression is easy: we
923
* merely forget extra pages.
925
newLocation = (IndexFSMPageData *)
926
(FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
927
if (nPages > curAllocPages)
928
nPages = curAllocPages;
929
memcpy(newLocation, data, nPages * sizeof(IndexFSMPageData));
930
fsmrel->storedPages = nPages;
934
FSMPageData *newLocation;
936
curAllocPages = curAlloc * CHUNKPAGES;
939
* If the data fits in our current allocation, just copy it;
940
* otherwise must compress.
942
newLocation = (FSMPageData *)
943
(FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
944
if (nPages <= curAllocPages)
946
memcpy(newLocation, data, nPages * sizeof(FSMPageData));
947
fsmrel->storedPages = nPages;
951
pack_existing_pages(newLocation, curAllocPages,
952
(FSMPageData *) data, nPages);
953
fsmrel->storedPages = curAllocPages;
963
LWLockRelease(FreeSpaceLock);
967
/* Remove cache file before it can become stale; see notes above */
968
unlink(cachefilename);
973
* Internal routines. These all assume the caller holds the FreeSpaceLock.
977
* Lookup a relation in the hash table. If not present, return NULL.
979
* The relation's position in the LRU list is not changed.
982
lookup_fsm_rel(RelFileNode *rel)
986
fsmrel = (FSMRelation *) hash_search(FreeSpaceMapRelHash,
997
* Lookup a relation in the hash table, creating an entry if not present.
999
* On successful lookup, the relation is moved to the front of the LRU list.
1001
static FSMRelation *
1002
create_fsm_rel(RelFileNode *rel)
1004
FSMRelation *fsmrel;
1007
fsmrel = (FSMRelation *) hash_search(FreeSpaceMapRelHash,
1013
(errcode(ERRCODE_OUT_OF_MEMORY),
1014
errmsg("out of shared memory")));
1018
/* New hashtable entry, initialize it (hash_search set the key) */
1019
fsmrel->isIndex = false; /* until we learn different */
1020
fsmrel->avgRequest = INITIAL_AVERAGE;
1021
fsmrel->lastPageCount = 0;
1022
fsmrel->firstChunk = -1; /* no space allocated */
1023
fsmrel->storedPages = 0;
1024
fsmrel->nextPage = 0;
1026
/* Discard lowest-priority existing rel, if we are over limit */
1027
if (FreeSpaceMap->numRels >= MaxFSMRelations)
1028
delete_fsm_rel(FreeSpaceMap->usageListTail);
1030
/* Add new entry at front of LRU list */
1031
link_fsm_rel_usage(fsmrel);
1032
fsmrel->nextPhysical = NULL; /* not in physical-storage list */
1033
fsmrel->priorPhysical = NULL;
1034
FreeSpaceMap->numRels++;
1035
/* sumRequests is unchanged because request must be zero */
1039
/* Existing entry, move to front of LRU list */
1040
if (fsmrel->priorUsage != NULL)
1042
unlink_fsm_rel_usage(fsmrel);
1043
link_fsm_rel_usage(fsmrel);
1051
* Remove an existing FSMRelation entry.
1054
delete_fsm_rel(FSMRelation *fsmrel)
1056
FSMRelation *result;
1058
FreeSpaceMap->sumRequests -= fsm_calc_request(fsmrel);
1059
unlink_fsm_rel_usage(fsmrel);
1060
unlink_fsm_rel_storage(fsmrel);
1061
FreeSpaceMap->numRels--;
1062
result = (FSMRelation *) hash_search(FreeSpaceMapRelHash,
1063
(void *) &(fsmrel->key),
1067
elog(ERROR, "FreeSpaceMap hashtable corrupted");
1071
* Reallocate space for a FSMRelation.
1073
* This is shared code for RecordRelationFreeSpace and RecordIndexFreeSpace.
1074
* The return value is the actual new allocation, in chunks.
1077
realloc_fsm_rel(FSMRelation *fsmrel, int nPages, bool isIndex)
1084
* Delete any existing entries, and update request status.
1086
fsmrel->storedPages = 0;
1087
FreeSpaceMap->sumRequests -= fsm_calc_request(fsmrel);
1088
fsmrel->lastPageCount = nPages;
1089
fsmrel->isIndex = isIndex;
1090
myRequest = fsm_calc_request(fsmrel);
1091
FreeSpaceMap->sumRequests += myRequest;
1092
myAlloc = fsm_calc_target_allocation(myRequest);
1095
* Need to reallocate space if (a) my target allocation is more than
1096
* my current allocation, AND (b) my actual immediate need
1097
* (myRequest+1 chunks) is more than my current allocation. Otherwise
1098
* just store the new data in-place.
1100
curAlloc = fsm_current_allocation(fsmrel);
1101
if (myAlloc > curAlloc && (myRequest + 1) > curAlloc && nPages > 0)
1103
/* Remove entry from storage list, and compact */
1104
unlink_fsm_rel_storage(fsmrel);
1105
compact_fsm_storage();
1106
/* Reattach to end of storage list */
1107
link_fsm_rel_storage(fsmrel);
1108
/* And allocate storage */
1109
fsmrel->firstChunk = FreeSpaceMap->usedChunks;
1110
FreeSpaceMap->usedChunks += myAlloc;
1112
/* Watch out for roundoff error */
1113
if (FreeSpaceMap->usedChunks > FreeSpaceMap->totalChunks)
1115
FreeSpaceMap->usedChunks = FreeSpaceMap->totalChunks;
1116
curAlloc = FreeSpaceMap->totalChunks - fsmrel->firstChunk;
1123
* Link a FSMRelation into the LRU list (always at the head).
1126
link_fsm_rel_usage(FSMRelation *fsmrel)
1128
fsmrel->priorUsage = NULL;
1129
fsmrel->nextUsage = FreeSpaceMap->usageList;
1130
FreeSpaceMap->usageList = fsmrel;
1131
if (fsmrel->nextUsage != NULL)
1132
fsmrel->nextUsage->priorUsage = fsmrel;
1134
FreeSpaceMap->usageListTail = fsmrel;
1138
* Delink a FSMRelation from the LRU list.
1141
unlink_fsm_rel_usage(FSMRelation *fsmrel)
1143
if (fsmrel->priorUsage != NULL)
1144
fsmrel->priorUsage->nextUsage = fsmrel->nextUsage;
1146
FreeSpaceMap->usageList = fsmrel->nextUsage;
1147
if (fsmrel->nextUsage != NULL)
1148
fsmrel->nextUsage->priorUsage = fsmrel->priorUsage;
1150
FreeSpaceMap->usageListTail = fsmrel->priorUsage;
1153
* We don't bother resetting fsmrel's links, since it's about to be
1154
* deleted or relinked at the head.
1159
* Link a FSMRelation into the storage-order list (always at the tail).
1162
link_fsm_rel_storage(FSMRelation *fsmrel)
1164
fsmrel->nextPhysical = NULL;
1165
fsmrel->priorPhysical = FreeSpaceMap->lastRel;
1166
if (FreeSpaceMap->lastRel != NULL)
1167
FreeSpaceMap->lastRel->nextPhysical = fsmrel;
1169
FreeSpaceMap->firstRel = fsmrel;
1170
FreeSpaceMap->lastRel = fsmrel;
1174
* Delink a FSMRelation from the storage-order list, if it's in it.
1177
unlink_fsm_rel_storage(FSMRelation *fsmrel)
1179
if (fsmrel->priorPhysical != NULL || FreeSpaceMap->firstRel == fsmrel)
1181
if (fsmrel->priorPhysical != NULL)
1182
fsmrel->priorPhysical->nextPhysical = fsmrel->nextPhysical;
1184
FreeSpaceMap->firstRel = fsmrel->nextPhysical;
1185
if (fsmrel->nextPhysical != NULL)
1186
fsmrel->nextPhysical->priorPhysical = fsmrel->priorPhysical;
1188
FreeSpaceMap->lastRel = fsmrel->priorPhysical;
1190
/* mark as not in list, since we may not put it back immediately */
1191
fsmrel->nextPhysical = NULL;
1192
fsmrel->priorPhysical = NULL;
1193
/* Also mark it as having no storage */
1194
fsmrel->firstChunk = -1;
1195
fsmrel->storedPages = 0;
1199
* Look to see if a page with at least the specified amount of space is
1200
* available in the given FSMRelation. If so, return its page number,
1201
* and advance the nextPage counter so that the next inquiry will return
1202
* a different page if possible; also update the entry to show that the
1203
* requested space is not available anymore. Return InvalidBlockNumber
1207
find_free_space(FSMRelation *fsmrel, Size spaceNeeded)
1210
int pagesToCheck, /* outer loop counter */
1211
pageIndex; /* current page index */
1213
if (fsmrel->isIndex)
1214
elog(ERROR, "find_free_space called for an index relation");
1215
info = (FSMPageData *)
1216
(FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
1217
pageIndex = fsmrel->nextPage;
1218
/* Last operation may have left nextPage pointing past end */
1219
if (pageIndex >= fsmrel->storedPages)
1222
for (pagesToCheck = fsmrel->storedPages; pagesToCheck > 0; pagesToCheck--)
1224
FSMPageData *page = info + pageIndex;
1225
Size spaceAvail = FSMPageGetSpace(page);
1227
/* Check this page */
1228
if (spaceAvail >= spaceNeeded)
1231
* Found what we want --- adjust the entry, and update
1234
FSMPageSetSpace(page, spaceAvail - spaceNeeded);
1235
fsmrel->nextPage = pageIndex + 1;
1236
return FSMPageGetPageNum(page);
1238
/* Advance pageIndex, wrapping around if needed */
1239
if (++pageIndex >= fsmrel->storedPages)
1243
return InvalidBlockNumber; /* nothing found */
1247
* As above, but for index case --- we only deal in whole pages.
1250
find_index_free_space(FSMRelation *fsmrel)
1252
IndexFSMPageData *info;
1256
* If isIndex isn't set, it could be that RecordIndexFreeSpace() has
1257
* never yet been called on this relation, and we're still looking at
1258
* the default setting from create_fsm_rel(). If so, just act as
1259
* though there's no space.
1261
if (!fsmrel->isIndex)
1263
if (fsmrel->storedPages == 0)
1264
return InvalidBlockNumber;
1265
elog(ERROR, "find_index_free_space called for a non-index relation");
1269
* For indexes, there's no need for the nextPage state variable; we
1270
* just remove and return the first available page. (We could save
1271
* cycles here by returning the last page, but it seems better to
1272
* encourage re-use of lower-numbered pages.)
1274
if (fsmrel->storedPages <= 0)
1275
return InvalidBlockNumber; /* no pages available */
1276
info = (IndexFSMPageData *)
1277
(FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
1278
result = IndexFSMPageGetPageNum(info);
1279
fsmrel->storedPages--;
1280
memmove(info, info + 1, fsmrel->storedPages * sizeof(IndexFSMPageData));
1285
* fsm_record_free_space - guts of RecordFreeSpace operation (now only
1286
* provided as part of RecordAndGetPageWithFreeSpace).
1289
fsm_record_free_space(FSMRelation *fsmrel, BlockNumber page, Size spaceAvail)
1293
if (fsmrel->isIndex)
1294
elog(ERROR, "fsm_record_free_space called for an index relation");
1295
if (lookup_fsm_page_entry(fsmrel, page, &pageIndex))
1297
/* Found an existing entry for page; update it */
1300
info = (FSMPageData *)
1301
(FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
1303
FSMPageSetSpace(info, spaceAvail);
1308
* No existing entry; ignore the call. We used to add the page to
1309
* the FSM --- but in practice, if the page hasn't got enough
1310
* space to satisfy the caller who's kicking it back to us, then
1311
* it's probably uninteresting to everyone else as well.
1317
* Look for an entry for a specific page (block number) in a FSMRelation.
1318
* Returns TRUE if a matching entry exists, else FALSE.
1320
* The output argument *outPageIndex is set to indicate where the entry exists
1321
* (if TRUE result) or could be inserted (if FALSE result).
1324
lookup_fsm_page_entry(FSMRelation *fsmrel, BlockNumber page,
1327
/* Check for empty relation */
1328
if (fsmrel->storedPages <= 0)
1334
/* Do binary search */
1335
if (fsmrel->isIndex)
1337
IndexFSMPageData *info;
1341
info = (IndexFSMPageData *)
1342
(FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
1344
high = fsmrel->storedPages - 1;
1350
middle = low + (high - low) / 2;
1351
probe = IndexFSMPageGetPageNum(info + middle);
1354
*outPageIndex = middle;
1357
else if (probe < page)
1362
*outPageIndex = low;
1371
info = (FSMPageData *)
1372
(FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
1374
high = fsmrel->storedPages - 1;
1380
middle = low + (high - low) / 2;
1381
probe = FSMPageGetPageNum(info + middle);
1384
*outPageIndex = middle;
1387
else if (probe < page)
1392
*outPageIndex = low;
1398
* Re-pack the FSM storage arena, dropping data if necessary to meet the
1399
* current allocation target for each relation. At conclusion, all available
1400
* space in the arena will be coalesced at the end.
1403
compact_fsm_storage(void)
1405
int nextChunkIndex = 0;
1406
bool did_push = false;
1407
FSMRelation *fsmrel;
1409
for (fsmrel = FreeSpaceMap->firstRel;
1411
fsmrel = fsmrel->nextPhysical)
1422
* Calculate target allocation, make sure we don't overrun due to
1425
newAlloc = fsm_calc_target_allocation(fsm_calc_request(fsmrel));
1426
if (newAlloc > FreeSpaceMap->totalChunks - nextChunkIndex)
1427
newAlloc = FreeSpaceMap->totalChunks - nextChunkIndex;
1428
if (fsmrel->isIndex)
1429
newAllocPages = newAlloc * INDEXCHUNKPAGES;
1431
newAllocPages = newAlloc * CHUNKPAGES;
1434
* Determine current size, current and new locations
1436
curChunks = fsm_current_chunks(fsmrel);
1437
oldChunkIndex = fsmrel->firstChunk;
1438
oldLocation = FreeSpaceMap->arena + oldChunkIndex * CHUNKBYTES;
1439
newChunkIndex = nextChunkIndex;
1440
newLocation = FreeSpaceMap->arena + newChunkIndex * CHUNKBYTES;
1443
* It's possible that we have to move data down, not up, if the
1444
* allocations of previous rels expanded. This normally means
1445
* that our allocation expanded too (or at least got no worse),
1446
* and ditto for later rels. So there should be room to move all
1447
* our data down without dropping any --- but we might have to
1448
* push down following rels to acquire the room. We don't want to
1449
* do the push more than once, so pack everything against the end
1450
* of the arena if so.
1452
* In corner cases where we are on the short end of a roundoff choice
1453
* that we were formerly on the long end of, it's possible that we
1454
* have to move down and compress our data too. In fact, even
1455
* after pushing down the following rels, there might not be as
1456
* much space as we computed for this rel above --- that would
1457
* imply that some following rel(s) are also on the losing end of
1458
* roundoff choices. We could handle this fairly by doing the
1459
* per-rel compactions out-of-order, but that seems like way too
1460
* much complexity to deal with a very infrequent corner case.
1461
* Instead, we simply drop pages from the end of the current rel's
1462
* data until it fits.
1464
if (newChunkIndex > oldChunkIndex)
1466
int limitChunkIndex;
1468
if (newAllocPages < fsmrel->storedPages)
1470
/* move and compress --- just drop excess pages */
1471
fsmrel->storedPages = newAllocPages;
1472
curChunks = fsm_current_chunks(fsmrel);
1474
/* is there enough space? */
1475
if (fsmrel->nextPhysical != NULL)
1476
limitChunkIndex = fsmrel->nextPhysical->firstChunk;
1478
limitChunkIndex = FreeSpaceMap->totalChunks;
1479
if (newChunkIndex + curChunks > limitChunkIndex)
1481
/* not enough space, push down following rels */
1484
push_fsm_rels_after(fsmrel);
1487
/* now is there enough space? */
1488
if (fsmrel->nextPhysical != NULL)
1489
limitChunkIndex = fsmrel->nextPhysical->firstChunk;
1491
limitChunkIndex = FreeSpaceMap->totalChunks;
1492
if (newChunkIndex + curChunks > limitChunkIndex)
1494
/* uh-oh, forcibly cut the allocation to fit */
1495
newAlloc = limitChunkIndex - newChunkIndex;
1498
* If newAlloc < 0 at this point, we are moving the
1499
* rel's firstChunk into territory currently assigned
1500
* to a later rel. This is okay so long as we do not
1501
* copy any data. The rels will be back in
1502
* nondecreasing firstChunk order at completion of the
1507
if (fsmrel->isIndex)
1508
newAllocPages = newAlloc * INDEXCHUNKPAGES;
1510
newAllocPages = newAlloc * CHUNKPAGES;
1511
fsmrel->storedPages = newAllocPages;
1512
curChunks = fsm_current_chunks(fsmrel);
1515
memmove(newLocation, oldLocation, curChunks * CHUNKBYTES);
1517
else if (newAllocPages < fsmrel->storedPages)
1520
* Need to compress the page data. For an index,
1521
* "compression" just means dropping excess pages; otherwise
1522
* we try to keep the ones with the most space.
1524
if (fsmrel->isIndex)
1526
fsmrel->storedPages = newAllocPages;
1527
/* may need to move data */
1528
if (newChunkIndex != oldChunkIndex)
1529
memmove(newLocation, oldLocation, newAlloc * CHUNKBYTES);
1533
pack_existing_pages((FSMPageData *) newLocation,
1535
(FSMPageData *) oldLocation,
1536
fsmrel->storedPages);
1537
fsmrel->storedPages = newAllocPages;
1540
else if (newChunkIndex != oldChunkIndex)
1543
* No compression needed, but must copy the data up
1545
memmove(newLocation, oldLocation, curChunks * CHUNKBYTES);
1547
fsmrel->firstChunk = newChunkIndex;
1548
nextChunkIndex += newAlloc;
1550
Assert(nextChunkIndex <= FreeSpaceMap->totalChunks);
1551
FreeSpaceMap->usedChunks = nextChunkIndex;
1555
* Push all FSMRels physically after afterRel to the end of the storage arena.
1557
* We sometimes have to do this when deletion or truncation of a relation
1558
* causes the allocations of remaining rels to expand markedly. We must
1559
* temporarily push existing data down to the end so that we can move it
1560
* back up in an orderly fashion.
1563
push_fsm_rels_after(FSMRelation *afterRel)
1565
int nextChunkIndex = FreeSpaceMap->totalChunks;
1566
FSMRelation *fsmrel;
1568
FreeSpaceMap->usedChunks = FreeSpaceMap->totalChunks;
1570
for (fsmrel = FreeSpaceMap->lastRel;
1572
fsmrel = fsmrel->priorPhysical)
1580
if (fsmrel == afterRel)
1583
chunkCount = fsm_current_chunks(fsmrel);
1584
nextChunkIndex -= chunkCount;
1585
newChunkIndex = nextChunkIndex;
1586
oldChunkIndex = fsmrel->firstChunk;
1587
if (newChunkIndex < oldChunkIndex)
1589
/* we're pushing down, how can it move up? */
1590
elog(PANIC, "inconsistent entry sizes in FSM");
1592
else if (newChunkIndex > oldChunkIndex)
1594
/* need to move it */
1595
newLocation = FreeSpaceMap->arena + newChunkIndex * CHUNKBYTES;
1596
oldLocation = FreeSpaceMap->arena + oldChunkIndex * CHUNKBYTES;
1597
memmove(newLocation, oldLocation, chunkCount * CHUNKBYTES);
1598
fsmrel->firstChunk = newChunkIndex;
1601
Assert(nextChunkIndex >= 0);
1605
* Pack a set of per-page freespace data into a smaller amount of space.
1607
* The method is to compute a low-resolution histogram of the free space
1608
* amounts, then determine which histogram bin contains the break point.
1609
* We then keep all pages above that bin, none below it, and just enough
1610
* of the pages in that bin to fill the output area exactly.
1612
#define HISTOGRAM_BINS 64
1615
pack_incoming_pages(FSMPageData *newLocation, int newPages,
1616
PageFreeSpaceInfo *pageSpaces, int nPages)
1618
int histogram[HISTOGRAM_BINS];
1625
Assert(newPages < nPages); /* else I shouldn't have been called */
1626
/* Build histogram */
1627
MemSet(histogram, 0, sizeof(histogram));
1628
for (i = 0; i < nPages; i++)
1630
Size avail = pageSpaces[i].avail;
1632
if (avail >= BLCKSZ)
1633
elog(ERROR, "bogus freespace amount");
1634
avail /= (BLCKSZ / HISTOGRAM_BINS);
1637
/* Find the breakpoint bin */
1639
for (i = HISTOGRAM_BINS - 1; i >= 0; i--)
1641
int sum = above + histogram[i];
1648
thresholdL = i * BLCKSZ / HISTOGRAM_BINS; /* low bound of bp bin */
1649
thresholdU = (i + 1) * BLCKSZ / HISTOGRAM_BINS; /* hi bound */
1650
binct = newPages - above; /* number to take from bp bin */
1651
/* And copy the appropriate data */
1652
for (i = 0; i < nPages; i++)
1654
BlockNumber page = pageSpaces[i].blkno;
1655
Size avail = pageSpaces[i].avail;
1657
/* Check caller provides sorted data */
1658
if (i > 0 && page <= pageSpaces[i - 1].blkno)
1659
elog(ERROR, "free-space data is not in page order");
1660
/* Save this page? */
1661
if (avail >= thresholdU ||
1662
(avail >= thresholdL && (--binct >= 0)))
1664
FSMPageSetPageNum(newLocation, page);
1665
FSMPageSetSpace(newLocation, avail);
1670
Assert(newPages == 0);
1674
* Pack a set of per-page freespace data into a smaller amount of space.
1676
* This is algorithmically identical to pack_incoming_pages(), but accepts
1677
* a different input representation. Also, we assume the input data has
1678
* previously been checked for validity (size in bounds, pages in order).
1680
* Note: it is possible for the source and destination arrays to overlap.
1681
* The caller is responsible for making sure newLocation is at lower addresses
1682
* so that we can copy data moving forward in the arrays without problem.
1685
pack_existing_pages(FSMPageData *newLocation, int newPages,
1686
FSMPageData *oldLocation, int oldPages)
1688
int histogram[HISTOGRAM_BINS];
1695
Assert(newPages < oldPages); /* else I shouldn't have been called */
1696
/* Build histogram */
1697
MemSet(histogram, 0, sizeof(histogram));
1698
for (i = 0; i < oldPages; i++)
1700
Size avail = FSMPageGetSpace(oldLocation + i);
1702
/* Shouldn't happen, but test to protect against stack clobber */
1703
if (avail >= BLCKSZ)
1704
elog(ERROR, "bogus freespace amount");
1705
avail /= (BLCKSZ / HISTOGRAM_BINS);
1708
/* Find the breakpoint bin */
1710
for (i = HISTOGRAM_BINS - 1; i >= 0; i--)
1712
int sum = above + histogram[i];
1719
thresholdL = i * BLCKSZ / HISTOGRAM_BINS; /* low bound of bp bin */
1720
thresholdU = (i + 1) * BLCKSZ / HISTOGRAM_BINS; /* hi bound */
1721
binct = newPages - above; /* number to take from bp bin */
1722
/* And copy the appropriate data */
1723
for (i = 0; i < oldPages; i++)
1725
BlockNumber page = FSMPageGetPageNum(oldLocation + i);
1726
Size avail = FSMPageGetSpace(oldLocation + i);
1728
/* Save this page? */
1729
if (avail >= thresholdU ||
1730
(avail >= thresholdL && (--binct >= 0)))
1732
FSMPageSetPageNum(newLocation, page);
1733
FSMPageSetSpace(newLocation, avail);
1738
Assert(newPages == 0);
1742
* Calculate number of chunks "requested" by a rel.
1744
* Rel's lastPageCount and isIndex settings must be up-to-date when called.
1746
* See notes at top of file for details.
1749
fsm_calc_request(FSMRelation *fsmrel)
1753
/* Convert page count to chunk count */
1754
if (fsmrel->isIndex)
1755
chunkCount = (fsmrel->lastPageCount - 1) / INDEXCHUNKPAGES + 1;
1757
chunkCount = (fsmrel->lastPageCount - 1) / CHUNKPAGES + 1;
1758
/* "Request" is anything beyond our one guaranteed chunk */
1759
if (chunkCount <= 0)
1762
return chunkCount - 1;
1766
* Calculate target allocation (number of chunks) for a rel
1768
* Parameter is the result from fsm_calc_request(). The global sumRequests
1769
* and numRels totals must be up-to-date already.
1771
* See notes at top of file for details.
1774
fsm_calc_target_allocation(int myRequest)
1779
spareChunks = FreeSpaceMap->totalChunks - FreeSpaceMap->numRels;
1780
Assert(spareChunks > 0);
1781
if (spareChunks >= FreeSpaceMap->sumRequests)
1783
/* We aren't oversubscribed, so allocate exactly the request */
1788
extra = (int) rint(spareChunks * myRequest / FreeSpaceMap->sumRequests);
1789
if (extra < 0) /* shouldn't happen, but make sure */
1796
* Calculate number of chunks actually used to store current data
1799
fsm_current_chunks(FSMRelation *fsmrel)
1803
/* Make sure storedPages==0 produces right answer */
1804
if (fsmrel->storedPages <= 0)
1806
/* Convert page count to chunk count */
1807
if (fsmrel->isIndex)
1808
chunkCount = (fsmrel->storedPages - 1) / INDEXCHUNKPAGES + 1;
1810
chunkCount = (fsmrel->storedPages - 1) / CHUNKPAGES + 1;
1815
* Calculate current actual allocation (number of chunks) for a rel
1818
fsm_current_allocation(FSMRelation *fsmrel)
1820
if (fsmrel->nextPhysical != NULL)
1821
return fsmrel->nextPhysical->firstChunk - fsmrel->firstChunk;
1822
else if (fsmrel == FreeSpaceMap->lastRel)
1823
return FreeSpaceMap->usedChunks - fsmrel->firstChunk;
1826
/* it's not in the storage-order list */
1827
Assert(fsmrel->firstChunk < 0 && fsmrel->storedPages == 0);
1833
#ifdef FREESPACE_DEBUG
/*
 * Dump contents of freespace map for debugging.
 *
 * We assume caller holds the FreeSpaceLock, or is otherwise unconcerned
 * about other processes.
 */
void
DumpFreeSpace(void)
{
	FSMRelation *fsmrel;
	FSMRelation *prevrel = NULL;
	int			relNum = 0;
	int			nPages;

	for (fsmrel = FreeSpaceMap->usageList; fsmrel; fsmrel = fsmrel->nextUsage)
	{
		relNum++;
		fprintf(stderr, "Map %d: rel %u/%u/%u isIndex %d avgRequest %u lastPageCount %d nextPage %d\nMap= ",
				relNum,
				fsmrel->key.spcNode, fsmrel->key.dbNode, fsmrel->key.relNode,
				(int) fsmrel->isIndex, fsmrel->avgRequest,
				fsmrel->lastPageCount, fsmrel->nextPage);
		if (fsmrel->isIndex)
		{
			IndexFSMPageData *page;

			page = (IndexFSMPageData *)
				(FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
			for (nPages = 0; nPages < fsmrel->storedPages; nPages++)
			{
				fprintf(stderr, " %u",
						IndexFSMPageGetPageNum(page));
				page++;
			}
		}
		else
		{
			FSMPageData *page;

			page = (FSMPageData *)
				(FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
			for (nPages = 0; nPages < fsmrel->storedPages; nPages++)
			{
				fprintf(stderr, " %u:%u",
						FSMPageGetPageNum(page),
						FSMPageGetSpace(page));
				page++;
			}
		}
		fprintf(stderr, "\n");
		/* Cross-check list links */
		if (prevrel != fsmrel->priorUsage)
			fprintf(stderr, "DumpFreeSpace: broken list links\n");
		prevrel = fsmrel;
	}
	if (prevrel != FreeSpaceMap->usageListTail)
		fprintf(stderr, "DumpFreeSpace: broken list links\n");
	/* Cross-check global counters */
	if (relNum != FreeSpaceMap->numRels)
		fprintf(stderr, "DumpFreeSpace: %d rels in list, but numRels = %d\n",
				relNum, FreeSpaceMap->numRels);
}

#endif   /* FREESPACE_DEBUG */