1
/*-------------------------------------------------------------------------
4
* POSTGRES relation descriptor cache code
6
* Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
7
* Portions Copyright (c) 1994, Regents of the University of California
11
* src/backend/utils/cache/relcache.c
13
*-------------------------------------------------------------------------
17
* RelationCacheInitialize - initialize relcache (to empty)
18
* RelationCacheInitializePhase2 - initialize shared-catalog entries
19
* RelationCacheInitializePhase3 - finish initializing relcache
20
* RelationIdGetRelation - get a reldesc by relation id
21
* RelationClose - close an open relation
24
* The following code contains many undocumented hacks. Please be careful.
33
#include "access/genam.h"
34
#include "access/reloptions.h"
35
#include "access/sysattr.h"
36
#include "access/transam.h"
37
#include "access/xact.h"
38
#include "catalog/catalog.h"
39
#include "catalog/index.h"
40
#include "catalog/indexing.h"
41
#include "catalog/namespace.h"
42
#include "catalog/pg_amproc.h"
43
#include "catalog/pg_attrdef.h"
44
#include "catalog/pg_authid.h"
45
#include "catalog/pg_auth_members.h"
46
#include "catalog/pg_constraint.h"
47
#include "catalog/pg_database.h"
48
#include "catalog/pg_namespace.h"
49
#include "catalog/pg_opclass.h"
50
#include "catalog/pg_proc.h"
51
#include "catalog/pg_rewrite.h"
52
#include "catalog/pg_tablespace.h"
53
#include "catalog/pg_trigger.h"
54
#include "catalog/pg_type.h"
55
#include "catalog/schemapg.h"
56
#include "catalog/storage.h"
57
#include "commands/trigger.h"
58
#include "miscadmin.h"
59
#include "optimizer/clauses.h"
60
#include "optimizer/planmain.h"
61
#include "optimizer/prep.h"
62
#include "optimizer/var.h"
63
#include "rewrite/rewriteDefine.h"
64
#include "storage/fd.h"
65
#include "storage/lmgr.h"
66
#include "storage/smgr.h"
67
#include "utils/array.h"
68
#include "utils/builtins.h"
69
#include "utils/fmgroids.h"
70
#include "utils/inval.h"
71
#include "utils/lsyscache.h"
72
#include "utils/memutils.h"
73
#include "utils/relcache.h"
74
#include "utils/relmapper.h"
75
#include "utils/resowner.h"
76
#include "utils/syscache.h"
77
#include "utils/tqual.h"
81
* name of relcache init file(s), used to speed up backend startup
83
#define RELCACHE_INIT_FILENAME "pg_internal.init"
85
#define RELCACHE_INIT_FILEMAGIC 0x573266 /* version ID value */
88
* hardcoded tuple descriptors, contents generated by genbki.pl
90
static const FormData_pg_attribute Desc_pg_class[Natts_pg_class] = {Schema_pg_class};
91
static const FormData_pg_attribute Desc_pg_attribute[Natts_pg_attribute] = {Schema_pg_attribute};
92
static const FormData_pg_attribute Desc_pg_proc[Natts_pg_proc] = {Schema_pg_proc};
93
static const FormData_pg_attribute Desc_pg_type[Natts_pg_type] = {Schema_pg_type};
94
static const FormData_pg_attribute Desc_pg_database[Natts_pg_database] = {Schema_pg_database};
95
static const FormData_pg_attribute Desc_pg_authid[Natts_pg_authid] = {Schema_pg_authid};
96
static const FormData_pg_attribute Desc_pg_auth_members[Natts_pg_auth_members] = {Schema_pg_auth_members};
97
static const FormData_pg_attribute Desc_pg_index[Natts_pg_index] = {Schema_pg_index};
100
* Hash tables that index the relation cache
102
* We used to index the cache by both name and OID, but now there
103
* is only an index by OID.
105
typedef struct relidcacheent
111
static HTAB *RelationIdCache;
114
* This flag is false until we have prepared the critical relcache entries
115
* that are needed to do indexscans on the tables read by relcache building.
117
bool criticalRelcachesBuilt = false;
120
* This flag is false until we have prepared the critical relcache entries
121
* for shared catalogs (which are the tables needed for login).
123
bool criticalSharedRelcachesBuilt = false;
126
* This counter counts relcache inval events received since backend startup
127
* (but only for rels that are actually in cache). Presently, we use it only
128
* to detect whether data about to be written by write_relcache_init_file()
129
* might already be obsolete.
131
static long relcacheInvalsReceived = 0L;
134
* This list remembers the OIDs of the non-shared relations cached in the
135
* database's local relcache init file. Note that there is no corresponding
136
* list for the shared relcache init file, for reasons explained in the
137
* comments for RelationCacheInitFileRemove.
139
static List *initFileRelationIds = NIL;
142
* This flag lets us optimize away work in AtEO(Sub)Xact_RelationCache().
144
static bool need_eoxact_work = false;
148
* macros to manipulate the lookup hashtables
150
#define RelationCacheInsert(RELATION) \
152
RelIdCacheEnt *idhentry; bool found; \
153
idhentry = (RelIdCacheEnt*)hash_search(RelationIdCache, \
154
(void *) &(RELATION->rd_id), \
155
HASH_ENTER, &found); \
156
/* used to give notice if found -- now just keep quiet */ \
157
idhentry->reldesc = RELATION; \
160
#define RelationIdCacheLookup(ID, RELATION) \
162
RelIdCacheEnt *hentry; \
163
hentry = (RelIdCacheEnt*)hash_search(RelationIdCache, \
167
RELATION = hentry->reldesc; \
172
#define RelationCacheDelete(RELATION) \
174
RelIdCacheEnt *idhentry; \
175
idhentry = (RelIdCacheEnt*)hash_search(RelationIdCache, \
176
(void *) &(RELATION->rd_id), \
177
HASH_REMOVE, NULL); \
178
if (idhentry == NULL) \
179
elog(WARNING, "trying to delete a rd_id reldesc that does not exist"); \
184
* Special cache for opclass-related information
186
* Note: only default support procs get cached, ie, those with
187
* lefttype = righttype = opcintype.
189
typedef struct opclasscacheent
191
Oid opclassoid; /* lookup key: OID of opclass */
192
bool valid; /* set TRUE after successful fill-in */
193
StrategyNumber numSupport; /* max # of support procs (from pg_am) */
194
Oid opcfamily; /* OID of opclass's family */
195
Oid opcintype; /* OID of opclass's declared input type */
196
RegProcedure *supportProcs; /* OIDs of support procedures */
199
static HTAB *OpClassCache = NULL;
202
/* non-export function prototypes */
204
static void RelationDestroyRelation(Relation relation);
205
static void RelationClearRelation(Relation relation, bool rebuild);
207
static void RelationReloadIndexInfo(Relation relation);
208
static void RelationFlushRelation(Relation relation);
209
static bool load_relcache_init_file(bool shared);
210
static void write_relcache_init_file(bool shared);
211
static void write_item(const void *data, Size len, FILE *fp);
213
static void formrdesc(const char *relationName, Oid relationReltype,
214
bool isshared, bool hasoids,
215
int natts, const FormData_pg_attribute *attrs);
217
static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK);
218
static Relation AllocateRelationDesc(Form_pg_class relp);
219
static void RelationParseRelOptions(Relation relation, HeapTuple tuple);
220
static void RelationBuildTupleDesc(Relation relation);
221
static Relation RelationBuildDesc(Oid targetRelId, bool insertIt);
222
static void RelationInitPhysicalAddr(Relation relation);
223
static void load_critical_index(Oid indexoid, Oid heapoid);
224
static TupleDesc GetPgClassDescriptor(void);
225
static TupleDesc GetPgIndexDescriptor(void);
226
static void AttrDefaultFetch(Relation relation);
227
static void CheckConstraintFetch(Relation relation);
228
static List *insert_ordered_oid(List *list, Oid datum);
229
static void IndexSupportInitialize(oidvector *indclass,
230
RegProcedure *indexSupport,
233
StrategyNumber maxSupportNumber,
234
AttrNumber maxAttributeNumber);
235
static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid,
236
StrategyNumber numSupport);
237
static void RelationCacheInitFileRemoveInDir(const char *tblspcpath);
238
static void unlink_initfile(const char *initfilename);
244
* This is used by RelationBuildDesc to find a pg_class
245
* tuple matching targetRelId. The caller must hold at least
246
* AccessShareLock on the target relid to prevent concurrent-update
247
* scenarios --- else our SnapshotNow scan might fail to find any
248
* version that it thinks is live.
250
* NB: the returned tuple has been copied into palloc'd storage
251
* and must eventually be freed with heap_freetuple.
254
ScanPgRelation(Oid targetRelId, bool indexOK)
256
HeapTuple pg_class_tuple;
257
Relation pg_class_desc;
258
SysScanDesc pg_class_scan;
262
* If something goes wrong during backend startup, we might find ourselves
263
* trying to read pg_class before we've selected a database. That ain't
264
* gonna work, so bail out with a useful error message. If this happens,
265
* it probably means a relcache entry that needs to be nailed isn't.
267
if (!OidIsValid(MyDatabaseId))
268
elog(FATAL, "cannot read pg_class without having selected a database");
274
ObjectIdAttributeNumber,
275
BTEqualStrategyNumber, F_OIDEQ,
276
ObjectIdGetDatum(targetRelId));
279
* Open pg_class and fetch a tuple. Force heap scan if we haven't yet
280
* built the critical relcache entries (this includes initdb and startup
281
* without a pg_internal.init file). The caller can also force a heap
282
* scan by setting indexOK == false.
284
pg_class_desc = heap_open(RelationRelationId, AccessShareLock);
285
pg_class_scan = systable_beginscan(pg_class_desc, ClassOidIndexId,
286
indexOK && criticalRelcachesBuilt,
290
pg_class_tuple = systable_getnext(pg_class_scan);
293
* Must copy tuple before releasing buffer.
295
if (HeapTupleIsValid(pg_class_tuple))
296
pg_class_tuple = heap_copytuple(pg_class_tuple);
299
systable_endscan(pg_class_scan);
300
heap_close(pg_class_desc, AccessShareLock);
302
return pg_class_tuple;
306
* AllocateRelationDesc
308
* This is used to allocate memory for a new relation descriptor
309
* and initialize the rd_rel field from the given pg_class tuple.
312
AllocateRelationDesc(Form_pg_class relp)
315
MemoryContext oldcxt;
316
Form_pg_class relationForm;
318
/* Relcache entries must live in CacheMemoryContext */
319
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
322
* allocate and zero space for new relation descriptor
324
relation = (Relation) palloc0(sizeof(RelationData));
326
/* make sure relation is marked as having no open file yet */
327
relation->rd_smgr = NULL;
330
* Copy the relation tuple form
332
* We only allocate space for the fixed fields, ie, CLASS_TUPLE_SIZE. The
333
* variable-length fields (relacl, reloptions) are NOT stored in the
334
* relcache --- there'd be little point in it, since we don't copy the
335
* tuple's nulls bitmap and hence wouldn't know if the values are valid.
336
* Bottom line is that relacl *cannot* be retrieved from the relcache. Get
337
* it from the syscache if you need it. The same goes for the original
338
* form of reloptions (however, we do store the parsed form of reloptions in rd_options).
341
relationForm = (Form_pg_class) palloc(CLASS_TUPLE_SIZE);
343
memcpy(relationForm, relp, CLASS_TUPLE_SIZE);
345
/* initialize relation tuple form */
346
relation->rd_rel = relationForm;
348
/* and allocate attribute tuple form storage */
349
relation->rd_att = CreateTemplateTupleDesc(relationForm->relnatts,
350
relationForm->relhasoids);
351
/* which we mark as a reference-counted tupdesc */
352
relation->rd_att->tdrefcount = 1;
354
MemoryContextSwitchTo(oldcxt);
360
* RelationParseRelOptions
361
* Convert pg_class.reloptions into pre-parsed rd_options
363
* tuple is the real pg_class tuple (not rd_rel!) for relation
365
* Note: rd_rel and (if an index) rd_am must be valid already
368
RelationParseRelOptions(Relation relation, HeapTuple tuple)
372
relation->rd_options = NULL;
374
/* Fall out if relkind should not have options */
375
switch (relation->rd_rel->relkind)
377
case RELKIND_RELATION:
378
case RELKIND_TOASTVALUE:
386
* Fetch reloptions from tuple; have to use a hardwired descriptor because
387
* we might not have any other for pg_class yet (consider executing this
388
* code for pg_class itself)
390
options = extractRelOptions(tuple,
391
GetPgClassDescriptor(),
392
relation->rd_rel->relkind == RELKIND_INDEX ?
393
relation->rd_am->amoptions : InvalidOid);
396
* Copy parsed data into CacheMemoryContext. To guard against the
397
* possibility of leaks in the reloptions code, we want to do the actual
398
* parsing in the caller's memory context and copy the results into
399
* CacheMemoryContext after the fact.
403
relation->rd_options = MemoryContextAlloc(CacheMemoryContext,
405
memcpy(relation->rd_options, options, VARSIZE(options));
411
* RelationBuildTupleDesc
413
* Form the relation's tuple descriptor from information in
414
* the pg_attribute, pg_attrdef & pg_constraint system catalogs.
417
RelationBuildTupleDesc(Relation relation)
419
HeapTuple pg_attribute_tuple;
420
Relation pg_attribute_desc;
421
SysScanDesc pg_attribute_scan;
425
AttrDefault *attrdef = NULL;
428
/* copy some fields from pg_class row to rd_att */
429
relation->rd_att->tdtypeid = relation->rd_rel->reltype;
430
relation->rd_att->tdtypmod = -1; /* unnecessary, but... */
431
relation->rd_att->tdhasoid = relation->rd_rel->relhasoids;
433
constr = (TupleConstr *) MemoryContextAlloc(CacheMemoryContext,
434
sizeof(TupleConstr));
435
constr->has_not_null = false;
438
* Form a scan key that selects only user attributes (attnum > 0).
439
* (Eliminating system attribute rows at the index level is lots faster
440
* than fetching them.)
442
ScanKeyInit(&skey[0],
443
Anum_pg_attribute_attrelid,
444
BTEqualStrategyNumber, F_OIDEQ,
445
ObjectIdGetDatum(RelationGetRelid(relation)));
446
ScanKeyInit(&skey[1],
447
Anum_pg_attribute_attnum,
448
BTGreaterStrategyNumber, F_INT2GT,
452
* Open pg_attribute and begin a scan. Force heap scan if we haven't yet
453
* built the critical relcache entries (this includes initdb and startup
454
* without a pg_internal.init file).
456
pg_attribute_desc = heap_open(AttributeRelationId, AccessShareLock);
457
pg_attribute_scan = systable_beginscan(pg_attribute_desc,
458
AttributeRelidNumIndexId,
459
criticalRelcachesBuilt,
464
* add attribute data to relation->rd_att
466
need = relation->rd_rel->relnatts;
468
while (HeapTupleIsValid(pg_attribute_tuple = systable_getnext(pg_attribute_scan)))
470
Form_pg_attribute attp;
472
attp = (Form_pg_attribute) GETSTRUCT(pg_attribute_tuple);
474
if (attp->attnum <= 0 ||
475
attp->attnum > relation->rd_rel->relnatts)
476
elog(ERROR, "invalid attribute number %d for %s",
477
attp->attnum, RelationGetRelationName(relation));
479
memcpy(relation->rd_att->attrs[attp->attnum - 1],
481
ATTRIBUTE_FIXED_PART_SIZE);
483
/* Update constraint/default info */
484
if (attp->attnotnull)
485
constr->has_not_null = true;
490
attrdef = (AttrDefault *)
491
MemoryContextAllocZero(CacheMemoryContext,
492
relation->rd_rel->relnatts *
493
sizeof(AttrDefault));
494
attrdef[ndef].adnum = attp->attnum;
495
attrdef[ndef].adbin = NULL;
504
* end the scan and close the attribute relation
506
systable_endscan(pg_attribute_scan);
507
heap_close(pg_attribute_desc, AccessShareLock);
510
elog(ERROR, "catalog is missing %d attribute(s) for relid %u",
511
need, RelationGetRelid(relation));
514
* The attcacheoff values we read from pg_attribute should all be -1
515
* ("unknown"). Verify this if assert checking is on. They will be
516
* computed when and if needed during tuple access.
518
#ifdef USE_ASSERT_CHECKING
522
for (i = 0; i < relation->rd_rel->relnatts; i++)
523
Assert(relation->rd_att->attrs[i]->attcacheoff == -1);
528
* However, we can easily set the attcacheoff value for the first
529
* attribute: it must be zero. This eliminates the need for special cases
530
* for attnum=1 that used to exist in fastgetattr() and index_getattr().
532
if (relation->rd_rel->relnatts > 0)
533
relation->rd_att->attrs[0]->attcacheoff = 0;
536
* Set up constraint/default info
538
if (constr->has_not_null || ndef > 0 || relation->rd_rel->relchecks)
540
relation->rd_att->constr = constr;
542
if (ndef > 0) /* DEFAULTs */
544
if (ndef < relation->rd_rel->relnatts)
545
constr->defval = (AttrDefault *)
546
repalloc(attrdef, ndef * sizeof(AttrDefault));
548
constr->defval = attrdef;
549
constr->num_defval = ndef;
550
AttrDefaultFetch(relation);
553
constr->num_defval = 0;
555
if (relation->rd_rel->relchecks > 0) /* CHECKs */
557
constr->num_check = relation->rd_rel->relchecks;
558
constr->check = (ConstrCheck *)
559
MemoryContextAllocZero(CacheMemoryContext,
560
constr->num_check * sizeof(ConstrCheck));
561
CheckConstraintFetch(relation);
564
constr->num_check = 0;
569
relation->rd_att->constr = NULL;
574
* RelationBuildRuleLock
576
* Form the relation's rewrite rules from information in
577
* the pg_rewrite system catalog.
579
* Note: The rule parsetrees are potentially very complex node structures.
580
* To allow these trees to be freed when the relcache entry is flushed,
581
* we make a private memory context to hold the RuleLock information for
582
* each relcache entry that has associated rules. The context is used
583
* just for rule info, not for any other subsidiary data of the relcache
584
* entry, because that keeps the update logic in RelationClearRelation()
585
* manageable. The other subsidiary data structures are simple enough
586
* to be easy to free explicitly, anyway.
589
RelationBuildRuleLock(Relation relation)
591
MemoryContext rulescxt;
592
MemoryContext oldcxt;
593
HeapTuple rewrite_tuple;
594
Relation rewrite_desc;
595
TupleDesc rewrite_tupdesc;
596
SysScanDesc rewrite_scan;
604
* Make the private context. Parameters are set on the assumption that
605
* it'll probably not contain much data.
607
rulescxt = AllocSetContextCreate(CacheMemoryContext,
608
RelationGetRelationName(relation),
609
ALLOCSET_SMALL_MINSIZE,
610
ALLOCSET_SMALL_INITSIZE,
611
ALLOCSET_SMALL_MAXSIZE);
612
relation->rd_rulescxt = rulescxt;
615
* allocate an array to hold the rewrite rules (the array is extended if necessary)
619
rules = (RewriteRule **)
620
MemoryContextAlloc(rulescxt, sizeof(RewriteRule *) * maxlocks);
627
Anum_pg_rewrite_ev_class,
628
BTEqualStrategyNumber, F_OIDEQ,
629
ObjectIdGetDatum(RelationGetRelid(relation)));
632
* open pg_rewrite and begin a scan
634
* Note: since we scan the rules using RewriteRelRulenameIndexId, we will
635
* be reading the rules in name order, except possibly during
636
* emergency-recovery operations (ie, IgnoreSystemIndexes). This in turn
637
* ensures that rules will be fired in name order.
639
rewrite_desc = heap_open(RewriteRelationId, AccessShareLock);
640
rewrite_tupdesc = RelationGetDescr(rewrite_desc);
641
rewrite_scan = systable_beginscan(rewrite_desc,
642
RewriteRelRulenameIndexId,
646
while (HeapTupleIsValid(rewrite_tuple = systable_getnext(rewrite_scan)))
648
Form_pg_rewrite rewrite_form = (Form_pg_rewrite) GETSTRUCT(rewrite_tuple);
654
rule = (RewriteRule *) MemoryContextAlloc(rulescxt,
655
sizeof(RewriteRule));
657
rule->ruleId = HeapTupleGetOid(rewrite_tuple);
659
rule->event = rewrite_form->ev_type - '0';
660
rule->attrno = rewrite_form->ev_attr;
661
rule->enabled = rewrite_form->ev_enabled;
662
rule->isInstead = rewrite_form->is_instead;
665
* Must use heap_getattr to fetch ev_action and ev_qual. Also, the
666
* rule strings are often large enough to be toasted. To avoid
667
* leaking memory in the caller's context, do the detoasting here so
668
* we can free the detoasted version.
670
rule_datum = heap_getattr(rewrite_tuple,
671
Anum_pg_rewrite_ev_action,
675
rule_str = TextDatumGetCString(rule_datum);
676
oldcxt = MemoryContextSwitchTo(rulescxt);
677
rule->actions = (List *) stringToNode(rule_str);
678
MemoryContextSwitchTo(oldcxt);
681
rule_datum = heap_getattr(rewrite_tuple,
682
Anum_pg_rewrite_ev_qual,
686
rule_str = TextDatumGetCString(rule_datum);
687
oldcxt = MemoryContextSwitchTo(rulescxt);
688
rule->qual = (Node *) stringToNode(rule_str);
689
MemoryContextSwitchTo(oldcxt);
693
* We want the rule's table references to be checked as though by the
694
* table owner, not the user referencing the rule. Therefore, scan
695
* through the rule's actions and set the checkAsUser field on all
696
* rtable entries. We have to look at the qual as well, in case it
699
* The reason for doing this when the rule is loaded, rather than when
700
* it is stored, is that otherwise ALTER TABLE OWNER would have to
701
* grovel through stored rules to update checkAsUser fields. Scanning
702
* the rule tree during load is relatively cheap (compared to
703
* constructing it in the first place), so we do it here.
705
setRuleCheckAsUser((Node *) rule->actions, relation->rd_rel->relowner);
706
setRuleCheckAsUser(rule->qual, relation->rd_rel->relowner);
708
if (numlocks >= maxlocks)
711
rules = (RewriteRule **)
712
repalloc(rules, sizeof(RewriteRule *) * maxlocks);
714
rules[numlocks++] = rule;
718
* end the scan and close the pg_rewrite relation
720
systable_endscan(rewrite_scan);
721
heap_close(rewrite_desc, AccessShareLock);
724
* there might not be any rules (if relhasrules is out-of-date)
728
relation->rd_rules = NULL;
729
relation->rd_rulescxt = NULL;
730
MemoryContextDelete(rulescxt);
735
* form a RuleLock and insert into relation
737
rulelock = (RuleLock *) MemoryContextAlloc(rulescxt, sizeof(RuleLock));
738
rulelock->numLocks = numlocks;
739
rulelock->rules = rules;
741
relation->rd_rules = rulelock;
747
* Determine whether two RuleLocks are equivalent
749
* Probably this should be in the rules code someplace...
752
equalRuleLocks(RuleLock *rlock1, RuleLock *rlock2)
757
* As of 7.3 we assume the rule ordering is repeatable, because
758
* RelationBuildRuleLock should read 'em in a consistent order. So just
759
* compare corresponding slots.
765
if (rlock1->numLocks != rlock2->numLocks)
767
for (i = 0; i < rlock1->numLocks; i++)
769
RewriteRule *rule1 = rlock1->rules[i];
770
RewriteRule *rule2 = rlock2->rules[i];
772
if (rule1->ruleId != rule2->ruleId)
774
if (rule1->event != rule2->event)
776
if (rule1->attrno != rule2->attrno)
778
if (rule1->enabled != rule2->enabled)
780
if (rule1->isInstead != rule2->isInstead)
782
if (!equal(rule1->qual, rule2->qual))
784
if (!equal(rule1->actions, rule2->actions))
788
else if (rlock2 != NULL)
797
* Build a relation descriptor. The caller must hold at least
798
* AccessShareLock on the target relid.
800
* The new descriptor is inserted into the hash table if insertIt is true.
802
* Returns NULL if no pg_class row could be found for the given relid
803
* (suggesting we are trying to access a just-deleted relation).
804
* Any other error is reported via elog.
807
RelationBuildDesc(Oid targetRelId, bool insertIt)
811
HeapTuple pg_class_tuple;
815
* find the tuple in pg_class corresponding to the given relation id
817
pg_class_tuple = ScanPgRelation(targetRelId, true);
820
* if no such tuple exists, return NULL
822
if (!HeapTupleIsValid(pg_class_tuple))
826
* get information from the pg_class_tuple
828
relid = HeapTupleGetOid(pg_class_tuple);
829
relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
830
Assert(relid == targetRelId);
833
* allocate storage for the relation descriptor, and copy pg_class_tuple
834
* to relation->rd_rel.
836
relation = AllocateRelationDesc(relp);
839
* initialize the relation's relation id (relation->rd_id)
841
RelationGetRelid(relation) = relid;
844
* normal relations are not nailed into the cache; nor can a pre-existing
845
* relation be new. It could be temp though. (Actually, it could be new
846
* too, but it's okay to forget that fact if forced to flush the entry.)
848
relation->rd_refcnt = 0;
849
relation->rd_isnailed = false;
850
relation->rd_createSubid = InvalidSubTransactionId;
851
relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
852
switch (relation->rd_rel->relpersistence)
854
case RELPERSISTENCE_UNLOGGED:
855
case RELPERSISTENCE_PERMANENT:
856
relation->rd_backend = InvalidBackendId;
858
case RELPERSISTENCE_TEMP:
859
if (isTempOrToastNamespace(relation->rd_rel->relnamespace))
860
relation->rd_backend = MyBackendId;
864
* If it's a local temp table, but not one of ours, we have to
865
* use the slow, grotty method to figure out the owning backend.
868
relation->rd_backend =
869
GetTempNamespaceBackendId(relation->rd_rel->relnamespace);
870
Assert(relation->rd_backend != InvalidBackendId);
874
elog(ERROR, "invalid relpersistence: %c",
875
relation->rd_rel->relpersistence);
880
* initialize the tuple descriptor (relation->rd_att).
882
RelationBuildTupleDesc(relation);
885
* Fetch rules and triggers that affect this relation
887
if (relation->rd_rel->relhasrules)
888
RelationBuildRuleLock(relation);
891
relation->rd_rules = NULL;
892
relation->rd_rulescxt = NULL;
895
if (relation->rd_rel->relhastriggers)
896
RelationBuildTriggers(relation);
898
relation->trigdesc = NULL;
901
* if it's an index, initialize index-related information
903
if (OidIsValid(relation->rd_rel->relam))
904
RelationInitIndexAccessInfo(relation);
906
/* extract reloptions if any */
907
RelationParseRelOptions(relation, pg_class_tuple);
910
* initialize the relation lock manager information
912
RelationInitLockInfo(relation); /* see lmgr.c */
915
* initialize physical addressing information for the relation
917
RelationInitPhysicalAddr(relation);
919
/* make sure relation is marked as having no open file yet */
920
relation->rd_smgr = NULL;
923
* now we can free the memory allocated for pg_class_tuple
925
heap_freetuple(pg_class_tuple);
928
* Insert newly created relation into relcache hash table, if requested.
931
RelationCacheInsert(relation);
933
/* It's fully valid */
934
relation->rd_isvalid = true;
940
* Initialize the physical addressing info (RelFileNode) for a relcache entry
942
* Note: at the physical level, relations in the pg_global tablespace must
943
* be treated as shared, even if relisshared isn't set. Hence we do not
944
* look at relisshared here.
947
RelationInitPhysicalAddr(Relation relation)
949
if (relation->rd_rel->reltablespace)
950
relation->rd_node.spcNode = relation->rd_rel->reltablespace;
952
relation->rd_node.spcNode = MyDatabaseTableSpace;
953
if (relation->rd_node.spcNode == GLOBALTABLESPACE_OID)
954
relation->rd_node.dbNode = InvalidOid;
956
relation->rd_node.dbNode = MyDatabaseId;
957
if (relation->rd_rel->relfilenode)
958
relation->rd_node.relNode = relation->rd_rel->relfilenode;
961
/* Consult the relation mapper */
962
relation->rd_node.relNode =
963
RelationMapOidToFilenode(relation->rd_id,
964
relation->rd_rel->relisshared);
965
if (!OidIsValid(relation->rd_node.relNode))
966
elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
967
RelationGetRelationName(relation), relation->rd_id);
972
* Initialize index-access-method support data for an index relation
975
RelationInitIndexAccessInfo(Relation relation)
981
Datum indoptionDatum;
985
int2vector *indoption;
986
MemoryContext indexcxt;
987
MemoryContext oldcontext;
992
* Make a copy of the pg_index entry for the index. Since pg_index
993
* contains variable-length and possibly-null fields, we have to do this
994
* honestly rather than just treating it as a Form_pg_index struct.
996
tuple = SearchSysCache1(INDEXRELID,
997
ObjectIdGetDatum(RelationGetRelid(relation)));
998
if (!HeapTupleIsValid(tuple))
999
elog(ERROR, "cache lookup failed for index %u",
1000
RelationGetRelid(relation));
1001
oldcontext = MemoryContextSwitchTo(CacheMemoryContext);
1002
relation->rd_indextuple = heap_copytuple(tuple);
1003
relation->rd_index = (Form_pg_index) GETSTRUCT(relation->rd_indextuple);
1004
MemoryContextSwitchTo(oldcontext);
1005
ReleaseSysCache(tuple);
1008
* Make a copy of the pg_am entry for the index's access method
1010
tuple = SearchSysCache1(AMOID, ObjectIdGetDatum(relation->rd_rel->relam));
1011
if (!HeapTupleIsValid(tuple))
1012
elog(ERROR, "cache lookup failed for access method %u",
1013
relation->rd_rel->relam);
1014
aform = (Form_pg_am) MemoryContextAlloc(CacheMemoryContext, sizeof *aform);
1015
memcpy(aform, GETSTRUCT(tuple), sizeof *aform);
1016
ReleaseSysCache(tuple);
1017
relation->rd_am = aform;
1019
natts = relation->rd_rel->relnatts;
1020
if (natts != relation->rd_index->indnatts)
1021
elog(ERROR, "relnatts disagrees with indnatts for index %u",
1022
RelationGetRelid(relation));
1023
amsupport = aform->amsupport;
1026
* Make the private context to hold index access info. The reason we need
1027
* a context, and not just a couple of pallocs, is so that we won't leak
1028
* any subsidiary info attached to fmgr lookup records.
1030
* Context parameters are set on the assumption that it'll probably not
1031
* contain much data.
1033
indexcxt = AllocSetContextCreate(CacheMemoryContext,
1034
RelationGetRelationName(relation),
1035
ALLOCSET_SMALL_MINSIZE,
1036
ALLOCSET_SMALL_INITSIZE,
1037
ALLOCSET_SMALL_MAXSIZE);
1038
relation->rd_indexcxt = indexcxt;
1041
* Allocate arrays to hold data
1043
relation->rd_aminfo = (RelationAmInfo *)
1044
MemoryContextAllocZero(indexcxt, sizeof(RelationAmInfo));
1046
relation->rd_opfamily = (Oid *)
1047
MemoryContextAllocZero(indexcxt, natts * sizeof(Oid));
1048
relation->rd_opcintype = (Oid *)
1049
MemoryContextAllocZero(indexcxt, natts * sizeof(Oid));
1053
int nsupport = natts * amsupport;
1055
relation->rd_support = (RegProcedure *)
1056
MemoryContextAllocZero(indexcxt, nsupport * sizeof(RegProcedure));
1057
relation->rd_supportinfo = (FmgrInfo *)
1058
MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo));
1062
relation->rd_support = NULL;
1063
relation->rd_supportinfo = NULL;
1066
relation->rd_indcollation = (Oid *)
1067
MemoryContextAllocZero(indexcxt, natts * sizeof(Oid));
1069
relation->rd_indoption = (int16 *)
1070
MemoryContextAllocZero(indexcxt, natts * sizeof(int16));
1073
* indcollation cannot be referenced directly through the C struct,
1074
* because it comes after the variable-width indkey field. Must extract
1075
* the datum the hard way...
1077
indcollDatum = fastgetattr(relation->rd_indextuple,
1078
Anum_pg_index_indcollation,
1079
GetPgIndexDescriptor(),
1082
indcoll = (oidvector *) DatumGetPointer(indcollDatum);
1083
memcpy(relation->rd_indcollation, indcoll->values, natts * sizeof(Oid));
1086
* indclass cannot be referenced directly through the C struct, because it
1087
* comes after the variable-width indkey field. Must extract the datum the hard way...
1090
indclassDatum = fastgetattr(relation->rd_indextuple,
1091
Anum_pg_index_indclass,
1092
GetPgIndexDescriptor(),
1095
indclass = (oidvector *) DatumGetPointer(indclassDatum);
1098
* Fill the support procedure OID array, as well as the info about
1099
* opfamilies and opclass input types. (aminfo and supportinfo are left
1100
* as zeroes, and are filled on-the-fly when used)
1102
IndexSupportInitialize(indclass, relation->rd_support,
1103
relation->rd_opfamily, relation->rd_opcintype,
1107
* Similarly extract indoption and copy it to the cache entry
1109
indoptionDatum = fastgetattr(relation->rd_indextuple,
1110
Anum_pg_index_indoption,
1111
GetPgIndexDescriptor(),
1114
indoption = (int2vector *) DatumGetPointer(indoptionDatum);
1115
memcpy(relation->rd_indoption, indoption->values, natts * sizeof(int16));
1118
* expressions, predicate, exclusion caches will be filled later
1120
relation->rd_indexprs = NIL;
1121
relation->rd_indpred = NIL;
1122
relation->rd_exclops = NULL;
1123
relation->rd_exclprocs = NULL;
1124
relation->rd_exclstrats = NULL;
1125
relation->rd_amcache = NULL;
1129
* IndexSupportInitialize
1130
* Initializes an index's cached opclass information,
1131
* given the index's pg_index.indclass entry.
1133
* Data is returned into *indexSupport, *opFamily, and *opcInType,
1134
* which are arrays allocated by the caller.
1136
* The caller also passes maxSupportNumber and maxAttributeNumber, since these
1137
* indicate the size of the arrays it has allocated --- but in practice these
1138
* numbers must always match those obtainable from the system catalog entries
1139
* for the index and access method.
1142
IndexSupportInitialize(oidvector *indclass,
1143
RegProcedure *indexSupport,
1146
StrategyNumber maxSupportNumber,
1147
AttrNumber maxAttributeNumber)
1151
for (attIndex = 0; attIndex < maxAttributeNumber; attIndex++)
1153
OpClassCacheEnt *opcentry;
1155
if (!OidIsValid(indclass->values[attIndex]))
1156
elog(ERROR, "bogus pg_index tuple");
1158
/* look up the info for this opclass, using a cache */
1159
opcentry = LookupOpclassInfo(indclass->values[attIndex],
1162
/* copy cached data into relcache entry */
1163
opFamily[attIndex] = opcentry->opcfamily;
1164
opcInType[attIndex] = opcentry->opcintype;
1165
if (maxSupportNumber > 0)
1166
memcpy(&indexSupport[attIndex * maxSupportNumber],
1167
opcentry->supportProcs,
1168
maxSupportNumber * sizeof(RegProcedure));
1175
* This routine maintains a per-opclass cache of the information needed
1176
* by IndexSupportInitialize(). This is more efficient than relying on
1177
* the catalog cache, because we can load all the info about a particular
1178
* opclass in a single indexscan of pg_amproc.
1180
* The information from pg_am about expected range of support function
1181
* numbers is passed in, rather than being looked up, mainly because the
1182
* caller will have it already.
1184
* Note there is no provision for flushing the cache. This is OK at the
1185
* moment because there is no way to ALTER any interesting properties of an
1186
* existing opclass --- all you can do is drop it, which will result in
1187
* a useless but harmless dead entry in the cache. To support altering
1188
* opclass membership (not the same as opfamily membership!), we'd need to
1189
* be able to flush this cache as well as the contents of relcache entries
1192
static OpClassCacheEnt *
1193
LookupOpclassInfo(Oid operatorClassOid,
1194
StrategyNumber numSupport)
1196
OpClassCacheEnt *opcentry;
1200
ScanKeyData skey[3];
1204
if (OpClassCache == NULL)
1206
/* First time through: initialize the opclass cache */
1209
MemSet(&ctl, 0, sizeof(ctl));
1210
ctl.keysize = sizeof(Oid);
1211
ctl.entrysize = sizeof(OpClassCacheEnt);
1212
ctl.hash = oid_hash;
1213
OpClassCache = hash_create("Operator class cache", 64,
1214
&ctl, HASH_ELEM | HASH_FUNCTION);
1216
/* Also make sure CacheMemoryContext exists */
1217
if (!CacheMemoryContext)
1218
CreateCacheMemoryContext();
1221
opcentry = (OpClassCacheEnt *) hash_search(OpClassCache,
1222
(void *) &operatorClassOid,
1223
HASH_ENTER, &found);
1227
/* Need to allocate memory for new entry */
1228
opcentry->valid = false; /* until known OK */
1229
opcentry->numSupport = numSupport;
1232
opcentry->supportProcs = (RegProcedure *)
1233
MemoryContextAllocZero(CacheMemoryContext,
1234
numSupport * sizeof(RegProcedure));
1236
opcentry->supportProcs = NULL;
1240
Assert(numSupport == opcentry->numSupport);
1244
* When testing for cache-flush hazards, we intentionally disable the
1245
* operator class cache and force reloading of the info on each call. This
1246
* is helpful because we want to test the case where a cache flush occurs
1247
* while we are loading the info, and it's very hard to provoke that if
1248
* this happens only once per opclass per backend.
1250
#if defined(CLOBBER_CACHE_ALWAYS)
1251
opcentry->valid = false;
1254
if (opcentry->valid)
1258
* Need to fill in new entry.
1260
* To avoid infinite recursion during startup, force heap scans if we're
1261
* looking up info for the opclasses used by the indexes we would like to
1264
indexOK = criticalRelcachesBuilt ||
1265
(operatorClassOid != OID_BTREE_OPS_OID &&
1266
operatorClassOid != INT2_BTREE_OPS_OID);
1269
* We have to fetch the pg_opclass row to determine its opfamily and
1270
* opcintype, which are needed to look up related operators and functions.
1271
* It'd be convenient to use the syscache here, but that probably doesn't
1272
* work while bootstrapping.
1274
ScanKeyInit(&skey[0],
1275
ObjectIdAttributeNumber,
1276
BTEqualStrategyNumber, F_OIDEQ,
1277
ObjectIdGetDatum(operatorClassOid));
1278
rel = heap_open(OperatorClassRelationId, AccessShareLock);
1279
scan = systable_beginscan(rel, OpclassOidIndexId, indexOK,
1280
SnapshotNow, 1, skey);
1282
if (HeapTupleIsValid(htup = systable_getnext(scan)))
1284
Form_pg_opclass opclassform = (Form_pg_opclass) GETSTRUCT(htup);
1286
opcentry->opcfamily = opclassform->opcfamily;
1287
opcentry->opcintype = opclassform->opcintype;
1290
elog(ERROR, "could not find tuple for opclass %u", operatorClassOid);
1292
systable_endscan(scan);
1293
heap_close(rel, AccessShareLock);
1296
* Scan pg_amproc to obtain support procs for the opclass. We only fetch
1297
* the default ones (those with lefttype = righttype = opcintype).
1301
ScanKeyInit(&skey[0],
1302
Anum_pg_amproc_amprocfamily,
1303
BTEqualStrategyNumber, F_OIDEQ,
1304
ObjectIdGetDatum(opcentry->opcfamily));
1305
ScanKeyInit(&skey[1],
1306
Anum_pg_amproc_amproclefttype,
1307
BTEqualStrategyNumber, F_OIDEQ,
1308
ObjectIdGetDatum(opcentry->opcintype));
1309
ScanKeyInit(&skey[2],
1310
Anum_pg_amproc_amprocrighttype,
1311
BTEqualStrategyNumber, F_OIDEQ,
1312
ObjectIdGetDatum(opcentry->opcintype));
1313
rel = heap_open(AccessMethodProcedureRelationId, AccessShareLock);
1314
scan = systable_beginscan(rel, AccessMethodProcedureIndexId, indexOK,
1315
SnapshotNow, 3, skey);
1317
while (HeapTupleIsValid(htup = systable_getnext(scan)))
1319
Form_pg_amproc amprocform = (Form_pg_amproc) GETSTRUCT(htup);
1321
if (amprocform->amprocnum <= 0 ||
1322
(StrategyNumber) amprocform->amprocnum > numSupport)
1323
elog(ERROR, "invalid amproc number %d for opclass %u",
1324
amprocform->amprocnum, operatorClassOid);
1326
opcentry->supportProcs[amprocform->amprocnum - 1] =
1330
systable_endscan(scan);
1331
heap_close(rel, AccessShareLock);
1334
opcentry->valid = true;
1342
* This is a special cut-down version of RelationBuildDesc(),
1343
* used while initializing the relcache.
1344
* The relation descriptor is built just from the supplied parameters,
1345
* without actually looking at any system table entries. We cheat
1346
* quite a lot since we only need to work for a few basic system
1349
* formrdesc is currently used for: pg_database, pg_authid, pg_auth_members,
1350
* pg_class, pg_attribute, pg_proc, and pg_type
1351
* (see RelationCacheInitializePhase2/3).
1353
* Note that these catalogs can't have constraints (except attnotnull),
1354
* default values, rules, or triggers, since we don't cope with any of that.
1355
* (Well, actually, this only matters for properties that need to be valid
1356
* during bootstrap or before RelationCacheInitializePhase3 runs, and none of
1357
* these properties matter then...)
1359
* NOTE: we assume we are already switched into CacheMemoryContext.
1362
formrdesc(const char *relationName, Oid relationReltype,
1363
bool isshared, bool hasoids,
1364
int natts, const FormData_pg_attribute *attrs)
1371
* allocate new relation desc, clear all fields of reldesc
1373
relation = (Relation) palloc0(sizeof(RelationData));
1375
/* make sure relation is marked as having no open file yet */
1376
relation->rd_smgr = NULL;
1379
* initialize reference count: 1 because it is nailed in cache
1381
relation->rd_refcnt = 1;
1384
* all entries built with this routine are nailed-in-cache; none are for
1385
* new or temp relations.
1387
relation->rd_isnailed = true;
1388
relation->rd_createSubid = InvalidSubTransactionId;
1389
relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
1390
relation->rd_backend = InvalidBackendId;
1393
* initialize relation tuple form
1395
* The data we insert here is pretty incomplete/bogus, but it'll serve to
1396
* get us launched. RelationCacheInitializePhase3() will read the real
1397
* data from pg_class and replace what we've done here. Note in
1398
* particular that relowner is left as zero; this cues
1399
* RelationCacheInitializePhase3 that the real data isn't there yet.
1401
relation->rd_rel = (Form_pg_class) palloc0(CLASS_TUPLE_SIZE);
1403
namestrcpy(&relation->rd_rel->relname, relationName);
1404
relation->rd_rel->relnamespace = PG_CATALOG_NAMESPACE;
1405
relation->rd_rel->reltype = relationReltype;
1408
* It's important to distinguish between shared and non-shared relations,
1409
* even at bootstrap time, to make sure we know where they are stored.
1411
relation->rd_rel->relisshared = isshared;
1413
relation->rd_rel->reltablespace = GLOBALTABLESPACE_OID;
1415
/* formrdesc is used only for permanent relations */
1416
relation->rd_rel->relpersistence = RELPERSISTENCE_PERMANENT;
1418
relation->rd_rel->relpages = 1;
1419
relation->rd_rel->reltuples = 1;
1420
relation->rd_rel->relkind = RELKIND_RELATION;
1421
relation->rd_rel->relhasoids = hasoids;
1422
relation->rd_rel->relnatts = (int16) natts;
1425
* initialize attribute tuple form
1427
* Unlike the case with the relation tuple, this data had better be right
1428
* because it will never be replaced. The data comes from
1429
* src/include/catalog/ headers via genbki.pl.
1431
relation->rd_att = CreateTemplateTupleDesc(natts, hasoids);
1432
relation->rd_att->tdrefcount = 1; /* mark as refcounted */
1434
relation->rd_att->tdtypeid = relationReltype;
1435
relation->rd_att->tdtypmod = -1; /* unnecessary, but... */
1438
* initialize tuple desc info
1440
has_not_null = false;
1441
for (i = 0; i < natts; i++)
1443
memcpy(relation->rd_att->attrs[i],
1445
ATTRIBUTE_FIXED_PART_SIZE);
1446
has_not_null |= attrs[i].attnotnull;
1447
/* make sure attcacheoff is valid */
1448
relation->rd_att->attrs[i]->attcacheoff = -1;
1451
/* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
1452
relation->rd_att->attrs[0]->attcacheoff = 0;
1454
/* mark not-null status */
1457
TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
1459
constr->has_not_null = true;
1460
relation->rd_att->constr = constr;
1464
* initialize relation id from info in att array (my, this is ugly)
1466
RelationGetRelid(relation) = relation->rd_att->attrs[0]->attrelid;
1469
* All relations made with formrdesc are mapped. This is necessarily so
1470
* because there is no other way to know what filenode they currently
1471
* have. In bootstrap mode, add them to the initial relation mapper data,
1472
* specifying that the initial filenode is the same as the OID.
1474
relation->rd_rel->relfilenode = InvalidOid;
1475
if (IsBootstrapProcessingMode())
1476
RelationMapUpdateMap(RelationGetRelid(relation),
1477
RelationGetRelid(relation),
1481
* initialize the relation lock manager information
1483
RelationInitLockInfo(relation); /* see lmgr.c */
1486
* initialize physical addressing information for the relation
1488
RelationInitPhysicalAddr(relation);
1491
* initialize the rel-has-index flag, using hardwired knowledge
1493
if (IsBootstrapProcessingMode())
1495
/* In bootstrap mode, we have no indexes */
1496
relation->rd_rel->relhasindex = false;
1500
/* Otherwise, all the rels formrdesc is used for have indexes */
1501
relation->rd_rel->relhasindex = true;
1505
* add new reldesc to relcache
1507
RelationCacheInsert(relation);
1509
/* It's fully valid */
1510
relation->rd_isvalid = true;
1514
/* ----------------------------------------------------------------
1515
* Relation Descriptor Lookup Interface
1516
* ----------------------------------------------------------------
1520
* RelationIdGetRelation
1522
* Lookup a reldesc by OID; make one if not already in cache.
1524
* Returns NULL if no pg_class row could be found for the given relid
1525
* (suggesting we are trying to access a just-deleted relation).
1526
* Any other error is reported via elog.
1528
* NB: caller should already have at least AccessShareLock on the
1529
* relation ID, else there are nasty race conditions.
1531
* NB: relation ref count is incremented, or set to 1 if new entry.
1532
* Caller should eventually decrement count. (Usually,
1533
* that happens by calling RelationClose().)
1536
RelationIdGetRelation(Oid relationId)
1541
* first try to find reldesc in the cache
1543
RelationIdCacheLookup(relationId, rd);
1545
if (RelationIsValid(rd))
1547
RelationIncrementReferenceCount(rd);
1548
/* revalidate cache entry if necessary */
1549
if (!rd->rd_isvalid)
1552
* Indexes only have a limited number of possible schema changes,
1553
* and we don't want to use the full-blown procedure because it's
1554
* a headache for indexes that reload itself depends on.
1556
if (rd->rd_rel->relkind == RELKIND_INDEX)
1557
RelationReloadIndexInfo(rd);
1559
RelationClearRelation(rd, true);
1565
* no reldesc in the cache, so have RelationBuildDesc() build one and add
1568
rd = RelationBuildDesc(relationId, true);
1569
if (RelationIsValid(rd))
1570
RelationIncrementReferenceCount(rd);
1574
/* ----------------------------------------------------------------
1575
* cache invalidation support routines
1576
* ----------------------------------------------------------------
1580
* RelationIncrementReferenceCount
1581
* Increments relation reference count.
1583
* Note: bootstrap mode has its own weird ideas about relation refcount
1584
* behavior; we ought to fix it someday, but for now, just disable
1585
* reference count ownership tracking in bootstrap mode.
1588
RelationIncrementReferenceCount(Relation rel)
1590
ResourceOwnerEnlargeRelationRefs(CurrentResourceOwner);
1591
rel->rd_refcnt += 1;
1592
if (!IsBootstrapProcessingMode())
1593
ResourceOwnerRememberRelationRef(CurrentResourceOwner, rel);
1597
* RelationDecrementReferenceCount
1598
* Decrements relation reference count.
1601
RelationDecrementReferenceCount(Relation rel)
1603
Assert(rel->rd_refcnt > 0);
1604
rel->rd_refcnt -= 1;
1605
if (!IsBootstrapProcessingMode())
1606
ResourceOwnerForgetRelationRef(CurrentResourceOwner, rel);
1610
* RelationClose - close an open relation
1612
* Actually, we just decrement the refcount.
1614
* NOTE: if compiled with -DRELCACHE_FORCE_RELEASE then relcache entries
1615
* will be freed as soon as their refcount goes to zero. In combination
1616
* with aset.c's CLOBBER_FREED_MEMORY option, this provides a good test
1617
* to catch references to already-released relcache entries. It slows
1618
* things down quite a bit, however.
1621
RelationClose(Relation relation)
1623
/* Note: no locking manipulations needed */
1624
RelationDecrementReferenceCount(relation);
1626
#ifdef RELCACHE_FORCE_RELEASE
1627
if (RelationHasReferenceCountZero(relation) &&
1628
relation->rd_createSubid == InvalidSubTransactionId &&
1629
relation->rd_newRelfilenodeSubid == InvalidSubTransactionId)
1630
RelationClearRelation(relation, false);
1635
* RelationReloadIndexInfo - reload minimal information for an open index
1637
* This function is used only for indexes. A relcache inval on an index
1638
* can mean that its pg_class or pg_index row changed. There are only
1639
* very limited changes that are allowed to an existing index's schema,
1640
* so we can update the relcache entry without a complete rebuild; which
1641
* is fortunate because we can't rebuild an index entry that is "nailed"
1642
* and/or in active use. We support full replacement of the pg_class row,
1643
* as well as updates of a few simple fields of the pg_index row.
1645
* We can't necessarily reread the catalog rows right away; we might be
1646
* in a failed transaction when we receive the SI notification. If so,
1647
* RelationClearRelation just marks the entry as invalid by setting
1648
* rd_isvalid to false. This routine is called to fix the entry when it
1651
* We assume that at the time we are called, we have at least AccessShareLock
1652
* on the target index. (Note: in the calls from RelationClearRelation,
1653
* this is legitimate because we know the rel has positive refcount.)
1655
* If the target index is an index on pg_class or pg_index, we'd better have
1656
* previously gotten at least AccessShareLock on its underlying catalog,
1657
* else we are at risk of deadlock against someone trying to exclusive-lock
1658
* the heap and index in that order. This is ensured in current usage by
1659
* only applying this to indexes being opened or having positive refcount.
1662
RelationReloadIndexInfo(Relation relation)
1665
HeapTuple pg_class_tuple;
1668
/* Should be called only for invalidated indexes */
1669
Assert(relation->rd_rel->relkind == RELKIND_INDEX &&
1670
!relation->rd_isvalid);
1671
/* Should be closed at smgr level */
1672
Assert(relation->rd_smgr == NULL);
1674
/* Must free any AM cached data upon relcache flush */
1675
if (relation->rd_amcache)
1676
pfree(relation->rd_amcache);
1677
relation->rd_amcache = NULL;
1680
* If it's a shared index, we might be called before backend startup has
1681
* finished selecting a database, in which case we have no way to read
1682
* pg_class yet. However, a shared index can never have any significant
1683
* schema updates, so it's okay to ignore the invalidation signal. Just
1684
* mark it valid and return without doing anything more.
1686
if (relation->rd_rel->relisshared && !criticalRelcachesBuilt)
1688
relation->rd_isvalid = true;
1693
* Read the pg_class row
1695
* Don't try to use an indexscan of pg_class_oid_index to reload the info
1696
* for pg_class_oid_index ...
1698
indexOK = (RelationGetRelid(relation) != ClassOidIndexId);
1699
pg_class_tuple = ScanPgRelation(RelationGetRelid(relation), indexOK);
1700
if (!HeapTupleIsValid(pg_class_tuple))
1701
elog(ERROR, "could not find pg_class tuple for index %u",
1702
RelationGetRelid(relation));
1703
relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
1704
memcpy(relation->rd_rel, relp, CLASS_TUPLE_SIZE);
1705
/* Reload reloptions in case they changed */
1706
if (relation->rd_options)
1707
pfree(relation->rd_options);
1708
RelationParseRelOptions(relation, pg_class_tuple);
1709
/* done with pg_class tuple */
1710
heap_freetuple(pg_class_tuple);
1711
/* We must recalculate physical address in case it changed */
1712
RelationInitPhysicalAddr(relation);
1715
* For a non-system index, there are fields of the pg_index row that are
1716
* allowed to change, so re-read that row and update the relcache entry.
1717
* Most of the info derived from pg_index (such as support function lookup
1718
* info) cannot change, and indeed the whole point of this routine is to
1719
* update the relcache entry without clobbering that data; so wholesale
1720
* replacement is not appropriate.
1722
if (!IsSystemRelation(relation))
1725
Form_pg_index index;
1727
tuple = SearchSysCache1(INDEXRELID,
1728
ObjectIdGetDatum(RelationGetRelid(relation)));
1729
if (!HeapTupleIsValid(tuple))
1730
elog(ERROR, "cache lookup failed for index %u",
1731
RelationGetRelid(relation));
1732
index = (Form_pg_index) GETSTRUCT(tuple);
1734
relation->rd_index->indisvalid = index->indisvalid;
1735
relation->rd_index->indcheckxmin = index->indcheckxmin;
1736
relation->rd_index->indisready = index->indisready;
1737
HeapTupleHeaderSetXmin(relation->rd_indextuple->t_data,
1738
HeapTupleHeaderGetXmin(tuple->t_data));
1740
ReleaseSysCache(tuple);
1743
/* Okay, now it's valid again */
1744
relation->rd_isvalid = true;
1748
* RelationDestroyRelation
1750
* Physically delete a relation cache entry and all subsidiary data.
1751
* Caller must already have unhooked the entry from the hash table.
1754
RelationDestroyRelation(Relation relation)
1756
Assert(RelationHasReferenceCountZero(relation));
1759
* Make sure smgr and lower levels close the relation's files, if they
1760
* weren't closed already. (This was probably done by caller, but let's
1761
* just be real sure.)
1763
RelationCloseSmgr(relation);
1766
* Free all the subsidiary data structures of the relcache entry, then the
1769
if (relation->rd_rel)
1770
pfree(relation->rd_rel);
1771
/* can't use DecrTupleDescRefCount here */
1772
Assert(relation->rd_att->tdrefcount > 0);
1773
if (--relation->rd_att->tdrefcount == 0)
1774
FreeTupleDesc(relation->rd_att);
1775
list_free(relation->rd_indexlist);
1776
bms_free(relation->rd_indexattr);
1777
FreeTriggerDesc(relation->trigdesc);
1778
if (relation->rd_options)
1779
pfree(relation->rd_options);
1780
if (relation->rd_indextuple)
1781
pfree(relation->rd_indextuple);
1782
if (relation->rd_am)
1783
pfree(relation->rd_am);
1784
if (relation->rd_indexcxt)
1785
MemoryContextDelete(relation->rd_indexcxt);
1786
if (relation->rd_rulescxt)
1787
MemoryContextDelete(relation->rd_rulescxt);
1792
* RelationClearRelation
1794
* Physically blow away a relation cache entry, or reset it and rebuild
1795
* it from scratch (that is, from catalog entries). The latter path is
1796
* used when we are notified of a change to an open relation (one with
1799
* NB: when rebuilding, we'd better hold some lock on the relation,
1800
* else the catalog data we need to read could be changing under us.
1801
* Also, a rel to be rebuilt had better have refcnt > 0. This is because
1802
* an sinval reset could happen while we're accessing the catalogs, and
1803
* the rel would get blown away underneath us by RelationCacheInvalidate
1804
* if it has zero refcnt.
1806
* The "rebuild" parameter is redundant in current usage because it has
1807
* to match the relation's refcnt status, but we keep it as a crosscheck
1808
* that we're doing what the caller expects.
1811
RelationClearRelation(Relation relation, bool rebuild)
1814
* As per notes above, a rel to be rebuilt MUST have refcnt > 0; while of
1815
* course it would be a bad idea to blow away one with nonzero refcnt.
1818
!RelationHasReferenceCountZero(relation) :
1819
RelationHasReferenceCountZero(relation));
1822
* Make sure smgr and lower levels close the relation's files, if they
1823
* weren't closed already. If the relation is not getting deleted, the
1824
* next smgr access should reopen the files automatically. This ensures
1825
* that the low-level file access state is updated after, say, a vacuum
1828
RelationCloseSmgr(relation);
1831
* Never, never ever blow away a nailed-in system relation, because we'd
1832
* be unable to recover. However, we must redo RelationInitPhysicalAddr
1833
* in case it is a mapped relation whose mapping changed.
1835
* If it's a nailed index, then we need to re-read the pg_class row to see
1836
* if its relfilenode changed. We can't necessarily do that here, because
1837
* we might be in a failed transaction. We assume it's okay to do it if
1838
* there are open references to the relcache entry (cf notes for
1839
* AtEOXact_RelationCache). Otherwise just mark the entry as possibly
1840
* invalid, and it'll be fixed when next opened.
1842
if (relation->rd_isnailed)
1844
RelationInitPhysicalAddr(relation);
1846
if (relation->rd_rel->relkind == RELKIND_INDEX)
1848
relation->rd_isvalid = false; /* needs to be revalidated */
1849
if (relation->rd_refcnt > 1)
1850
RelationReloadIndexInfo(relation);
1856
* Even non-system indexes should not be blown away if they are open and
1857
* have valid index support information. This avoids problems with active
1858
* use of the index support information. As with nailed indexes, we
1859
* re-read the pg_class row to handle possible physical relocation of the
1860
* index, and we check for pg_index updates too.
1862
if (relation->rd_rel->relkind == RELKIND_INDEX &&
1863
relation->rd_refcnt > 0 &&
1864
relation->rd_indexcxt != NULL)
1866
relation->rd_isvalid = false; /* needs to be revalidated */
1867
RelationReloadIndexInfo(relation);
1871
/* Mark it invalid until we've finished rebuild */
1872
relation->rd_isvalid = false;
1875
* If we're really done with the relcache entry, blow it away. But if
1876
* someone is still using it, reconstruct the whole deal without moving
1877
* the physical RelationData record (so that the someone's pointer is
1882
/* Remove it from the hash table */
1883
RelationCacheDelete(relation);
1885
/* And release storage */
1886
RelationDestroyRelation(relation);
1891
* Our strategy for rebuilding an open relcache entry is to build a
1892
* new entry from scratch, swap its contents with the old entry, and
1893
* finally delete the new entry (along with any infrastructure swapped
1894
* over from the old entry). This is to avoid trouble in case an
1895
* error causes us to lose control partway through. The old entry
1896
* will still be marked !rd_isvalid, so we'll try to rebuild it again
1897
* on next access. Meanwhile it's not any less valid than it was
1898
* before, so any code that might expect to continue accessing it
1899
* isn't hurt by the rebuild failure. (Consider for example a
1900
* subtransaction that ALTERs a table and then gets cancelled partway
1901
* through the cache entry rebuild. The outer transaction should
1902
* still see the not-modified cache entry as valid.) The worst
1903
* consequence of an error is leaking the necessarily-unreferenced new
1904
* entry, and this shouldn't happen often enough for that to be a big
1907
* When rebuilding an open relcache entry, we must preserve ref count,
1908
* rd_createSubid/rd_newRelfilenodeSubid, and rd_toastoid state. Also
1909
* attempt to preserve the pg_class entry (rd_rel), tupledesc, and
1910
* rewrite-rule substructures in place, because various places assume
1911
* that these structures won't move while they are working with an
1912
* open relcache entry. (Note: the refcount mechanism for tupledescs
1913
* might someday allow us to remove this hack for the tupledesc.)
1915
* Note that this process does not touch CurrentResourceOwner; which
1916
* is good because whatever ref counts the entry may have do not
1917
* necessarily belong to that resource owner.
1920
Oid save_relid = RelationGetRelid(relation);
1924
/* Build temporary entry, but don't link it into hashtable */
1925
newrel = RelationBuildDesc(save_relid, false);
1928
/* Should only get here if relation was deleted */
1929
RelationCacheDelete(relation);
1930
RelationDestroyRelation(relation);
1931
elog(ERROR, "relation %u deleted while still in use", save_relid);
1934
keep_tupdesc = equalTupleDescs(relation->rd_att, newrel->rd_att);
1935
keep_rules = equalRuleLocks(relation->rd_rules, newrel->rd_rules);
1938
* Perform swapping of the relcache entry contents. Within this
1939
* process the old entry is momentarily invalid, so there *must* be no
1940
* possibility of CHECK_FOR_INTERRUPTS within this sequence. Do it in
1941
* all-in-line code for safety.
1943
* Since the vast majority of fields should be swapped, our method is
1944
* to swap the whole structures and then re-swap those few fields we
1945
* didn't want swapped.
1947
#define SWAPFIELD(fldtype, fldname) \
1949
fldtype _tmp = newrel->fldname; \
1950
newrel->fldname = relation->fldname; \
1951
relation->fldname = _tmp; \
1954
/* swap all Relation struct fields */
1956
RelationData tmpstruct;
1958
memcpy(&tmpstruct, newrel, sizeof(RelationData));
1959
memcpy(newrel, relation, sizeof(RelationData));
1960
memcpy(relation, &tmpstruct, sizeof(RelationData));
1963
/* rd_smgr must not be swapped, due to back-links from smgr level */
1964
SWAPFIELD(SMgrRelation, rd_smgr);
1965
/* rd_refcnt must be preserved */
1966
SWAPFIELD(int, rd_refcnt);
1967
/* isnailed shouldn't change */
1968
Assert(newrel->rd_isnailed == relation->rd_isnailed);
1969
/* creation sub-XIDs must be preserved */
1970
SWAPFIELD(SubTransactionId, rd_createSubid);
1971
SWAPFIELD(SubTransactionId, rd_newRelfilenodeSubid);
1972
/* un-swap rd_rel pointers, swap contents instead */
1973
SWAPFIELD(Form_pg_class, rd_rel);
1974
/* ... but actually, we don't have to update newrel->rd_rel */
1975
memcpy(relation->rd_rel, newrel->rd_rel, CLASS_TUPLE_SIZE);
1976
/* preserve old tupledesc and rules if no logical change */
1978
SWAPFIELD(TupleDesc, rd_att);
1981
SWAPFIELD(RuleLock *, rd_rules);
1982
SWAPFIELD(MemoryContext, rd_rulescxt);
1984
/* toast OID override must be preserved */
1985
SWAPFIELD(Oid, rd_toastoid);
1986
/* pgstat_info must be preserved */
1987
SWAPFIELD(struct PgStat_TableStatus *, pgstat_info);
1991
/* And now we can throw away the temporary entry */
1992
RelationDestroyRelation(newrel);
1997
* RelationFlushRelation
1999
* Rebuild the relation if it is open (refcount > 0), else blow it away.
2002
RelationFlushRelation(Relation relation)
2004
if (relation->rd_createSubid != InvalidSubTransactionId ||
2005
relation->rd_newRelfilenodeSubid != InvalidSubTransactionId)
2008
* New relcache entries are always rebuilt, not flushed; else we'd
2009
* forget the "new" status of the relation, which is a useful
2010
* optimization to have. Ditto for the new-relfilenode status.
2012
* The rel could have zero refcnt here, so temporarily increment the
2013
* refcnt to ensure it's safe to rebuild it. We can assume that the
2014
* current transaction has some lock on the rel already.
2016
RelationIncrementReferenceCount(relation);
2017
RelationClearRelation(relation, true);
2018
RelationDecrementReferenceCount(relation);
2023
* Pre-existing rels can be dropped from the relcache if not open.
2025
bool rebuild = !RelationHasReferenceCountZero(relation);
2027
RelationClearRelation(relation, rebuild);
2032
* RelationForgetRelation - unconditionally remove a relcache entry
2034
* External interface for destroying a relcache entry when we
2035
* drop the relation.
2038
RelationForgetRelation(Oid rid)
2042
RelationIdCacheLookup(rid, relation);
2044
if (!PointerIsValid(relation))
2045
return; /* not in cache, nothing to do */
2047
if (!RelationHasReferenceCountZero(relation))
2048
elog(ERROR, "relation %u is still open", rid);
2050
/* Unconditionally destroy the relcache entry */
2051
RelationClearRelation(relation, false);
2055
* RelationCacheInvalidateEntry
2057
* This routine is invoked for SI cache flush messages.
2059
* Any relcache entry matching the relid must be flushed. (Note: caller has
2060
* already determined that the relid belongs to our database or is a shared
2063
* We used to skip local relations, on the grounds that they could
2064
* not be targets of cross-backend SI update messages; but it seems
2065
* safer to process them, so that our *own* SI update messages will
2066
* have the same effects during CommandCounterIncrement for both
2067
* local and nonlocal relations.
2070
RelationCacheInvalidateEntry(Oid relationId)
2074
RelationIdCacheLookup(relationId, relation);
2076
if (PointerIsValid(relation))
2078
relcacheInvalsReceived++;
2079
RelationFlushRelation(relation);
2084
* RelationCacheInvalidate
2085
* Blow away cached relation descriptors that have zero reference counts,
2086
* and rebuild those with positive reference counts. Also reset the smgr
2087
* relation cache and re-read relation mapping data.
2089
* This is currently used only to recover from SI message buffer overflow,
2090
* so we do not touch new-in-transaction relations; they cannot be targets
2091
* of cross-backend SI updates (and our own updates now go through a
2092
* separate linked list that isn't limited by the SI message buffer size).
2093
* Likewise, we need not discard new-relfilenode-in-transaction hints,
2094
* since any invalidation of those would be a local event.
2096
* We do this in two phases: the first pass deletes deletable items, and
2097
* the second one rebuilds the rebuildable items. This is essential for
2098
* safety, because hash_seq_search only copes with concurrent deletion of
2099
* the element it is currently visiting. If a second SI overflow were to
2100
* occur while we are walking the table, resulting in recursive entry to
2101
* this routine, we could crash because the inner invocation blows away
2102
* the entry next to be visited by the outer scan. But this way is OK,
2103
* because (a) during the first pass we won't process any more SI messages,
2104
* so hash_seq_search will complete safely; (b) during the second pass we
2105
* only hold onto pointers to nondeletable entries.
2107
* The two-phase approach also makes it easy to ensure that we process
2108
* nailed-in-cache indexes before other nondeletable items, and that we
2109
* process pg_class_oid_index first of all. In scenarios where a nailed
2110
* index has been given a new relfilenode, we have to detect that update
2111
* before the nailed index is used in reloading any other relcache entry.
2114
RelationCacheInvalidate(void)
2116
HASH_SEQ_STATUS status;
2117
RelIdCacheEnt *idhentry;
2119
List *rebuildFirstList = NIL;
2120
List *rebuildList = NIL;
2124
hash_seq_init(&status, RelationIdCache);
2126
while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2128
relation = idhentry->reldesc;
2130
/* Must close all smgr references to avoid leaving dangling ptrs */
2131
RelationCloseSmgr(relation);
2133
/* Ignore new relations, since they are never SI targets */
2134
if (relation->rd_createSubid != InvalidSubTransactionId)
2137
relcacheInvalsReceived++;
2139
if (RelationHasReferenceCountZero(relation))
2141
/* Delete this entry immediately */
2142
Assert(!relation->rd_isnailed);
2143
RelationClearRelation(relation, false);
2148
* Add this entry to list of stuff to rebuild in second pass.
2149
* pg_class_oid_index goes on the front of rebuildFirstList, other
2150
* nailed indexes on the back, and everything else into
2151
* rebuildList (in no particular order).
2153
if (relation->rd_isnailed &&
2154
relation->rd_rel->relkind == RELKIND_INDEX)
2156
if (RelationGetRelid(relation) == ClassOidIndexId)
2157
rebuildFirstList = lcons(relation, rebuildFirstList);
2159
rebuildFirstList = lappend(rebuildFirstList, relation);
2162
rebuildList = lcons(relation, rebuildList);
2167
* Now zap any remaining smgr cache entries. This must happen before we
2168
* start to rebuild entries, since that may involve catalog fetches which
2169
* will re-open catalog files.
2174
* Reload relation mapping data before starting to reconstruct cache.
2176
RelationMapInvalidateAll();
2178
/* Phase 2: rebuild the items found to need rebuild in phase 1 */
2179
foreach(l, rebuildFirstList)
2181
relation = (Relation) lfirst(l);
2182
RelationClearRelation(relation, true);
2184
list_free(rebuildFirstList);
2185
foreach(l, rebuildList)
2187
relation = (Relation) lfirst(l);
2188
RelationClearRelation(relation, true);
2190
list_free(rebuildList);
2194
* RelationCloseSmgrByOid - close a relcache entry's smgr link
2196
* Needed in some cases where we are changing a relation's physical mapping.
2197
* The link will be automatically reopened on next use.
2200
RelationCloseSmgrByOid(Oid relationId)
2204
RelationIdCacheLookup(relationId, relation);
2206
if (!PointerIsValid(relation))
2207
return; /* not in cache, nothing to do */
2209
RelationCloseSmgr(relation);
2213
* AtEOXact_RelationCache
2215
* Clean up the relcache at main-transaction commit or abort.
2217
* Note: this must be called *before* processing invalidation messages.
2218
* In the case of abort, we don't want to try to rebuild any invalidated
2219
* cache entries (since we can't safely do database accesses). Therefore
2220
* we must reset refcnts before handling pending invalidations.
2222
* As of PostgreSQL 8.1, relcache refcnts should get released by the
2223
* ResourceOwner mechanism. This routine just does a debugging
2224
* cross-check that no pins remain. However, we also need to do special
2225
* cleanup when the current transaction created any relations or made use
2226
* of forced index lists.
2229
AtEOXact_RelationCache(bool isCommit)
2231
HASH_SEQ_STATUS status;
2232
RelIdCacheEnt *idhentry;
2235
* To speed up transaction exit, we want to avoid scanning the relcache
2236
* unless there is actually something for this routine to do. Other than
2237
* the debug-only Assert checks, most transactions don't create any work
2238
* for us to do here, so we keep a static flag that gets set if there is
2239
* anything to do. (Currently, this means either a relation is created in
2240
* the current xact, or one is given a new relfilenode, or an index list
2241
* is forced.) For simplicity, the flag remains set till end of top-level
2242
* transaction, even though we could clear it at subtransaction end in
2245
if (!need_eoxact_work
2246
#ifdef USE_ASSERT_CHECKING
2252
hash_seq_init(&status, RelationIdCache);
2254
while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2256
Relation relation = idhentry->reldesc;
2259
* The relcache entry's ref count should be back to its normal
2260
* not-in-a-transaction state: 0 unless it's nailed in cache.
2262
* In bootstrap mode, this is NOT true, so don't check it --- the
2263
* bootstrap code expects relations to stay open across start/commit
2264
* transaction calls. (That seems bogus, but it's not worth fixing.)
2266
#ifdef USE_ASSERT_CHECKING
2267
if (!IsBootstrapProcessingMode())
2269
int expected_refcnt;
2271
expected_refcnt = relation->rd_isnailed ? 1 : 0;
2272
Assert(relation->rd_refcnt == expected_refcnt);
2277
* Is it a relation created in the current transaction?
2279
* During commit, reset the flag to zero, since we are now out of the
2280
* creating transaction. During abort, simply delete the relcache
2281
* entry --- it isn't interesting any longer. (NOTE: if we have
2282
* forgotten the new-ness of a new relation due to a forced cache
2283
* flush, the entry will get deleted anyway by shared-cache-inval
2284
* processing of the aborted pg_class insertion.)
2286
if (relation->rd_createSubid != InvalidSubTransactionId)
2289
relation->rd_createSubid = InvalidSubTransactionId;
2292
RelationClearRelation(relation, false);
2298
* Likewise, reset the hint about the relfilenode being new.
2300
relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
2303
* Flush any temporary index list.
2305
if (relation->rd_indexvalid == 2)
2307
list_free(relation->rd_indexlist);
2308
relation->rd_indexlist = NIL;
2309
relation->rd_oidindex = InvalidOid;
2310
relation->rd_indexvalid = 0;
2314
/* Once done with the transaction, we can reset need_eoxact_work */
2315
need_eoxact_work = false;
2319
* AtEOSubXact_RelationCache
2321
* Clean up the relcache at sub-transaction commit or abort.
2323
* Note: this must be called *before* processing invalidation messages.
2326
AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
2327
SubTransactionId parentSubid)
2329
HASH_SEQ_STATUS status;
2330
RelIdCacheEnt *idhentry;
2333
* Skip the relcache scan if nothing to do --- see notes for
2334
* AtEOXact_RelationCache.
2336
if (!need_eoxact_work)
2339
hash_seq_init(&status, RelationIdCache);
2341
while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2343
Relation relation = idhentry->reldesc;
2346
* Is it a relation created in the current subtransaction?
2348
* During subcommit, mark it as belonging to the parent, instead.
2349
* During subabort, simply delete the relcache entry.
2351
if (relation->rd_createSubid == mySubid)
2354
relation->rd_createSubid = parentSubid;
2357
RelationClearRelation(relation, false);
2363
* Likewise, update or drop any new-relfilenode-in-subtransaction
2366
if (relation->rd_newRelfilenodeSubid == mySubid)
2369
relation->rd_newRelfilenodeSubid = parentSubid;
2371
relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
2375
* Flush any temporary index list.
2377
if (relation->rd_indexvalid == 2)
2379
list_free(relation->rd_indexlist);
2380
relation->rd_indexlist = NIL;
2381
relation->rd_oidindex = InvalidOid;
2382
relation->rd_indexvalid = 0;
2389
* RelationBuildLocalRelation
2390
* Build a relcache entry for an about-to-be-created relation,
2391
* and enter it into the relcache.
2394
RelationBuildLocalRelation(const char *relname,
2399
bool shared_relation,
2400
bool mapped_relation,
2401
char relpersistence)
2404
MemoryContext oldcxt;
2405
int natts = tupDesc->natts;
2410
AssertArg(natts >= 0);
2413
* check for creation of a rel that must be nailed in cache.
2415
* XXX this list had better match the relations specially handled in
2416
* RelationCacheInitializePhase2/3.
2420
case DatabaseRelationId:
2421
case AuthIdRelationId:
2422
case AuthMemRelationId:
2423
case RelationRelationId:
2424
case AttributeRelationId:
2425
case ProcedureRelationId:
2426
case TypeRelationId:
2435
* check that hardwired list of shared rels matches what's in the
2436
* bootstrap .bki file. If you get a failure here during initdb, you
2437
* probably need to fix IsSharedRelation() to match whatever you've done
2438
* to the set of shared relations.
2440
if (shared_relation != IsSharedRelation(relid))
2441
elog(ERROR, "shared_relation flag for \"%s\" does not match IsSharedRelation(%u)",
2444
/* Shared relations had better be mapped, too */
2445
Assert(mapped_relation || !shared_relation);
2448
* switch to the cache context to create the relcache entry.
2450
if (!CacheMemoryContext)
2451
CreateCacheMemoryContext();
2453
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
2456
* allocate a new relation descriptor and fill in basic state fields.
2458
rel = (Relation) palloc0(sizeof(RelationData));
2460
/* make sure relation is marked as having no open file yet */
2461
rel->rd_smgr = NULL;
2463
/* mark it nailed if appropriate */
2464
rel->rd_isnailed = nailit;
2466
rel->rd_refcnt = nailit ? 1 : 0;
2468
/* it's being created in this transaction */
2469
rel->rd_createSubid = GetCurrentSubTransactionId();
2470
rel->rd_newRelfilenodeSubid = InvalidSubTransactionId;
2472
/* must flag that we have rels created in this transaction */
2473
need_eoxact_work = true;
2476
* create a new tuple descriptor from the one passed in. We do this
2477
* partly to copy it into the cache context, and partly because the new
2478
* relation can't have any defaults or constraints yet; they have to be
2479
* added in later steps, because they require additions to multiple system
2480
* catalogs. We can copy attnotnull constraints here, however.
2482
rel->rd_att = CreateTupleDescCopy(tupDesc);
2483
rel->rd_att->tdrefcount = 1; /* mark as refcounted */
2484
has_not_null = false;
2485
for (i = 0; i < natts; i++)
2487
rel->rd_att->attrs[i]->attnotnull = tupDesc->attrs[i]->attnotnull;
2488
has_not_null |= tupDesc->attrs[i]->attnotnull;
2493
TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
2495
constr->has_not_null = true;
2496
rel->rd_att->constr = constr;
2500
* initialize relation tuple form (caller may add/override data later)
2502
rel->rd_rel = (Form_pg_class) palloc0(CLASS_TUPLE_SIZE);
2504
namestrcpy(&rel->rd_rel->relname, relname);
2505
rel->rd_rel->relnamespace = relnamespace;
2507
rel->rd_rel->relkind = RELKIND_UNCATALOGED;
2508
rel->rd_rel->relhasoids = rel->rd_att->tdhasoid;
2509
rel->rd_rel->relnatts = natts;
2510
rel->rd_rel->reltype = InvalidOid;
2511
/* needed when bootstrapping: */
2512
rel->rd_rel->relowner = BOOTSTRAP_SUPERUSERID;
2514
/* set up persistence; rd_backend is a function of persistence type */
2515
rel->rd_rel->relpersistence = relpersistence;
2516
switch (relpersistence)
2518
case RELPERSISTENCE_UNLOGGED:
2519
case RELPERSISTENCE_PERMANENT:
2520
rel->rd_backend = InvalidBackendId;
2522
case RELPERSISTENCE_TEMP:
2523
rel->rd_backend = MyBackendId;
2526
elog(ERROR, "invalid relpersistence: %c", relpersistence);
2531
* Insert relation physical and logical identifiers (OIDs) into the right
2532
* places. Note that the physical ID (relfilenode) is initially the same
2533
* as the logical ID (OID); except that for a mapped relation, we set
2534
* relfilenode to zero and rely on RelationInitPhysicalAddr to consult the
2537
rel->rd_rel->relisshared = shared_relation;
2539
RelationGetRelid(rel) = relid;
2541
for (i = 0; i < natts; i++)
2542
rel->rd_att->attrs[i]->attrelid = relid;
2544
rel->rd_rel->reltablespace = reltablespace;
2546
if (mapped_relation)
2548
rel->rd_rel->relfilenode = InvalidOid;
2549
/* Add it to the active mapping information */
2550
RelationMapUpdateMap(relid, relid, shared_relation, true);
2553
rel->rd_rel->relfilenode = relid;
2555
RelationInitLockInfo(rel); /* see lmgr.c */
2557
RelationInitPhysicalAddr(rel);
2560
* Okay to insert into the relcache hash tables.
2562
RelationCacheInsert(rel);
2565
* done building relcache entry.
2567
MemoryContextSwitchTo(oldcxt);
2569
/* It's fully valid */
2570
rel->rd_isvalid = true;
2573
* Caller expects us to pin the returned entry.
2575
RelationIncrementReferenceCount(rel);
2582
* RelationSetNewRelfilenode
2584
* Assign a new relfilenode (physical file name) to the relation.
2586
* This allows a full rewrite of the relation to be done with transactional
2587
* safety (since the filenode assignment can be rolled back). Note however
2588
* that there is no simple way to access the relation's old data for the
2589
* remainder of the current transaction. This limits the usefulness to cases
2590
* such as TRUNCATE or rebuilding an index from scratch.
2592
* Caller must already hold exclusive lock on the relation.
2594
* The relation is marked with relfrozenxid = freezeXid (InvalidTransactionId
2595
* must be passed for indexes and sequences). This should be a lower bound on
2596
* the XIDs that will be put into the new relation contents.
2599
RelationSetNewRelfilenode(Relation relation, TransactionId freezeXid)
2602
RelFileNodeBackend newrnode;
2605
Form_pg_class classform;
2607
/* Indexes, sequences must have Invalid frozenxid; other rels must not */
2608
Assert((relation->rd_rel->relkind == RELKIND_INDEX ||
2609
relation->rd_rel->relkind == RELKIND_SEQUENCE) ?
2610
freezeXid == InvalidTransactionId :
2611
TransactionIdIsNormal(freezeXid));
2613
/* Allocate a new relfilenode */
2614
newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace, NULL,
2615
relation->rd_rel->relpersistence);
2618
* Get a writable copy of the pg_class tuple for the given relation.
2620
pg_class = heap_open(RelationRelationId, RowExclusiveLock);
2622
tuple = SearchSysCacheCopy1(RELOID,
2623
ObjectIdGetDatum(RelationGetRelid(relation)));
2624
if (!HeapTupleIsValid(tuple))
2625
elog(ERROR, "could not find tuple for relation %u",
2626
RelationGetRelid(relation));
2627
classform = (Form_pg_class) GETSTRUCT(tuple);
2630
* Create storage for the main fork of the new relfilenode.
2632
* NOTE: any conflict in relfilenode value will be caught here, if
2633
* GetNewRelFileNode messes up for any reason.
2635
newrnode.node = relation->rd_node;
2636
newrnode.node.relNode = newrelfilenode;
2637
newrnode.backend = relation->rd_backend;
2638
RelationCreateStorage(newrnode.node, relation->rd_rel->relpersistence);
2639
smgrclosenode(newrnode);
2642
* Schedule unlinking of the old storage at transaction commit.
2644
RelationDropStorage(relation);
2647
* Now update the pg_class row. However, if we're dealing with a mapped
2648
* index, pg_class.relfilenode doesn't change; instead we have to send the
2649
* update to the relation mapper.
2651
if (RelationIsMapped(relation))
2652
RelationMapUpdateMap(RelationGetRelid(relation),
2654
relation->rd_rel->relisshared,
2657
classform->relfilenode = newrelfilenode;
2659
/* These changes are safe even for a mapped relation */
2660
if (relation->rd_rel->relkind != RELKIND_SEQUENCE)
2662
classform->relpages = 0; /* it's empty until further notice */
2663
classform->reltuples = 0;
2665
classform->relfrozenxid = freezeXid;
2667
simple_heap_update(pg_class, &tuple->t_self, tuple);
2668
CatalogUpdateIndexes(pg_class, tuple);
2670
heap_freetuple(tuple);
2672
heap_close(pg_class, RowExclusiveLock);
2675
* Make the pg_class row change visible, as well as the relation map
2676
* change if any. This will cause the relcache entry to get updated, too.
2678
CommandCounterIncrement();
2681
* Mark the rel as having been given a new relfilenode in the current
2682
* (sub) transaction. This is a hint that can be used to optimize later
2683
* operations on the rel in the same transaction.
2685
relation->rd_newRelfilenodeSubid = GetCurrentSubTransactionId();
2686
/* ... and now we have eoxact cleanup work to do */
2687
need_eoxact_work = true;
2692
* RelationCacheInitialize
2694
* This initializes the relation descriptor cache. At the time
2695
* that this is invoked, we can't do database access yet (mainly
2696
* because the transaction subsystem is not up); all we are doing
2697
* is making an empty cache hashtable. This must be done before
2698
* starting the initialization transaction, because otherwise
2699
* AtEOXact_RelationCache would crash if that transaction aborts
2700
* before we can get the relcache set up.
2703
#define INITRELCACHESIZE 400
2706
RelationCacheInitialize(void)
2711
* make sure cache memory context exists
2713
if (!CacheMemoryContext)
2714
CreateCacheMemoryContext();
2717
* create hashtable that indexes the relcache
2719
MemSet(&ctl, 0, sizeof(ctl));
2720
ctl.keysize = sizeof(Oid);
2721
ctl.entrysize = sizeof(RelIdCacheEnt);
2722
ctl.hash = oid_hash;
2723
RelationIdCache = hash_create("Relcache by OID", INITRELCACHESIZE,
2724
&ctl, HASH_ELEM | HASH_FUNCTION);
2727
* relation mapper needs to be initialized too
2729
RelationMapInitialize();
2733
* RelationCacheInitializePhase2
2735
* This is called to prepare for access to shared catalogs during startup.
2736
* We must at least set up nailed reldescs for pg_database, pg_authid,
2737
* and pg_auth_members. Ideally we'd like to have reldescs for their
2738
* indexes, too. We attempt to load this information from the shared
2739
* relcache init file. If that's missing or broken, just make phony
2740
* entries for the catalogs themselves. RelationCacheInitializePhase3
2741
* will clean up as needed.
2744
RelationCacheInitializePhase2(void)
2746
MemoryContext oldcxt;
2749
* relation mapper needs to be initialized too
2751
RelationMapInitializePhase2();
2754
* In bootstrap mode, the shared catalogs aren't there yet anyway, so do
2757
if (IsBootstrapProcessingMode())
2761
* switch to cache memory context
2763
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
2766
* Try to load the shared relcache cache file. If unsuccessful, bootstrap
2767
* the cache with pre-made descriptors for the critical shared catalogs.
2769
if (!load_relcache_init_file(true))
2771
formrdesc("pg_database", DatabaseRelation_Rowtype_Id, true,
2772
true, Natts_pg_database, Desc_pg_database);
2773
formrdesc("pg_authid", AuthIdRelation_Rowtype_Id, true,
2774
true, Natts_pg_authid, Desc_pg_authid);
2775
formrdesc("pg_auth_members", AuthMemRelation_Rowtype_Id, true,
2776
false, Natts_pg_auth_members, Desc_pg_auth_members);
2778
#define NUM_CRITICAL_SHARED_RELS 3 /* fix if you change list above */
2781
MemoryContextSwitchTo(oldcxt);
2785
* RelationCacheInitializePhase3
2787
* This is called as soon as the catcache and transaction system
2788
* are functional and we have determined MyDatabaseId. At this point
2789
* we can actually read data from the database's system catalogs.
2790
* We first try to read pre-computed relcache entries from the local
2791
* relcache init file. If that's missing or broken, make phony entries
2792
* for the minimum set of nailed-in-cache relations. Then (unless
2793
* bootstrapping) make sure we have entries for the critical system
2794
* indexes. Once we've done all this, we have enough infrastructure to
2795
* open any system catalog or use any catcache. The last step is to
2796
* rewrite the cache files if needed.
2799
RelationCacheInitializePhase3(void)
2801
HASH_SEQ_STATUS status;
2802
RelIdCacheEnt *idhentry;
2803
MemoryContext oldcxt;
2804
bool needNewCacheFile = !criticalSharedRelcachesBuilt;
2807
* relation mapper needs to be initialized too
2809
RelationMapInitializePhase3();
2812
* switch to cache memory context
2814
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
2817
* Try to load the local relcache cache file. If unsuccessful, bootstrap
2818
* the cache with pre-made descriptors for the critical "nailed-in" system
2821
if (IsBootstrapProcessingMode() ||
2822
!load_relcache_init_file(false))
2824
needNewCacheFile = true;
2826
formrdesc("pg_class", RelationRelation_Rowtype_Id, false,
2827
true, Natts_pg_class, Desc_pg_class);
2828
formrdesc("pg_attribute", AttributeRelation_Rowtype_Id, false,
2829
false, Natts_pg_attribute, Desc_pg_attribute);
2830
formrdesc("pg_proc", ProcedureRelation_Rowtype_Id, false,
2831
true, Natts_pg_proc, Desc_pg_proc);
2832
formrdesc("pg_type", TypeRelation_Rowtype_Id, false,
2833
true, Natts_pg_type, Desc_pg_type);
2835
#define NUM_CRITICAL_LOCAL_RELS 4 /* fix if you change list above */
2838
MemoryContextSwitchTo(oldcxt);
2840
/* In bootstrap mode, the faked-up formrdesc info is all we'll have */
2841
if (IsBootstrapProcessingMode())
2845
* If we didn't get the critical system indexes loaded into relcache, do
2846
* so now. These are critical because the catcache and/or opclass cache
2847
* depend on them for fetches done during relcache load. Thus, we have an
2848
* infinite-recursion problem. We can break the recursion by doing
2849
* heapscans instead of indexscans at certain key spots. To avoid hobbling
2850
* performance, we only want to do that until we have the critical indexes
2851
* loaded into relcache. Thus, the flag criticalRelcachesBuilt is used to
2852
* decide whether to do heapscan or indexscan at the key spots, and we set
2853
* it true after we've loaded the critical indexes.
2855
* The critical indexes are marked as "nailed in cache", partly to make it
2856
* easy for load_relcache_init_file to count them, but mainly because we
2857
* cannot flush and rebuild them once we've set criticalRelcachesBuilt to
2858
* true. (NOTE: perhaps it would be possible to reload them by
2859
* temporarily setting criticalRelcachesBuilt to false again. For now,
2860
* though, we just nail 'em in.)
2862
* RewriteRelRulenameIndexId and TriggerRelidNameIndexId are not critical
2863
* in the same way as the others, because the critical catalogs don't
2864
* (currently) have any rules or triggers, and so these indexes can be
2865
* rebuilt without inducing recursion. However, they are used during
2866
* relcache load when a rel does have rules or triggers, so we choose to
2867
* nail them for performance reasons.
2869
if (!criticalRelcachesBuilt)
2871
load_critical_index(ClassOidIndexId,
2872
RelationRelationId);
2873
load_critical_index(AttributeRelidNumIndexId,
2874
AttributeRelationId);
2875
load_critical_index(IndexRelidIndexId,
2877
load_critical_index(OpclassOidIndexId,
2878
OperatorClassRelationId);
2879
load_critical_index(AccessMethodProcedureIndexId,
2880
AccessMethodProcedureRelationId);
2881
load_critical_index(RewriteRelRulenameIndexId,
2883
load_critical_index(TriggerRelidNameIndexId,
2886
#define NUM_CRITICAL_LOCAL_INDEXES 7 /* fix if you change list above */
2888
criticalRelcachesBuilt = true;
2892
* Process critical shared indexes too.
2894
* DatabaseNameIndexId isn't critical for relcache loading, but rather for
2895
* initial lookup of MyDatabaseId, without which we'll never find any
2896
* non-shared catalogs at all. Autovacuum calls InitPostgres with a
2897
* database OID, so it instead depends on DatabaseOidIndexId. We also
2898
* need to nail up some indexes on pg_authid and pg_auth_members for use
2899
* during client authentication.
2901
if (!criticalSharedRelcachesBuilt)
2903
load_critical_index(DatabaseNameIndexId,
2904
DatabaseRelationId);
2905
load_critical_index(DatabaseOidIndexId,
2906
DatabaseRelationId);
2907
load_critical_index(AuthIdRolnameIndexId,
2909
load_critical_index(AuthIdOidIndexId,
2911
load_critical_index(AuthMemMemRoleIndexId,
2914
#define NUM_CRITICAL_SHARED_INDEXES 5 /* fix if you change list above */
2916
criticalSharedRelcachesBuilt = true;
2920
* Now, scan all the relcache entries and update anything that might be
2921
* wrong in the results from formrdesc or the relcache cache file. If we
2922
* faked up relcache entries using formrdesc, then read the real pg_class
2923
* rows and replace the fake entries with them. Also, if any of the
2924
* relcache entries have rules or triggers, load that info the hard way
2925
* since it isn't recorded in the cache file.
2927
* Whenever we access the catalogs to read data, there is a possibility of
2928
* a shared-inval cache flush causing relcache entries to be removed.
2929
* Since hash_seq_search only guarantees to still work after the *current*
2930
* entry is removed, it's unsafe to continue the hashtable scan afterward.
2931
* We handle this by restarting the scan from scratch after each access.
2932
* This is theoretically O(N^2), but the number of entries that actually
2933
* need to be fixed is small enough that it doesn't matter.
2935
hash_seq_init(&status, RelationIdCache);
2937
while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2939
Relation relation = idhentry->reldesc;
2940
bool restart = false;
2943
* Make sure *this* entry doesn't get flushed while we work with it.
2945
RelationIncrementReferenceCount(relation);
2948
* If it's a faked-up entry, read the real pg_class tuple.
2950
if (relation->rd_rel->relowner == InvalidOid)
2955
htup = SearchSysCache1(RELOID,
2956
ObjectIdGetDatum(RelationGetRelid(relation)));
2957
if (!HeapTupleIsValid(htup))
2958
elog(FATAL, "cache lookup failed for relation %u",
2959
RelationGetRelid(relation));
2960
relp = (Form_pg_class) GETSTRUCT(htup);
2963
* Copy tuple to relation->rd_rel. (See notes in
2964
* AllocateRelationDesc())
2966
memcpy((char *) relation->rd_rel, (char *) relp, CLASS_TUPLE_SIZE);
2968
/* Update rd_options while we have the tuple */
2969
if (relation->rd_options)
2970
pfree(relation->rd_options);
2971
RelationParseRelOptions(relation, htup);
2974
* Check the values in rd_att were set up correctly. (We cannot
2975
* just copy them over now: formrdesc must have set up the rd_att
2976
* data correctly to start with, because it may already have been
2977
* copied into one or more catcache entries.)
2979
Assert(relation->rd_att->tdtypeid == relp->reltype);
2980
Assert(relation->rd_att->tdtypmod == -1);
2981
Assert(relation->rd_att->tdhasoid == relp->relhasoids);
2983
ReleaseSysCache(htup);
2985
/* relowner had better be OK now, else we'll loop forever */
2986
if (relation->rd_rel->relowner == InvalidOid)
2987
elog(ERROR, "invalid relowner in pg_class entry for \"%s\"",
2988
RelationGetRelationName(relation));
2994
* Fix data that isn't saved in relcache cache file.
2996
* relhasrules or relhastriggers could possibly be wrong or out of
2997
* date. If we don't actually find any rules or triggers, clear the
2998
* local copy of the flag so that we don't get into an infinite loop
2999
* here. We don't make any attempt to fix the pg_class entry, though.
3001
if (relation->rd_rel->relhasrules && relation->rd_rules == NULL)
3003
RelationBuildRuleLock(relation);
3004
if (relation->rd_rules == NULL)
3005
relation->rd_rel->relhasrules = false;
3008
if (relation->rd_rel->relhastriggers && relation->trigdesc == NULL)
3010
RelationBuildTriggers(relation);
3011
if (relation->trigdesc == NULL)
3012
relation->rd_rel->relhastriggers = false;
3016
/* Release hold on the relation */
3017
RelationDecrementReferenceCount(relation);
3019
/* Now, restart the hashtable scan if needed */
3022
hash_seq_term(&status);
3023
hash_seq_init(&status, RelationIdCache);
3028
* Lastly, write out new relcache cache files if needed. We don't bother
3029
* to distinguish cases where only one of the two needs an update.
3031
if (needNewCacheFile)
3034
* Force all the catcaches to finish initializing and thereby open the
3035
* catalogs and indexes they use. This will preload the relcache with
3036
* entries for all the most important system catalogs and indexes, so
3037
* that the init files will be most useful for future backends.
3039
InitCatalogCachePhase2();
3041
/* reset initFileRelationIds list; we'll fill it during write */
3042
initFileRelationIds = NIL;
3044
/* now write the files */
3045
write_relcache_init_file(true);
3046
write_relcache_init_file(false);
3051
* Load one critical system index into the relcache
3053
* indexoid is the OID of the target index, heapoid is the OID of the catalog
3057
load_critical_index(Oid indexoid, Oid heapoid)
3062
* We must lock the underlying catalog before locking the index to avoid
3063
* deadlock, since RelationBuildDesc might well need to read the catalog,
3064
* and if anyone else is exclusive-locking this catalog and index they'll
3065
* be doing it in that order.
3067
LockRelationOid(heapoid, AccessShareLock);
3068
LockRelationOid(indexoid, AccessShareLock);
3069
ird = RelationBuildDesc(indexoid, true);
3071
elog(PANIC, "could not open critical system index %u", indexoid);
3072
ird->rd_isnailed = true;
3074
UnlockRelationOid(indexoid, AccessShareLock);
3075
UnlockRelationOid(heapoid, AccessShareLock);
3079
* GetPgClassDescriptor -- get a predefined tuple descriptor for pg_class
3080
* GetPgIndexDescriptor -- get a predefined tuple descriptor for pg_index
3082
* We need this kluge because we have to be able to access non-fixed-width
3083
* fields of pg_class and pg_index before we have the standard catalog caches
3084
* available. We use predefined data that's set up in just the same way as
3085
* the bootstrapped reldescs used by formrdesc(). The resulting tupdesc is
3086
* not 100% kosher: it does not have the correct rowtype OID in tdtypeid, nor
3087
* does it have a TupleConstr field. But it's good enough for the purpose of
3088
* extracting fields.
3091
BuildHardcodedDescriptor(int natts, const FormData_pg_attribute *attrs,
3095
MemoryContext oldcxt;
3098
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
3100
result = CreateTemplateTupleDesc(natts, hasoids);
3101
result->tdtypeid = RECORDOID; /* not right, but we don't care */
3102
result->tdtypmod = -1;
3104
for (i = 0; i < natts; i++)
3106
memcpy(result->attrs[i], &attrs[i], ATTRIBUTE_FIXED_PART_SIZE);
3107
/* make sure attcacheoff is valid */
3108
result->attrs[i]->attcacheoff = -1;
3111
/* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
3112
result->attrs[0]->attcacheoff = 0;
3114
/* Note: we don't bother to set up a TupleConstr entry */
3116
MemoryContextSwitchTo(oldcxt);
3122
GetPgClassDescriptor(void)
3124
static TupleDesc pgclassdesc = NULL;
3127
if (pgclassdesc == NULL)
3128
pgclassdesc = BuildHardcodedDescriptor(Natts_pg_class,
3136
GetPgIndexDescriptor(void)
3138
static TupleDesc pgindexdesc = NULL;
3141
if (pgindexdesc == NULL)
3142
pgindexdesc = BuildHardcodedDescriptor(Natts_pg_index,
3150
* Load any default attribute value definitions for the relation.
3153
AttrDefaultFetch(Relation relation)
3155
AttrDefault *attrdef = relation->rd_att->constr->defval;
3156
int ndef = relation->rd_att->constr->num_defval;
3167
Anum_pg_attrdef_adrelid,
3168
BTEqualStrategyNumber, F_OIDEQ,
3169
ObjectIdGetDatum(RelationGetRelid(relation)));
3171
adrel = heap_open(AttrDefaultRelationId, AccessShareLock);
3172
adscan = systable_beginscan(adrel, AttrDefaultIndexId, true,
3173
SnapshotNow, 1, &skey);
3176
while (HeapTupleIsValid(htup = systable_getnext(adscan)))
3178
Form_pg_attrdef adform = (Form_pg_attrdef) GETSTRUCT(htup);
3180
for (i = 0; i < ndef; i++)
3182
if (adform->adnum != attrdef[i].adnum)
3184
if (attrdef[i].adbin != NULL)
3185
elog(WARNING, "multiple attrdef records found for attr %s of rel %s",
3186
NameStr(relation->rd_att->attrs[adform->adnum - 1]->attname),
3187
RelationGetRelationName(relation));
3191
val = fastgetattr(htup,
3192
Anum_pg_attrdef_adbin,
3193
adrel->rd_att, &isnull);
3195
elog(WARNING, "null adbin for attr %s of rel %s",
3196
NameStr(relation->rd_att->attrs[adform->adnum - 1]->attname),
3197
RelationGetRelationName(relation));
3199
attrdef[i].adbin = MemoryContextStrdup(CacheMemoryContext,
3200
TextDatumGetCString(val));
3205
elog(WARNING, "unexpected attrdef record found for attr %d of rel %s",
3206
adform->adnum, RelationGetRelationName(relation));
3209
systable_endscan(adscan);
3210
heap_close(adrel, AccessShareLock);
3213
elog(WARNING, "%d attrdef record(s) missing for rel %s",
3214
ndef - found, RelationGetRelationName(relation));
3218
* Load any check constraints for the relation.
3221
/*
 * CheckConstraintFetch
 *   Scans pg_constraint for rows whose conrelid is this relation, keeps
 *   only rows of type CONSTRAINT_CHECK, and copies each constraint name
 *   and conbin string into relation->rd_att->constr->check. Every anomaly
 *   reported in the code below uses elog(ERROR).
 */
CheckConstraintFetch(Relation relation)
3223
/* Array of check-constraint slots to fill in, and the expected count. */
ConstrCheck *check = relation->rd_att->constr->check;
3224
int ncheck = relation->rd_att->constr->num_check;
3226
SysScanDesc conscan;
3227
ScanKeyData skey[1];
3233
/* Scan key: conrelid must equal the OID of this relation. */
ScanKeyInit(&skey[0],
3234
Anum_pg_constraint_conrelid,
3235
BTEqualStrategyNumber, F_OIDEQ,
3236
ObjectIdGetDatum(RelationGetRelid(relation)));
3238
/* Open pg_constraint and scan it through ConstraintRelidIndexId. */
conrel = heap_open(ConstraintRelationId, AccessShareLock);
3239
conscan = systable_beginscan(conrel, ConstraintRelidIndexId, true,
3240
SnapshotNow, 1, skey);
3242
while (HeapTupleIsValid(htup = systable_getnext(conscan)))
3244
Form_pg_constraint conform = (Form_pg_constraint) GETSTRUCT(htup);
3246
/* We want check constraints only */
3247
if (conform->contype != CONSTRAINT_CHECK)
3250
/* found is the index used into check[] below, so it must stay below ncheck. */
if (found >= ncheck)
3251
elog(ERROR, "unexpected constraint record found for rel %s",
3252
RelationGetRelationName(relation));
3254
/* Copy the constraint name into CacheMemoryContext. */
check[found].ccname = MemoryContextStrdup(CacheMemoryContext,
3255
NameStr(conform->conname));
3257
/* Grab and test conbin is actually set */
3258
val = fastgetattr(htup,
3259
Anum_pg_constraint_conbin,
3260
conrel->rd_att, &isnull);
3262
elog(ERROR, "null conbin for rel %s",
3263
RelationGetRelationName(relation));
3265
/* Copy the constraint expression text into CacheMemoryContext as well. */
check[found].ccbin = MemoryContextStrdup(CacheMemoryContext,
3266
TextDatumGetCString(val));
3270
systable_endscan(conscan);
3271
heap_close(conrel, AccessShareLock);
3273
/* Fewer check rows than num_check promised is an error too. */
if (found != ncheck)
3274
elog(ERROR, "%d constraint record(s) missing for rel %s",
3275
ncheck - found, RelationGetRelationName(relation));
3279
* RelationGetIndexList -- get a list of OIDs of indexes on this relation
3281
* The index list is created only if someone requests it. We scan pg_index
3282
* to find relevant indexes, and add the list to the relcache entry so that
3283
* we won't have to compute it again. Note that shared cache inval of a
3284
* relcache entry will delete the old list and set rd_indexvalid to 0,
3285
* so that we must recompute the index list on next request. This handles
3286
* creation or deletion of an index.
3288
* The returned list is guaranteed to be sorted in order by OID. This is
3289
* needed by the executor, since for index types that we obtain exclusive
3290
* locks on when updating the index, all backends must lock the indexes in
3291
* the same order or we will get deadlocks (see ExecOpenIndices()). Any
3292
* consistent ordering would do, but ordering by OID is easy.
3294
* Since shared cache inval causes the relcache's copy of the list to go away,
3295
* we return a copy of the list palloc'd in the caller's context. The caller
3296
* may list_free() the returned list after scanning it. This is necessary
3297
* since the caller will typically be doing syscache lookups on the relevant
3298
* indexes, and syscache lookup could cause SI messages to be processed!
3300
* We also update rd_oidindex, which this module treats as effectively part
3301
* of the index list. rd_oidindex is valid when rd_indexvalid isn't zero;
3302
* it is the pg_class OID of a unique index on OID when the relation has one,
3303
* and InvalidOid if there is no such index.
3306
/*
 * RelationGetIndexList
 *   Returns a list of the OIDs of the indexes on the relation. A nonzero
 *   rd_indexvalid short-circuits to a list_copy of the cached rd_indexlist.
 *   Otherwise pg_index is scanned for rows with indrelid equal to this
 *   relation, each indexrelid is added through insert_ordered_oid, and
 *   the result is cached in rd_indexlist together with rd_oidindex.
 */
RelationGetIndexList(Relation relation)
3309
SysScanDesc indscan;
3314
MemoryContext oldcxt;
3316
/* Quick exit if we already computed the list. */
3317
if (relation->rd_indexvalid != 0)
3318
/* The caller gets its own copy, never the cached list itself. */
return list_copy(relation->rd_indexlist);
3321
* We build the list we intend to return (in the caller's context) while
3322
* doing the scan. After successfully completing the scan, we copy that
3323
* list into the relcache entry. This avoids cache-context memory leakage
3324
* if we get some sort of error partway through.
3327
oidIndex = InvalidOid;
3329
/* Prepare to scan pg_index for entries having indrelid = this rel. */
3331
Anum_pg_index_indrelid,
3332
BTEqualStrategyNumber, F_OIDEQ,
3333
ObjectIdGetDatum(RelationGetRelid(relation)));
3335
indrel = heap_open(IndexRelationId, AccessShareLock);
3336
indscan = systable_beginscan(indrel, IndexIndrelidIndexId, true,
3337
SnapshotNow, 1, &skey);
3339
while (HeapTupleIsValid(htup = systable_getnext(indscan)))
3341
Form_pg_index index = (Form_pg_index) GETSTRUCT(htup);
3343
/* Add index's OID to result list in the proper order */
3344
result = insert_ordered_oid(result, index->indexrelid);
3346
/* Check to see if it is a unique, non-partial btree index on OID */
3347
if (index->indnatts == 1 &&
3348
index->indisunique && index->indimmediate &&
3349
index->indkey.values[0] == ObjectIdAttributeNumber &&
3350
index->indclass.values[0] == OID_BTREE_OPS_OID &&
3351
heap_attisnull(htup, Anum_pg_index_indpred))
3352
/* Remember this index; it is stored in rd_oidindex after the scan. */
oidIndex = index->indexrelid;
3355
systable_endscan(indscan);
3356
heap_close(indrel, AccessShareLock);
3358
/* Now save a copy of the completed list in the relcache entry. */
3359
/* list_copy below runs while CacheMemoryContext is the current context. */
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
3360
relation->rd_indexlist = list_copy(result);
3361
relation->rd_oidindex = oidIndex;
3362
/* A nonzero rd_indexvalid makes the quick exit at the top fire next time. */
relation->rd_indexvalid = 1;
3363
MemoryContextSwitchTo(oldcxt);
3369
* insert_ordered_oid
3370
* Insert a new Oid into a sorted list of Oids, preserving ordering
3372
* Building the ordered list this way is O(N^2), but with a pretty small
3373
* constant, so for the number of entries we expect it will probably be
3374
* faster than trying to apply qsort(). Most tables don't have very many
3378
/*
 * insert_ordered_oid
 *   Adds datum to a list of OIDs that is kept in ascending order. An empty
 *   list, or a datum smaller than the first element, is handled by
 *   prepending with lcons_oid. Otherwise the list is walked from its head
 *   to find the cell after which the datum is inserted.
 */
insert_ordered_oid(List *list, Oid datum)
3382
/* Does the datum belong at the front? */
3383
if (list == NIL || datum < linitial_oid(list))
3384
return lcons_oid(datum, list);
3385
/* No, so find the entry it belongs after */
3386
prev = list_head(list);
3389
ListCell *curr = lnext(prev);
3391
/* Stop at the end of the list or at the first element greater than datum. */
if (curr == NULL || datum < lfirst_oid(curr))
3392
break; /* it belongs after 'prev', before 'curr' */
3396
/* Insert datum into list after 'prev' */
3397
lappend_cell_oid(list, prev, datum);
3402
* RelationSetIndexList -- externally force the index list contents
3404
* This is used to temporarily override what we think the set of valid
3405
* indexes is (including the presence or absence of an OID index).
3406
* The forcing will be valid only until transaction commit or abort.
3408
* This should only be applied to nailed relations, because in a non-nailed
3409
* relation the hacked index list could be lost at any time due to SI
3410
* messages. In practice it is only used on pg_class (see REINDEX).
3412
* It is up to the caller to make sure the given list is correctly ordered.
3414
* We deliberately do not change rd_indexattr here: even when operating
3415
* with a temporary partial index list, HOT-update decisions must be made
3416
* correctly with respect to the full index set. It is up to the caller
3417
* to ensure that a correct rd_indexattr set has been cached before first
3418
* calling RelationSetIndexList; else a subsequent inquiry might cause a
3419
* wrong rd_indexattr set to get computed and cached.
3422
/*
 * RelationSetIndexList
 *   Replaces the cached index list and OID index of a nailed relation with
 *   the values supplied by the caller and marks the list as forced by
 *   setting rd_indexvalid to 2.
 *
 *   relation: relcache entry to modify; asserted to be nailed.
 *   indexIds: index OID list to install; a copy is stored, not this list.
 *   oidIndex: value to store in rd_oidindex.
 */
RelationSetIndexList(Relation relation, List *indexIds, Oid oidIndex)
3424
MemoryContext oldcxt;
3426
Assert(relation->rd_isnailed);
3427
/* Copy the list into the cache context (could fail for lack of mem) */
3428
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
3429
/* From here on indexIds refers to the private copy, not the caller list. */
indexIds = list_copy(indexIds);
3430
MemoryContextSwitchTo(oldcxt);
3431
/* Okay to replace old list */
3432
/* The old list is freed only after the copy above has been made. */
list_free(relation->rd_indexlist);
3433
relation->rd_indexlist = indexIds;
3434
relation->rd_oidindex = oidIndex;
3435
relation->rd_indexvalid = 2; /* mark list as forced */
3436
/* must flag that we have a forced index list */
3437
need_eoxact_work = true;
3441
* RelationGetOidIndex -- get the pg_class OID of the relation's OID index
3443
* Returns InvalidOid if there is no such index.
3446
/*
 * RelationGetOidIndex
 *   Returns relation->rd_oidindex. The relation is asserted to have OIDs
 *   (rd_rel->relhasoids). While rd_indexvalid is still zero,
 *   RelationGetIndexList is called first; the assertion that follows the
 *   call expects it to have left rd_indexvalid nonzero.
 */
RelationGetOidIndex(Relation relation)
3451
* If relation doesn't have OIDs at all, caller is probably confused. (We
3452
* could just silently return InvalidOid, but it seems better to throw an
3455
Assert(relation->rd_rel->relhasoids);
3457
if (relation->rd_indexvalid == 0)
3459
/* RelationGetIndexList does the heavy lifting. */
3460
ilist = RelationGetIndexList(relation);
3462
Assert(relation->rd_indexvalid != 0);
3465
return relation->rd_oidindex;
3469
* RelationGetIndexExpressions -- get the index expressions for an index
3471
* We cache the result of transforming pg_index.indexprs into a node tree.
3472
* If the rel is not an index or has no expressional columns, we return NIL.
3473
* Otherwise, the returned tree is copied into the caller's memory context.
3474
* (We don't want to return a pointer to the relcache copy, since it could
3475
* disappear due to relcache invalidation.)
3478
/*
 * RelationGetIndexExpressions
 *   Returns the expression list of an index. A cached rd_indexprs is
 *   returned as a copyObject copy. Otherwise the indexprs column of
 *   rd_indextuple is read, converted with stringToNode, passed through
 *   eval_const_expressions, set_coercionform_dontcare and fix_opfuncids,
 *   and a copy of the result is cached in rd_indexprs.
 */
RelationGetIndexExpressions(Relation relation)
3484
MemoryContext oldcxt;
3486
/* Quick exit if we already computed the result. */
3487
if (relation->rd_indexprs)
3488
/* Hand back a copy, never the cached tree itself. */
return (List *) copyObject(relation->rd_indexprs);
3490
/* Quick exit if there is nothing to do. */
3491
/* Nothing to do without a pg_index tuple or with a null indexprs column. */
if (relation->rd_indextuple == NULL ||
3492
heap_attisnull(relation->rd_indextuple, Anum_pg_index_indexprs))
3496
* We build the tree we intend to return in the caller's context. After
3497
* successfully completing the work, we copy it into the relcache entry.
3498
* This avoids problems if we get some sort of error partway through.
3500
exprsDatum = heap_getattr(relation->rd_indextuple,
3501
Anum_pg_index_indexprs,
3502
GetPgIndexDescriptor(),
3505
exprsString = TextDatumGetCString(exprsDatum);
3506
result = (List *) stringToNode(exprsString);
3510
* Run the expressions through eval_const_expressions. This is not just an
3511
* optimization, but is necessary, because the planner will be comparing
3512
* them to similarly-processed qual clauses, and may fail to detect valid
3513
* matches without this. We don't bother with canonicalize_qual, however.
3515
result = (List *) eval_const_expressions(NULL, (Node *) result);
3518
* Also mark any coercion format fields as "don't care", so that the
3519
* planner can match to both explicit and implicit coercions.
3521
set_coercionform_dontcare((Node *) result);
3523
/* May as well fix opfuncids too */
3524
fix_opfuncids((Node *) result);
3526
/* Now save a copy of the completed tree in the relcache entry. */
3527
/* copyObject below runs while rd_indexcxt is the current memory context. */
oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
3528
relation->rd_indexprs = (List *) copyObject(result);
3529
MemoryContextSwitchTo(oldcxt);
3535
* RelationGetIndexPredicate -- get the index predicate for an index
3537
* We cache the result of transforming pg_index.indpred into an implicit-AND
3538
* node tree (suitable for ExecQual).
3539
* If the rel is not an index or has no predicate, we return NIL.
3540
* Otherwise, the returned tree is copied into the caller's memory context.
3541
* (We don't want to return a pointer to the relcache copy, since it could
3542
* disappear due to relcache invalidation.)
3545
/*
 * RelationGetIndexPredicate
 *   Returns the predicate of an index as an implicit-AND list. A cached
 *   rd_indpred is returned as a copyObject copy. Otherwise the indpred
 *   column of rd_indextuple is read, converted with stringToNode, passed
 *   through eval_const_expressions, canonicalize_qual,
 *   set_coercionform_dontcare, make_ands_implicit and fix_opfuncids, and
 *   a copy of the result is cached in rd_indpred.
 */
RelationGetIndexPredicate(Relation relation)
3551
MemoryContext oldcxt;
3553
/* Quick exit if we already computed the result. */
3554
if (relation->rd_indpred)
3555
/* Hand back a copy, never the cached tree itself. */
return (List *) copyObject(relation->rd_indpred);
3557
/* Quick exit if there is nothing to do. */
3558
/* Nothing to do without a pg_index tuple or with a null indpred column. */
if (relation->rd_indextuple == NULL ||
3559
heap_attisnull(relation->rd_indextuple, Anum_pg_index_indpred))
3563
* We build the tree we intend to return in the caller's context. After
3564
* successfully completing the work, we copy it into the relcache entry.
3565
* This avoids problems if we get some sort of error partway through.
3567
predDatum = heap_getattr(relation->rd_indextuple,
3568
Anum_pg_index_indpred,
3569
GetPgIndexDescriptor(),
3572
predString = TextDatumGetCString(predDatum);
3573
result = (List *) stringToNode(predString);
3577
* Run the expression through const-simplification and canonicalization.
3578
* This is not just an optimization, but is necessary, because the planner
3579
* will be comparing it to similarly-processed qual clauses, and may fail
3580
* to detect valid matches without this. This must match the processing
3581
* done to qual clauses in preprocess_expression()! (We can skip the
3582
* stuff involving subqueries, however, since we don't allow any in index
3585
result = (List *) eval_const_expressions(NULL, (Node *) result);
3587
result = (List *) canonicalize_qual((Expr *) result);
3590
* Also mark any coercion format fields as "don't care", so that the
3591
* planner can match to both explicit and implicit coercions.
3593
set_coercionform_dontcare((Node *) result);
3595
/* Also convert to implicit-AND format */
3596
result = make_ands_implicit((Expr *) result);
3598
/* May as well fix opfuncids too */
3599
fix_opfuncids((Node *) result);
3601
/* Now save a copy of the completed tree in the relcache entry. */
3602
/* copyObject below runs while rd_indexcxt is the current memory context. */
oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
3603
relation->rd_indpred = (List *) copyObject(result);
3604
MemoryContextSwitchTo(oldcxt);
3610
* RelationGetIndexAttrBitmap -- get a bitmap of index attribute numbers
3612
* The result has a bit set for each attribute used anywhere in the index
3613
* definitions of all the indexes on this relation. (This includes not only
3614
* simple index keys, but attributes used in expressions and partial-index
3617
* Attribute numbers are offset by FirstLowInvalidHeapAttributeNumber so that
3618
* we can include system attributes (e.g., OID) in the bitmap representation.
3620
* Caller had better hold at least RowExclusiveLock on the target relation
3621
* to ensure that it has a stable set of indexes. This also makes it safe
3622
* (deadlock-free) for us to take locks on the relation's indexes.
3624
* The returned result is palloc'd in the caller's memory context and should
3625
* be bms_free'd when not needed anymore.
3628
/*
 * RelationGetIndexAttrBitmap
 *   Builds a bitmap of the attributes referenced by the indexes of the
 *   relation: key columns, index expressions and index predicates. A
 *   cached rd_indexattr is returned as a bms_copy. Otherwise every index
 *   from RelationGetIndexList is opened, its attributes are collected,
 *   and a copy of the bitmap is cached in rd_indexattr.
 */
RelationGetIndexAttrBitmap(Relation relation)
3630
Bitmapset *indexattrs;
3633
MemoryContext oldcxt;
3635
/* Quick exit if we already computed the result. */
3636
if (relation->rd_indexattr != NULL)
3637
return bms_copy(relation->rd_indexattr);
3639
/* Fast path if definitely no indexes */
3640
if (!RelationGetForm(relation)->relhasindex)
3644
* Get cached list of index OIDs
3646
indexoidlist = RelationGetIndexList(relation);
3648
/* Fall out if no indexes (but relhasindex was set) */
3649
if (indexoidlist == NIL)
3653
* For each index, add referenced attributes to indexattrs.
3656
foreach(l, indexoidlist)
3658
Oid indexOid = lfirst_oid(l);
3660
IndexInfo *indexInfo;
3663
/* Each index is opened here and closed again at the end of this iteration. */
indexDesc = index_open(indexOid, AccessShareLock);
3665
/* Extract index key information from the index's pg_index row */
3666
indexInfo = BuildIndexInfo(indexDesc);
3668
/* Collect simple attribute references */
3669
for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
3671
int attrnum = indexInfo->ii_KeyAttrNumbers[i];
3674
/* Bitmap members are attrnum minus FirstLowInvalidHeapAttributeNumber. */
indexattrs = bms_add_member(indexattrs,
3675
attrnum - FirstLowInvalidHeapAttributeNumber);
3678
/* Collect all attributes used in expressions, too */
3679
pull_varattnos((Node *) indexInfo->ii_Expressions, &indexattrs);
3681
/* Collect all attributes in the index predicate, too */
3682
pull_varattnos((Node *) indexInfo->ii_Predicate, &indexattrs);
3684
index_close(indexDesc, AccessShareLock);
3687
/* The OID list obtained from RelationGetIndexList above is no longer needed. */
list_free(indexoidlist);
3689
/* Now save a copy of the bitmap in the relcache entry. */
3690
/* bms_copy below runs while CacheMemoryContext is the current context. */
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
3691
relation->rd_indexattr = bms_copy(indexattrs);
3692
MemoryContextSwitchTo(oldcxt);
3694
/* We return our original working copy for caller to play with */
3699
* RelationGetExclusionInfo -- get info about index's exclusion constraint
3701
* This should be called only for an index that is known to have an
3702
* associated exclusion constraint. It returns arrays (palloc'd in caller's
3703
* context) of the exclusion operator OIDs, their underlying functions'
3704
* OIDs, and their strategy numbers in the index's opclasses. We cache
3705
* all this information since it requires a fair amount of work to get.
3708
/*
 * RelationGetExclusionInfo
 *   Fills three newly palloc-ed output arrays, each with one entry per
 *   index column (rd_rel->relnatts): the exclusion operator OIDs, the
 *   get_opcode result for each operator, and the strategy number of each
 *   operator in the opfamily of its column. Cached values in rd_exclops,
 *   rd_exclprocs and rd_exclstrats are copied out when present; otherwise
 *   pg_constraint is scanned and the results are cached afterwards.
 */
RelationGetExclusionInfo(Relation indexRelation,
3711
uint16 **strategies)
3713
/* Number of index columns; sizes every array handled below. */
int ncols = indexRelation->rd_rel->relnatts;
3718
SysScanDesc conscan;
3719
ScanKeyData skey[1];
3722
MemoryContext oldcxt;
3725
/* Allocate result space in caller context */
3726
*operators = ops = (Oid *) palloc(sizeof(Oid) * ncols);
3727
*procs = funcs = (Oid *) palloc(sizeof(Oid) * ncols);
3728
*strategies = strats = (uint16 *) palloc(sizeof(uint16) * ncols);
3730
/* Quick exit if we have the data cached already */
3731
/* rd_exclstrats alone is tested as the marker that all three arrays are cached. */
if (indexRelation->rd_exclstrats != NULL)
3733
memcpy(ops, indexRelation->rd_exclops, sizeof(Oid) * ncols);
3734
memcpy(funcs, indexRelation->rd_exclprocs, sizeof(Oid) * ncols);
3735
memcpy(strats, indexRelation->rd_exclstrats, sizeof(uint16) * ncols);
3740
* Search pg_constraint for the constraint associated with the index. To
3741
* make this not too painfully slow, we use the index on conrelid; that
3742
* will hold the parent relation's OID not the index's own OID.
3744
ScanKeyInit(&skey[0],
3745
Anum_pg_constraint_conrelid,
3746
BTEqualStrategyNumber, F_OIDEQ,
3747
ObjectIdGetDatum(indexRelation->rd_index->indrelid));
3749
conrel = heap_open(ConstraintRelationId, AccessShareLock);
3750
conscan = systable_beginscan(conrel, ConstraintRelidIndexId, true,
3751
SnapshotNow, 1, skey);
3754
while (HeapTupleIsValid(htup = systable_getnext(conscan)))
3756
Form_pg_constraint conform = (Form_pg_constraint) GETSTRUCT(htup);
3762
/* We want the exclusion constraint owning the index */
3763
if (conform->contype != CONSTRAINT_EXCLUSION ||
3764
conform->conindid != RelationGetRelid(indexRelation))
3767
/* There should be only one */
3769
elog(ERROR, "unexpected exclusion constraint record found for rel %s",
3770
RelationGetRelationName(indexRelation));
3773
/* Extract the operator OIDs from conexclop */
3774
val = fastgetattr(htup,
3775
Anum_pg_constraint_conexclop,
3776
conrel->rd_att, &isnull);
3778
elog(ERROR, "null conexclop for rel %s",
3779
RelationGetRelationName(indexRelation));
3781
arr = DatumGetArrayTypeP(val); /* ensure not toasted */
3782
/* NOTE(review): the first dimension is read before the ARR_NDIM check below; confirm this is safe for an array with zero dimensions. */
nelem = ARR_DIMS(arr)[0];
3783
if (ARR_NDIM(arr) != 1 ||
3786
ARR_ELEMTYPE(arr) != OIDOID)
3787
elog(ERROR, "conexclop is not a 1-D Oid array");
3789
/* Copy ncols operator OIDs out of the array data area. */
memcpy(ops, ARR_DATA_PTR(arr), sizeof(Oid) * ncols);
3792
systable_endscan(conscan);
3793
heap_close(conrel, AccessShareLock);
3796
elog(ERROR, "exclusion constraint record missing for rel %s",
3797
RelationGetRelationName(indexRelation));
3799
/* We need the func OIDs and strategy numbers too */
3800
for (i = 0; i < ncols; i++)
3802
funcs[i] = get_opcode(ops[i]);
3803
strats[i] = get_op_opfamily_strategy(ops[i],
3804
indexRelation->rd_opfamily[i]);
3805
/* shouldn't fail, since it was checked at index creation */
3806
if (strats[i] == InvalidStrategy)
3807
elog(ERROR, "could not find strategy for operator %u in family %u",
3808
ops[i], indexRelation->rd_opfamily[i]);
3811
/* Save a copy of the results in the relcache entry. */
3812
/* The three palloc calls below run while rd_indexcxt is the current context. */
oldcxt = MemoryContextSwitchTo(indexRelation->rd_indexcxt);
3813
indexRelation->rd_exclops = (Oid *) palloc(sizeof(Oid) * ncols);
3814
indexRelation->rd_exclprocs = (Oid *) palloc(sizeof(Oid) * ncols);
3815
indexRelation->rd_exclstrats = (uint16 *) palloc(sizeof(uint16) * ncols);
3816
memcpy(indexRelation->rd_exclops, ops, sizeof(Oid) * ncols);
3817
memcpy(indexRelation->rd_exclprocs, funcs, sizeof(Oid) * ncols);
3818
memcpy(indexRelation->rd_exclstrats, strats, sizeof(uint16) * ncols);
3819
MemoryContextSwitchTo(oldcxt);
3824
* load_relcache_init_file, write_relcache_init_file
3826
* In late 1992, we started regularly having databases with more than
3827
* a thousand classes in them. With this number of classes, it became
3828
* critical to do indexed lookups on the system catalogs.
3830
* Bootstrapping these lookups is very hard. We want to be able to
3831
* use an index on pg_attribute, for example, but in order to do so,
3832
* we must have read pg_attribute for the attributes in the index,
3833
* which implies that we need to use the index.
3835
* In order to get around the problem, we do the following:
3837
* + When the database system is initialized (at initdb time), we
3838
* don't use indexes. We do sequential scans.
3840
* + When the backend is started up in normal mode, we load an image
3841
* of the appropriate relation descriptors, in internal format,
3842
* from an initialization file in the data/base/... directory.
3844
* + If the initialization file isn't there, then we create the
3845
* relation descriptors using sequential scans and write 'em to
3846
* the initialization file for use by subsequent backends.
3848
* As of Postgres 9.0, there is one local initialization file in each
3849
* database, plus one shared initialization file for shared catalogs.
3851
* We could dispense with the initialization files and just build the
3852
* critical reldescs the hard way on every backend startup, but that
3853
* slows down backend startup noticeably.
3855
* We can in fact go further, and save more relcache entries than
3856
* just the ones that are absolutely critical; this allows us to speed
3857
* up backend startup by not having to build such entries the hard way.
3858
* Presently, all the catalog and index entries that are referred to
3859
* by catcaches are stored in the initialization files.
3861
* The same mechanism that detects when catcache and relcache entries
3862
* need to be invalidated (due to catalog updates) also arranges to
3863
* unlink the initialization files when the contents may be out of date.
3864
* The files will then be rebuilt during the next backend startup.
3868
* load_relcache_init_file -- attempt to load cache from the shared
3869
* or local cache init file
3871
* If successful, return TRUE and set criticalRelcachesBuilt or
3872
* criticalSharedRelcachesBuilt to true.
3873
* If not successful, return FALSE.
3875
* NOTE: we assume we are already switched into CacheMemoryContext.
3878
/*
 * load_relcache_init_file
 *   Reads relcache entries from an init file whose name is built either
 *   under global/ or under DatabasePath (presumably selected by the shared
 *   argument). After a magic number, each item in the file is a length
 *   word followed by that many bytes. All entries are read into a
 *   temporary array first and are inserted into the relcache only after
 *   the whole file has been read and the nailed-entry counts checked.
 */
load_relcache_init_file(bool shared)
3881
char initfilename[MAXPGPATH];
3892
/* File name for the shared-catalog init file. */
snprintf(initfilename, sizeof(initfilename), "global/%s",
3893
RELCACHE_INIT_FILENAME);
3895
/* File name for the init file of the current database. */
snprintf(initfilename, sizeof(initfilename), "%s/%s",
3896
DatabasePath, RELCACHE_INIT_FILENAME);
3898
fp = AllocateFile(initfilename, PG_BINARY_R);
3903
* Read the index relcache entries from the file. Note we will not enter
3904
* any of them into the cache if the read fails partway through; this
3905
* helps to guard against broken init files.
3908
/* Temporary array of entries; it is enlarged with repalloc inside the loop. */
rels = (Relation *) palloc(max_rels * sizeof(Relation));
3910
nailed_rels = nailed_indexes = 0;
3912
/* check for correct magic number (compatible version) */
3913
if (fread(&magic, 1, sizeof(magic), fp) != sizeof(magic))
3915
if (magic != RELCACHE_INIT_FILEMAGIC)
3918
/* One iteration per relcache entry stored in the file. */
for (relno = 0;; relno++)
3923
Form_pg_class relform;
3926
/* first read the relation descriptor length */
3927
nread = fread(&len, 1, sizeof(len), fp);
3928
if (nread != sizeof(len))
3931
break; /* end of file */
3935
/* safety check for incompatible relcache layout */
3936
if (len != sizeof(RelationData))
3939
/* allocate another relcache header */
3940
if (num_rels >= max_rels)
3943
rels = (Relation *) repalloc(rels, max_rels * sizeof(Relation));
3946
/* len was compared with sizeof(RelationData) just above. */
rel = rels[num_rels++] = (Relation) palloc(len);
3948
/* then, read the Relation structure */
3949
if (fread(rel, 1, len, fp) != len)
3952
/* next read the relation tuple form */
3953
if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
3956
relform = (Form_pg_class) palloc(len);
3957
if (fread(relform, 1, len, fp) != len)
3960
/* Point rd_rel at the form just read, replacing the value loaded with the struct. */
rel->rd_rel = relform;
3962
/* initialize attribute tuple forms */
3963
rel->rd_att = CreateTemplateTupleDesc(relform->relnatts,
3964
relform->relhasoids);
3965
rel->rd_att->tdrefcount = 1; /* mark as refcounted */
3967
rel->rd_att->tdtypeid = relform->reltype;
3968
rel->rd_att->tdtypmod = -1; /* unnecessary, but... */
3970
/* next read all the attribute tuple form data entries */
3971
has_not_null = false;
3972
for (i = 0; i < relform->relnatts; i++)
3974
if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
3976
/* Every stored attribute entry must have exactly the fixed-part size. */
if (len != ATTRIBUTE_FIXED_PART_SIZE)
3978
if (fread(rel->rd_att->attrs[i], 1, len, fp) != len)
3981
/* Accumulates whether any attribute read so far has attnotnull set. */
has_not_null |= rel->rd_att->attrs[i]->attnotnull;
3984
/* next read the access method specific field */
3985
if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
3989
rel->rd_options = palloc(len);
3990
if (fread(rel->rd_options, 1, len, fp) != len)
3992
/* The stored length must agree with the varlena size of the data read. */
if (len != VARSIZE(rel->rd_options))
3993
goto read_failed; /* sanity check */
3997
rel->rd_options = NULL;
4000
/* mark not-null status */
4003
/* palloc0 zeroes the struct, so only has_not_null is set explicitly. */
TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
4005
constr->has_not_null = true;
4006
rel->rd_att->constr = constr;
4009
/* If it's an index, there's more to do */
4010
if (rel->rd_rel->relkind == RELKIND_INDEX)
4013
MemoryContext indexcxt;
4016
RegProcedure *support;
4021
/* Count nailed indexes to ensure we have 'em all */
4022
if (rel->rd_isnailed)
4025
/* next, read the pg_index tuple */
4026
if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
4029
rel->rd_indextuple = (HeapTuple) palloc(len);
4030
if (fread(rel->rd_indextuple, 1, len, fp) != len)
4033
/* Fix up internal pointers in the tuple -- see heap_copytuple */
4034
rel->rd_indextuple->t_data = (HeapTupleHeader) ((char *) rel->rd_indextuple + HEAPTUPLESIZE);
4035
rel->rd_index = (Form_pg_index) GETSTRUCT(rel->rd_indextuple);
4037
/* next, read the access method tuple form */
4038
if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
4041
am = (Form_pg_am) palloc(len);
4042
if (fread(am, 1, len, fp) != len)
4047
* prepare index info context --- parameters should match
4048
* RelationInitIndexAccessInfo
4050
/* Child context of CacheMemoryContext; the per-index vectors below are allocated in it. */
indexcxt = AllocSetContextCreate(CacheMemoryContext,
4051
RelationGetRelationName(rel),
4052
ALLOCSET_SMALL_MINSIZE,
4053
ALLOCSET_SMALL_INITSIZE,
4054
ALLOCSET_SMALL_MAXSIZE);
4055
rel->rd_indexcxt = indexcxt;
4057
/* next, read the vector of opfamily OIDs */
4058
if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
4061
opfamily = (Oid *) MemoryContextAlloc(indexcxt, len);
4062
if (fread(opfamily, 1, len, fp) != len)
4065
rel->rd_opfamily = opfamily;
4067
/* next, read the vector of opcintype OIDs */
4068
if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
4071
opcintype = (Oid *) MemoryContextAlloc(indexcxt, len);
4072
if (fread(opcintype, 1, len, fp) != len)
4075
rel->rd_opcintype = opcintype;
4077
/* next, read the vector of support procedure OIDs */
4078
if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
4080
support = (RegProcedure *) MemoryContextAlloc(indexcxt, len);
4081
if (fread(support, 1, len, fp) != len)
4084
rel->rd_support = support;
4086
/* next, read the vector of collation OIDs */
4087
if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
4090
indcollation = (Oid *) MemoryContextAlloc(indexcxt, len);
4091
if (fread(indcollation, 1, len, fp) != len)
4094
rel->rd_indcollation = indcollation;
4096
/* finally, read the vector of indoption values */
4097
if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
4100
indoption = (int16 *) MemoryContextAlloc(indexcxt, len);
4101
if (fread(indoption, 1, len, fp) != len)
4104
rel->rd_indoption = indoption;
4106
/* set up zeroed fmgr-info vectors */
4107
rel->rd_aminfo = (RelationAmInfo *)
4108
MemoryContextAllocZero(indexcxt, sizeof(RelationAmInfo));
4109
/* One FmgrInfo slot per combination of index column and support procedure. */
nsupport = relform->relnatts * am->amsupport;
4110
rel->rd_supportinfo = (FmgrInfo *)
4111
MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo));
4115
/* Count nailed rels to ensure we have 'em all */
4116
if (rel->rd_isnailed)
4119
/* Presumably the non-index case: every index-only field is expected to be NULL. */
Assert(rel->rd_index == NULL);
4120
Assert(rel->rd_indextuple == NULL);
4121
Assert(rel->rd_am == NULL);
4122
Assert(rel->rd_indexcxt == NULL);
4123
Assert(rel->rd_aminfo == NULL);
4124
Assert(rel->rd_opfamily == NULL);
4125
Assert(rel->rd_opcintype == NULL);
4126
Assert(rel->rd_support == NULL);
4127
Assert(rel->rd_supportinfo == NULL);
4128
Assert(rel->rd_indoption == NULL);
4129
Assert(rel->rd_indcollation == NULL);
4133
* Rules and triggers are not saved (mainly because the internal
4134
* format is complex and subject to change). They must be rebuilt if
4135
* needed by RelationCacheInitializePhase3. This is not expected to
4136
* be a big performance hit since few system catalogs have such. Ditto
4137
* for index expressions, predicates, and exclusion info.
4139
rel->rd_rules = NULL;
4140
rel->rd_rulescxt = NULL;
4141
rel->trigdesc = NULL;
4142
rel->rd_indexprs = NIL;
4143
rel->rd_indpred = NIL;
4144
rel->rd_exclops = NULL;
4145
rel->rd_exclprocs = NULL;
4146
rel->rd_exclstrats = NULL;
4149
* Reset transient-state fields in the relcache entry
4151
rel->rd_smgr = NULL;
4152
if (rel->rd_isnailed)
4156
/* Index list information is not taken from the file; it starts out empty and invalid. */
rel->rd_indexvalid = 0;
4157
rel->rd_indexlist = NIL;
4158
rel->rd_indexattr = NULL;
4159
rel->rd_oidindex = InvalidOid;
4160
rel->rd_createSubid = InvalidSubTransactionId;
4161
rel->rd_newRelfilenodeSubid = InvalidSubTransactionId;
4162
rel->rd_amcache = NULL;
4163
MemSet(&rel->pgstat_info, 0, sizeof(rel->pgstat_info));
4166
* Recompute lock and physical addressing info. This is needed in
4167
* case the pg_internal.init file was copied from some other database
4168
* by CREATE DATABASE.
4170
RelationInitLockInfo(rel);
4171
RelationInitPhysicalAddr(rel);
4175
* We reached the end of the init file without apparent problem. Did we
4176
* get the right number of nailed items? (This is a useful crosscheck in
4177
* case the set of critical rels or indexes changes.)
4181
/* Expected nailed counts for the shared init file. */
if (nailed_rels != NUM_CRITICAL_SHARED_RELS ||
4182
nailed_indexes != NUM_CRITICAL_SHARED_INDEXES)
4187
/* Expected nailed counts for the local init file. */
if (nailed_rels != NUM_CRITICAL_LOCAL_RELS ||
4188
nailed_indexes != NUM_CRITICAL_LOCAL_INDEXES)
4193
* OK, all appears well.
4195
* Now insert all the new relcache entries into the cache.
4197
for (relno = 0; relno < num_rels; relno++)
4199
RelationCacheInsert(rels[relno]);
4200
/* also make a list of their OIDs, for RelationIdIsInInitFile */
4202
initFileRelationIds = lcons_oid(RelationGetRelid(rels[relno]),
4203
initFileRelationIds);
4210
/* Record that the critical entries of the shared or the local group are now loaded. */
criticalSharedRelcachesBuilt = true;
4212
criticalRelcachesBuilt = true;
4216
* init file is broken, so do it the hard way. We don't bother trying to
4217
* free the clutter we just allocated; it's not in the relcache so it
4228
* Write out a new initialization file with the current contents
4229
* of the relcache (either shared rels or local rels, as indicated).
4232
write_relcache_init_file(bool shared)
4235
char tempfilename[MAXPGPATH];
4236
char finalfilename[MAXPGPATH];
4238
HASH_SEQ_STATUS status;
4239
RelIdCacheEnt *idhentry;
4240
MemoryContext oldcxt;
4244
* We must write a temporary file and rename it into place. Otherwise,
4245
* another backend starting at about the same time might crash trying to
4246
* read the partially-complete file.
4250
snprintf(tempfilename, sizeof(tempfilename), "global/%s.%d",
4251
RELCACHE_INIT_FILENAME, MyProcPid);
4252
snprintf(finalfilename, sizeof(finalfilename), "global/%s",
4253
RELCACHE_INIT_FILENAME);
4257
snprintf(tempfilename, sizeof(tempfilename), "%s/%s.%d",
4258
DatabasePath, RELCACHE_INIT_FILENAME, MyProcPid);
4259
snprintf(finalfilename, sizeof(finalfilename), "%s/%s",
4260
DatabasePath, RELCACHE_INIT_FILENAME);
4263
unlink(tempfilename); /* in case it exists w/wrong permissions */
4265
fp = AllocateFile(tempfilename, PG_BINARY_W);
4269
* We used to consider this a fatal error, but we might as well
4270
* continue with backend startup ...
4273
(errcode_for_file_access(),
4274
errmsg("could not create relation-cache initialization file \"%s\": %m",
4276
errdetail("Continuing anyway, but there's something wrong.")));
4281
* Write a magic number to serve as a file version identifier. We can
4282
* change the magic number whenever the relcache layout changes.
4284
magic = RELCACHE_INIT_FILEMAGIC;
4285
if (fwrite(&magic, 1, sizeof(magic), fp) != sizeof(magic))
4286
elog(FATAL, "could not write init file");
4289
* Write all the appropriate reldescs (in no particular order).
4291
hash_seq_init(&status, RelationIdCache);
4293
while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
4295
Relation rel = idhentry->reldesc;
4296
Form_pg_class relform = rel->rd_rel;
4298
/* ignore if not correct group */
4299
if (relform->relisshared != shared)
4302
/* first write the relcache entry proper */
4303
write_item(rel, sizeof(RelationData), fp);
4305
/* next write the relation tuple form */
4306
write_item(relform, CLASS_TUPLE_SIZE, fp);
4308
/* next, do all the attribute tuple form data entries */
4309
for (i = 0; i < relform->relnatts; i++)
4311
write_item(rel->rd_att->attrs[i], ATTRIBUTE_FIXED_PART_SIZE, fp);
4314
/* next, do the access method specific field */
4315
write_item(rel->rd_options,
4316
(rel->rd_options ? VARSIZE(rel->rd_options) : 0),
4319
/* If it's an index, there's more to do */
4320
if (rel->rd_rel->relkind == RELKIND_INDEX)
4322
Form_pg_am am = rel->rd_am;
4324
/* write the pg_index tuple */
4325
/* we assume this was created by heap_copytuple! */
4326
write_item(rel->rd_indextuple,
4327
HEAPTUPLESIZE + rel->rd_indextuple->t_len,
4330
/* next, write the access method tuple form */
4331
write_item(am, sizeof(FormData_pg_am), fp);
4333
/* next, write the vector of opfamily OIDs */
4334
write_item(rel->rd_opfamily,
4335
relform->relnatts * sizeof(Oid),
4338
/* next, write the vector of opcintype OIDs */
4339
write_item(rel->rd_opcintype,
4340
relform->relnatts * sizeof(Oid),
4343
/* next, write the vector of support procedure OIDs */
4344
write_item(rel->rd_support,
4345
relform->relnatts * (am->amsupport * sizeof(RegProcedure)),
4348
/* next, write the vector of collation OIDs */
4349
write_item(rel->rd_indcollation,
4350
relform->relnatts * sizeof(Oid),
4353
/* finally, write the vector of indoption values */
4354
write_item(rel->rd_indoption,
4355
relform->relnatts * sizeof(int16),
4359
/* also make a list of their OIDs, for RelationIdIsInInitFile */
4362
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
4363
initFileRelationIds = lcons_oid(RelationGetRelid(rel),
4364
initFileRelationIds);
4365
MemoryContextSwitchTo(oldcxt);
4370
elog(FATAL, "could not write init file");
4373
* Now we have to check whether the data we've so painstakingly
4374
* accumulated is already obsolete due to someone else's just-committed
4375
* catalog changes. If so, we just delete the temp file and leave it to
4376
* the next backend to try again. (Our own relcache entries will be
4377
* updated by SI message processing, but we can't be sure whether what we
4378
* wrote out was up-to-date.)
4380
* This mustn't run concurrently with RelationCacheInitFileInvalidate, so
4381
* grab a serialization lock for the duration.
4383
LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
4385
/* Make sure we have seen all incoming SI messages */
4386
AcceptInvalidationMessages();
4389
* If we have received any SI relcache invals since backend start, assume
4390
* we may have written out-of-date data.
4392
if (relcacheInvalsReceived == 0L)
4395
* OK, rename the temp file to its final name, deleting any
4396
* previously-existing init file.
4398
* Note: a failure here is possible under Cygwin, if some other
4399
* backend is holding open an unlinked-but-not-yet-gone init file. So
4400
* treat this as a noncritical failure; just remove the useless temp
4403
if (rename(tempfilename, finalfilename) < 0)
4404
unlink(tempfilename);
4408
/* Delete the already-obsolete temp file */
4409
unlink(tempfilename);
4412
LWLockRelease(RelCacheInitLock);
4415
/* write a chunk of data preceded by its length */
4417
write_item(const void *data, Size len, FILE *fp)
4419
if (fwrite(&len, 1, sizeof(len), fp) != sizeof(len))
4420
elog(FATAL, "could not write init file");
4421
if (fwrite(data, 1, len, fp) != len)
4422
elog(FATAL, "could not write init file");
4426
* Detect whether a given relation (identified by OID) is one of the ones
4427
* we store in the local relcache init file.
4429
* Note that we effectively assume that all backends running in a database
4430
* would choose to store the same set of relations in the init file;
4431
* otherwise there are cases where we'd fail to detect the need for an init
4432
* file invalidation. This does not seem likely to be a problem in practice.
4435
RelationIdIsInInitFile(Oid relationId)
4437
return list_member_oid(initFileRelationIds, relationId);
4441
* Invalidate (remove) the init file during commit of a transaction that
4442
* changed one or more of the relation cache entries that are kept in the
4445
* We actually need to remove the init file twice: once just before sending
4446
* the SI messages that include relcache inval for such relations, and once
4447
* just after sending them. The unlink before ensures that a backend that's
4448
* currently starting cannot read the now-obsolete init file and then miss
4449
* the SI messages that will force it to update its relcache entries. (This
4450
* works because the backend startup sequence gets into the PGPROC array before
4451
* trying to load the init file.) The unlink after is to synchronize with a
4452
* backend that may currently be trying to write an init file based on data
4453
* that we've just rendered invalid. Such a backend will see the SI messages,
4454
* but we can't leave the init file sitting around to fool later backends.
4456
* Ignore any failure to unlink the file, since it might not be there if
4457
* no backend has been started since the last removal.
4459
* Notice this deals only with the local init file, not the shared init file.
4460
* The reason is that there can never be a "significant" change to the
4461
* relcache entry of a shared relation; the most that could happen is
4462
* updates of noncritical fields such as relpages/reltuples. So, while
4463
* it's worth updating the shared init file from time to time, it can never
4464
* be invalid enough to make it necessary to remove it.
4467
RelationCacheInitFileInvalidate(bool beforeSend)
4469
char initfilename[MAXPGPATH];
4471
snprintf(initfilename, sizeof(initfilename), "%s/%s",
4472
DatabasePath, RELCACHE_INIT_FILENAME);
4476
/* no interlock needed here */
4477
unlink(initfilename);
4482
* We need to interlock this against write_relcache_init_file, to
4483
* guard against possibility that someone renames a new-but-
4484
* already-obsolete init file into place just after we unlink. With
4485
* the interlock, it's certain that write_relcache_init_file will
4486
* notice our SI inval message before renaming into place, or else
4487
* that we will execute second and successfully unlink the file.
4489
LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
4490
unlink(initfilename);
4491
LWLockRelease(RelCacheInitLock);
4496
* Remove the init files during postmaster startup.
4498
* We used to keep the init files across restarts, but that is unsafe in PITR
4499
* scenarios, and even in simple crash-recovery cases there are windows for
4500
* the init files to become out-of-sync with the database. So now we just
4501
* remove them during startup and expect the first backend launch to rebuild
4502
* them. Of course, this has to happen in each database of the cluster.
4505
RelationCacheInitFileRemove(void)
4507
const char *tblspcdir = "pg_tblspc";
4510
char path[MAXPGPATH];
4513
* We zap the shared cache file too. In theory it can't get out of sync
4514
* enough to be a problem, but in data-corruption cases, who knows ...
4516
snprintf(path, sizeof(path), "global/%s",
4517
RELCACHE_INIT_FILENAME);
4518
unlink_initfile(path);
4520
/* Scan everything in the default tablespace */
4521
RelationCacheInitFileRemoveInDir("base");
4523
/* Scan the tablespace link directory to find non-default tablespaces */
4524
dir = AllocateDir(tblspcdir);
4527
elog(LOG, "could not open tablespace link directory \"%s\": %m",
4532
while ((de = ReadDir(dir, tblspcdir)) != NULL)
4534
if (strspn(de->d_name, "0123456789") == strlen(de->d_name))
4536
/* Scan the tablespace dir for per-database dirs */
4537
snprintf(path, sizeof(path), "%s/%s/%s",
4538
tblspcdir, de->d_name, TABLESPACE_VERSION_DIRECTORY);
4539
RelationCacheInitFileRemoveInDir(path);
4546
/* Process one per-tablespace directory for RelationCacheInitFileRemove */
4548
RelationCacheInitFileRemoveInDir(const char *tblspcpath)
4552
char initfilename[MAXPGPATH];
4554
/* Scan the tablespace directory to find per-database directories */
4555
dir = AllocateDir(tblspcpath);
4558
elog(LOG, "could not open tablespace directory \"%s\": %m",
4563
while ((de = ReadDir(dir, tblspcpath)) != NULL)
4565
if (strspn(de->d_name, "0123456789") == strlen(de->d_name))
4567
/* Try to remove the init file in each database */
4568
snprintf(initfilename, sizeof(initfilename), "%s/%s/%s",
4569
tblspcpath, de->d_name, RELCACHE_INIT_FILENAME);
4570
unlink_initfile(initfilename);
4578
unlink_initfile(const char *initfilename)
4580
if (unlink(initfilename) < 0)
4582
/* It might not be there, but log any error other than ENOENT */
4583
if (errno != ENOENT)
4584
elog(LOG, "could not remove cache file \"%s\": %m", initfilename);