1
/*-------------------------------------------------------------------------
4
* code to create and destroy POSTGRES index relations
6
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
7
* Portions Copyright (c) 1994, Regents of the University of California
15
* index_create() - Create a cataloged index relation
16
* index_drop() - Removes index relation from catalogs
17
* BuildIndexInfo() - Prepare to insert index tuples
18
* FormIndexDatum() - Construct datum vector for one index tuple
20
*-------------------------------------------------------------------------
26
#include "access/genam.h"
27
#include "access/heapam.h"
28
#include "access/relscan.h"
29
#include "access/sysattr.h"
30
#include "access/transam.h"
31
#include "access/xact.h"
32
#include "bootstrap/bootstrap.h"
33
#include "catalog/catalog.h"
34
#include "catalog/dependency.h"
35
#include "catalog/heap.h"
36
#include "catalog/index.h"
37
#include "catalog/indexing.h"
38
#include "catalog/namespace.h"
39
#include "catalog/pg_constraint.h"
40
#include "catalog/pg_operator.h"
41
#include "catalog/pg_opclass.h"
42
#include "catalog/pg_tablespace.h"
43
#include "catalog/pg_type.h"
44
#include "catalog/storage.h"
45
#include "commands/tablecmds.h"
46
#include "executor/executor.h"
47
#include "miscadmin.h"
48
#include "nodes/nodeFuncs.h"
49
#include "optimizer/clauses.h"
50
#include "optimizer/var.h"
51
#include "storage/bufmgr.h"
52
#include "storage/lmgr.h"
53
#include "storage/procarray.h"
54
#include "storage/smgr.h"
55
#include "utils/builtins.h"
56
#include "utils/fmgroids.h"
57
#include "utils/inval.h"
58
#include "utils/lsyscache.h"
59
#include "utils/memutils.h"
60
#include "utils/relcache.h"
61
#include "utils/syscache.h"
62
#include "utils/tuplesort.h"
63
#include "utils/snapmgr.h"
64
#include "utils/tqual.h"
67
/* state info for validate_index bulkdelete callback */
70
Tuplesortstate *tuplesort; /* for sorting the index TIDs */
71
/* statistics (for debug purposes only): */
77
/* non-export function prototypes */
78
static TupleDesc ConstructTupleDescriptor(Relation heapRelation,
80
Oid accessMethodObjectId,
82
static void InitializeAttributeOids(Relation indexRelation,
83
int numatts, Oid indexoid);
84
static void AppendAttributeTuples(Relation indexRelation, int numatts);
85
static void UpdateIndexRelation(Oid indexoid, Oid heapoid,
91
static void index_update_stats(Relation rel, bool hasindex, bool isprimary,
92
Oid reltoastidxid, double reltuples);
93
static bool validate_index_callback(ItemPointer itemptr, void *opaque);
94
static void validate_index_heapscan(Relation heapRelation,
95
Relation indexRelation,
99
static Oid IndexGetRelation(Oid indexId);
103
* ConstructTupleDescriptor
105
* Build an index tuple descriptor for a new index
108
ConstructTupleDescriptor(Relation heapRelation,
109
IndexInfo *indexInfo,
110
Oid accessMethodObjectId,
113
int numatts = indexInfo->ii_NumIndexAttrs;
114
ListCell *indexpr_item = list_head(indexInfo->ii_Expressions);
117
TupleDesc heapTupDesc;
118
TupleDesc indexTupDesc;
119
int natts; /* #atts in heap rel --- for error checks */
122
/* We need access to the index AM's pg_am tuple */
123
amtuple = SearchSysCache(AMOID,
124
ObjectIdGetDatum(accessMethodObjectId),
126
if (!HeapTupleIsValid(amtuple))
127
elog(ERROR, "cache lookup failed for access method %u",
128
accessMethodObjectId);
129
amform = (Form_pg_am) GETSTRUCT(amtuple);
131
/* ... and to the table's tuple descriptor */
132
heapTupDesc = RelationGetDescr(heapRelation);
133
natts = RelationGetForm(heapRelation)->relnatts;
136
* allocate the new tuple descriptor
138
indexTupDesc = CreateTemplateTupleDesc(numatts, false);
141
* For simple index columns, we copy the pg_attribute row from the parent
142
* relation and modify it as necessary. For expressions we have to cons
143
* up a pg_attribute row the hard way.
145
for (i = 0; i < numatts; i++)
147
AttrNumber atnum = indexInfo->ii_KeyAttrNumbers[i];
148
Form_pg_attribute to = indexTupDesc->attrs[i];
150
Form_pg_type typeTup;
151
Form_pg_opclass opclassTup;
156
/* Simple index column */
157
Form_pg_attribute from;
162
* here we are indexing on a system attribute (-1...-n)
164
from = SystemAttributeDefinition(atnum,
165
heapRelation->rd_rel->relhasoids);
170
* here we are indexing on a normal attribute (1...n)
172
if (atnum > natts) /* safety check */
173
elog(ERROR, "invalid column number %d", atnum);
174
from = heapTupDesc->attrs[AttrNumberGetAttrOffset(atnum)];
178
* now that we've determined the "from", let's copy the tuple desc
181
memcpy(to, from, ATTRIBUTE_FIXED_PART_SIZE);
184
* Fix the stuff that should not be the same as the underlying
189
to->attstattarget = -1;
190
to->attcacheoff = -1;
191
to->attnotnull = false;
192
to->atthasdef = false;
193
to->attislocal = true;
198
/* Expressional index */
201
MemSet(to, 0, ATTRIBUTE_FIXED_PART_SIZE);
203
if (indexpr_item == NULL) /* shouldn't happen */
204
elog(ERROR, "too few entries in indexprs list");
205
indexkey = (Node *) lfirst(indexpr_item);
206
indexpr_item = lnext(indexpr_item);
209
* Make the attribute's name "pg_expression_nnn" (maybe think of
210
* something better later)
212
sprintf(NameStr(to->attname), "pg_expression_%d", i + 1);
215
* Lookup the expression type in pg_type for the type length etc.
217
keyType = exprType(indexkey);
218
tuple = SearchSysCache(TYPEOID,
219
ObjectIdGetDatum(keyType),
221
if (!HeapTupleIsValid(tuple))
222
elog(ERROR, "cache lookup failed for type %u", keyType);
223
typeTup = (Form_pg_type) GETSTRUCT(tuple);
226
* Assign some of the attribute's values. Leave the rest as 0.
229
to->atttypid = keyType;
230
to->attlen = typeTup->typlen;
231
to->attbyval = typeTup->typbyval;
232
to->attstorage = typeTup->typstorage;
233
to->attalign = typeTup->typalign;
234
to->attstattarget = -1;
235
to->attcacheoff = -1;
237
to->attislocal = true;
239
ReleaseSysCache(tuple);
242
* Make sure the expression yields a type that's safe to store in
243
* an index. We need this defense because we have index opclasses
244
* for pseudo-types such as "record", and the actually stored type
245
* had better be safe; eg, a named composite type is okay, an
246
* anonymous record type is not. The test is the same as for
247
* whether a table column is of a safe type (which is why we
248
* needn't check for the non-expression case).
250
CheckAttributeType(NameStr(to->attname), to->atttypid);
254
* We do not yet have the correct relation OID for the index, so just
255
* set it invalid for now. InitializeAttributeOids() will fix it
258
to->attrelid = InvalidOid;
261
* Check the opclass and index AM to see if either provides a keytype
262
* (overriding the attribute type). Opclass takes precedence.
264
tuple = SearchSysCache(CLAOID,
265
ObjectIdGetDatum(classObjectId[i]),
267
if (!HeapTupleIsValid(tuple))
268
elog(ERROR, "cache lookup failed for opclass %u",
270
opclassTup = (Form_pg_opclass) GETSTRUCT(tuple);
271
if (OidIsValid(opclassTup->opckeytype))
272
keyType = opclassTup->opckeytype;
274
keyType = amform->amkeytype;
275
ReleaseSysCache(tuple);
277
if (OidIsValid(keyType) && keyType != to->atttypid)
279
/* index value and heap value have different types */
280
tuple = SearchSysCache(TYPEOID,
281
ObjectIdGetDatum(keyType),
283
if (!HeapTupleIsValid(tuple))
284
elog(ERROR, "cache lookup failed for type %u", keyType);
285
typeTup = (Form_pg_type) GETSTRUCT(tuple);
287
to->atttypid = keyType;
289
to->attlen = typeTup->typlen;
290
to->attbyval = typeTup->typbyval;
291
to->attalign = typeTup->typalign;
292
to->attstorage = typeTup->typstorage;
294
ReleaseSysCache(tuple);
298
ReleaseSysCache(amtuple);
303
/* ----------------------------------------------------------------
304
* InitializeAttributeOids
305
* ----------------------------------------------------------------
308
InitializeAttributeOids(Relation indexRelation,
312
TupleDesc tupleDescriptor;
315
tupleDescriptor = RelationGetDescr(indexRelation);
317
for (i = 0; i < numatts; i += 1)
318
tupleDescriptor->attrs[i]->attrelid = indexoid;
321
/* ----------------------------------------------------------------
322
* AppendAttributeTuples
323
* ----------------------------------------------------------------
326
AppendAttributeTuples(Relation indexRelation, int numatts)
328
Relation pg_attribute;
329
CatalogIndexState indstate;
330
TupleDesc indexTupDesc;
334
* open the attribute relation and its indexes
336
pg_attribute = heap_open(AttributeRelationId, RowExclusiveLock);
338
indstate = CatalogOpenIndexes(pg_attribute);
341
* insert data from new index's tupdesc into pg_attribute
343
indexTupDesc = RelationGetDescr(indexRelation);
345
for (i = 0; i < numatts; i++)
348
* There used to be very grotty code here to set these fields, but I
349
* think it's unnecessary. They should be set already.
351
Assert(indexTupDesc->attrs[i]->attnum == i + 1);
352
Assert(indexTupDesc->attrs[i]->attcacheoff == -1);
354
InsertPgAttributeTuple(pg_attribute, indexTupDesc->attrs[i], indstate);
357
CatalogCloseIndexes(indstate);
359
heap_close(pg_attribute, RowExclusiveLock);
362
/* ----------------------------------------------------------------
363
* UpdateIndexRelation
365
* Construct and insert a new entry in the pg_index catalog
366
* ----------------------------------------------------------------
369
UpdateIndexRelation(Oid indexoid,
371
IndexInfo *indexInfo,
379
int2vector *indoption;
382
Datum values[Natts_pg_index];
383
bool nulls[Natts_pg_index];
389
* Copy the index key, opclass, and indoption info into arrays (should we
390
* make the caller pass them like this to start with?)
392
indkey = buildint2vector(NULL, indexInfo->ii_NumIndexAttrs);
393
for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
394
indkey->values[i] = indexInfo->ii_KeyAttrNumbers[i];
395
indclass = buildoidvector(classOids, indexInfo->ii_NumIndexAttrs);
396
indoption = buildint2vector(coloptions, indexInfo->ii_NumIndexAttrs);
399
* Convert the index expressions (if any) to a text datum
401
if (indexInfo->ii_Expressions != NIL)
405
exprsString = nodeToString(indexInfo->ii_Expressions);
406
exprsDatum = CStringGetTextDatum(exprsString);
410
exprsDatum = (Datum) 0;
413
* Convert the index predicate (if any) to a text datum. Note we convert
414
* implicit-AND format to normal explicit-AND for storage.
416
if (indexInfo->ii_Predicate != NIL)
420
predString = nodeToString(make_ands_explicit(indexInfo->ii_Predicate));
421
predDatum = CStringGetTextDatum(predString);
425
predDatum = (Datum) 0;
428
* open the system catalog index relation
430
pg_index = heap_open(IndexRelationId, RowExclusiveLock);
433
* Build a pg_index tuple
435
MemSet(nulls, false, sizeof(nulls));
437
values[Anum_pg_index_indexrelid - 1] = ObjectIdGetDatum(indexoid);
438
values[Anum_pg_index_indrelid - 1] = ObjectIdGetDatum(heapoid);
439
values[Anum_pg_index_indnatts - 1] = Int16GetDatum(indexInfo->ii_NumIndexAttrs);
440
values[Anum_pg_index_indisunique - 1] = BoolGetDatum(indexInfo->ii_Unique);
441
values[Anum_pg_index_indisprimary - 1] = BoolGetDatum(primary);
442
values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(false);
443
values[Anum_pg_index_indisvalid - 1] = BoolGetDatum(isvalid);
444
values[Anum_pg_index_indcheckxmin - 1] = BoolGetDatum(false);
445
/* we set isvalid and isready the same way */
446
values[Anum_pg_index_indisready - 1] = BoolGetDatum(isvalid);
447
values[Anum_pg_index_indkey - 1] = PointerGetDatum(indkey);
448
values[Anum_pg_index_indclass - 1] = PointerGetDatum(indclass);
449
values[Anum_pg_index_indoption - 1] = PointerGetDatum(indoption);
450
values[Anum_pg_index_indexprs - 1] = exprsDatum;
451
if (exprsDatum == (Datum) 0)
452
nulls[Anum_pg_index_indexprs - 1] = true;
453
values[Anum_pg_index_indpred - 1] = predDatum;
454
if (predDatum == (Datum) 0)
455
nulls[Anum_pg_index_indpred - 1] = true;
457
tuple = heap_form_tuple(RelationGetDescr(pg_index), values, nulls);
460
* insert the tuple into the pg_index catalog
462
simple_heap_insert(pg_index, tuple);
464
/* update the indexes on pg_index */
465
CatalogUpdateIndexes(pg_index, tuple);
468
* close the relation and free the tuple
470
heap_close(pg_index, RowExclusiveLock);
471
heap_freetuple(tuple);
478
* heapRelationId: OID of table to build index on
479
* indexRelationName: name to give the new index relation
480
* indexRelationId: normally, pass InvalidOid to let this routine
481
* generate an OID for the index. During bootstrap this may be
482
* nonzero to specify a preselected OID.
483
* indexInfo: same info executor uses to insert into the index
484
* accessMethodObjectId: OID of index AM to use
485
* tableSpaceId: OID of tablespace to use
486
* classObjectId: array of index opclass OIDs, one per index column
487
* coloptions: array of per-index-column indoption settings
488
* reloptions: AM-specific options
489
* isprimary: index is a PRIMARY KEY
490
* isconstraint: index is owned by a PRIMARY KEY or UNIQUE constraint
491
* allow_system_table_mods: allow table to be a system catalog
492
* skip_build: true to skip the index_build() step for the moment; caller
493
* must do it later (typically via reindex_index())
494
* concurrent: if true, do not lock the table against writers. The index
495
* will be marked "invalid" and the caller must take additional steps
498
* Returns OID of the created index.
501
index_create(Oid heapRelationId,
502
const char *indexRelationName,
504
IndexInfo *indexInfo,
505
Oid accessMethodObjectId,
512
bool allow_system_table_mods,
517
Relation heapRelation;
518
Relation indexRelation;
519
TupleDesc indexTupDesc;
520
bool shared_relation;
524
pg_class = heap_open(RelationRelationId, RowExclusiveLock);
527
* Only SELECT ... FOR UPDATE/SHARE are allowed while doing a standard
528
* index build; but for concurrent builds we allow INSERT/UPDATE/DELETE
531
heapRelation = heap_open(heapRelationId,
532
(concurrent ? ShareUpdateExclusiveLock : ShareLock));
535
* The index will be in the same namespace as its parent table, and is
536
* shared across databases if and only if the parent is.
538
namespaceId = RelationGetNamespace(heapRelation);
539
shared_relation = heapRelation->rd_rel->relisshared;
544
if (indexInfo->ii_NumIndexAttrs < 1)
545
elog(ERROR, "must index at least one column");
547
if (!allow_system_table_mods &&
548
IsSystemRelation(heapRelation) &&
549
IsNormalProcessingMode())
551
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
552
errmsg("user-defined indexes on system catalog tables are not supported")));
555
* concurrent index build on a system catalog is unsafe because we tend to
556
* release locks before committing in catalogs
559
IsSystemRelation(heapRelation))
561
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
562
errmsg("concurrent index creation on system catalog tables is not supported")));
565
* We cannot allow indexing a shared relation after initdb (because
566
* there's no way to make the entry in other databases' pg_class).
568
if (shared_relation && !IsBootstrapProcessingMode())
570
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
571
errmsg("shared indexes cannot be created after initdb")));
574
* Validate shared/non-shared tablespace (must check this before doing
575
* GetNewRelFileNode, to prevent Assert therein)
579
if (tableSpaceId != GLOBALTABLESPACE_OID)
580
/* elog since this is not a user-facing error */
582
"shared relations must be placed in pg_global tablespace");
586
if (tableSpaceId == GLOBALTABLESPACE_OID)
588
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
589
errmsg("only shared relations can be placed in pg_global tablespace")));
592
if (get_relname_relid(indexRelationName, namespaceId))
594
(errcode(ERRCODE_DUPLICATE_TABLE),
595
errmsg("relation \"%s\" already exists",
596
indexRelationName)));
599
* construct tuple descriptor for index tuples
601
indexTupDesc = ConstructTupleDescriptor(heapRelation,
603
accessMethodObjectId,
607
* Allocate an OID for the index, unless we were told what to use.
609
* The OID will be the relfilenode as well, so make sure it doesn't
610
* collide with either pg_class OIDs or existing physical files.
612
if (!OidIsValid(indexRelationId))
613
indexRelationId = GetNewRelFileNode(tableSpaceId, shared_relation,
617
* create the index relation's relcache entry and physical disk file. (If
618
* we fail further down, it's the smgr's responsibility to remove the disk
621
indexRelation = heap_create(indexRelationName,
628
allow_system_table_mods);
630
Assert(indexRelationId == RelationGetRelid(indexRelation));
633
* Obtain exclusive lock on it. Although no other backends can see it
634
* until we commit, this prevents deadlock-risk complaints from lock
635
* manager in cases such as CLUSTER.
637
LockRelation(indexRelation, AccessExclusiveLock);
640
* Fill in fields of the index's pg_class entry that are not set correctly
643
* XXX should have a cleaner way to create cataloged indexes
645
indexRelation->rd_rel->relowner = heapRelation->rd_rel->relowner;
646
indexRelation->rd_rel->relam = accessMethodObjectId;
647
indexRelation->rd_rel->relkind = RELKIND_INDEX;
648
indexRelation->rd_rel->relhasoids = false;
651
* store index's pg_class entry
653
InsertPgClassTuple(pg_class, indexRelation,
654
RelationGetRelid(indexRelation),
657
/* done with pg_class */
658
heap_close(pg_class, RowExclusiveLock);
661
* now update the object id's of all the attribute tuple forms in the
662
* index relation's tuple descriptor
664
InitializeAttributeOids(indexRelation,
665
indexInfo->ii_NumIndexAttrs,
669
* append ATTRIBUTE tuples for the index
671
AppendAttributeTuples(indexRelation, indexInfo->ii_NumIndexAttrs);
675
* (append INDEX tuple)
677
* Note that this stows away a representation of "predicate".
678
* (Or, could define a rule to maintain the predicate) --Nels, Feb '92
681
UpdateIndexRelation(indexRelationId, heapRelationId, indexInfo,
682
classObjectId, coloptions, isprimary, !concurrent);
685
* Register constraint and dependencies for the index.
687
* If the index is from a CONSTRAINT clause, construct a pg_constraint
688
* entry. The index is then linked to the constraint, which in turn is
689
* linked to the table. If it's not a CONSTRAINT, make the dependency
690
* directly on the table.
692
* We don't need a dependency on the namespace, because there'll be an
693
* indirect dependency via our parent table.
695
* During bootstrap we can't register any dependencies, and we don't try
696
* to make a constraint either.
698
if (!IsBootstrapProcessingMode())
700
ObjectAddress myself,
703
myself.classId = RelationRelationId;
704
myself.objectId = indexRelationId;
705
myself.objectSubId = 0;
713
constraintType = CONSTRAINT_PRIMARY;
714
else if (indexInfo->ii_Unique)
715
constraintType = CONSTRAINT_UNIQUE;
718
elog(ERROR, "constraint must be PRIMARY or UNIQUE");
719
constraintType = 0; /* keep compiler quiet */
722
/* Shouldn't have any expressions */
723
if (indexInfo->ii_Expressions)
724
elog(ERROR, "constraints cannot have index expressions");
726
conOid = CreateConstraintEntry(indexRelationName,
729
false, /* isDeferrable */
730
false, /* isDeferred */
732
indexInfo->ii_KeyAttrNumbers,
733
indexInfo->ii_NumIndexAttrs,
734
InvalidOid, /* no domain */
735
InvalidOid, /* no foreign key */
744
InvalidOid, /* no associated index */
745
NULL, /* no check constraint */
751
referenced.classId = ConstraintRelationId;
752
referenced.objectId = conOid;
753
referenced.objectSubId = 0;
755
recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL);
759
bool have_simple_col = false;
761
/* Create auto dependencies on simply-referenced columns */
762
for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
764
if (indexInfo->ii_KeyAttrNumbers[i] != 0)
766
referenced.classId = RelationRelationId;
767
referenced.objectId = heapRelationId;
768
referenced.objectSubId = indexInfo->ii_KeyAttrNumbers[i];
770
recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
772
have_simple_col = true;
777
* It's possible for an index to not depend on any columns of the
778
* table at all, in which case we need to give it a dependency on
779
* the table as a whole; else it won't get dropped when the table
780
* is dropped. This edge case is not totally useless; for
781
* example, a unique index on a constant expression can serve to
782
* prevent a table from containing more than one row.
784
if (!have_simple_col &&
785
!contain_vars_of_level((Node *) indexInfo->ii_Expressions, 0) &&
786
!contain_vars_of_level((Node *) indexInfo->ii_Predicate, 0))
788
referenced.classId = RelationRelationId;
789
referenced.objectId = heapRelationId;
790
referenced.objectSubId = 0;
792
recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
796
/* Store dependency on operator classes */
797
for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
799
referenced.classId = OperatorClassRelationId;
800
referenced.objectId = classObjectId[i];
801
referenced.objectSubId = 0;
803
recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
806
/* Store dependencies on anything mentioned in index expressions */
807
if (indexInfo->ii_Expressions)
809
recordDependencyOnSingleRelExpr(&myself,
810
(Node *) indexInfo->ii_Expressions,
816
/* Store dependencies on anything mentioned in predicate */
817
if (indexInfo->ii_Predicate)
819
recordDependencyOnSingleRelExpr(&myself,
820
(Node *) indexInfo->ii_Predicate,
828
* Advance the command counter so that we can see the newly-entered
829
* catalog tuples for the index.
831
CommandCounterIncrement();
834
* In bootstrap mode, we have to fill in the index strategy structure with
835
* information from the catalogs. If we aren't bootstrapping, then the
836
* relcache entry has already been rebuilt thanks to sinval update during
837
* CommandCounterIncrement.
839
if (IsBootstrapProcessingMode())
840
RelationInitIndexAccessInfo(indexRelation);
842
Assert(indexRelation->rd_indexcxt != NULL);
845
* If this is bootstrap (initdb) time, then we don't actually fill in the
846
* index yet. We'll be creating more indexes and classes later, so we
847
* delay filling them in until just before we're done with bootstrapping.
848
* Similarly, if the caller specified skip_build then filling the index is
849
* delayed till later (ALTER TABLE can save work in some cases with this).
850
* Otherwise, we call the AM routine that constructs the index.
852
if (IsBootstrapProcessingMode())
854
index_register(heapRelationId, indexRelationId, indexInfo);
859
* Caller is responsible for filling the index later on. However,
860
* we'd better make sure that the heap relation is correctly marked as
863
index_update_stats(heapRelation,
867
heapRelation->rd_rel->reltuples);
868
/* Make the above update visible */
869
CommandCounterIncrement();
873
index_build(heapRelation, indexRelation, indexInfo, isprimary);
877
* Close the heap and index; but we keep the locks that we acquired above
878
* until end of transaction.
880
index_close(indexRelation, NoLock);
881
heap_close(heapRelation, NoLock);
883
return indexRelationId;
889
* NOTE: this routine should now only be called through performDeletion(),
890
* else associated dependencies won't be cleaned up.
893
index_drop(Oid indexId)
896
Relation userHeapRelation;
897
Relation userIndexRelation;
898
Relation indexRelation;
903
* To drop an index safely, we must grab exclusive lock on its parent
904
* table; otherwise there could be other backends using the index!
905
* Exclusive lock on the index alone is insufficient because another
906
* backend might be in the midst of devising a query plan that will use
907
* the index. The parser and planner take care to hold an appropriate
908
* lock on the parent table while working, but having them hold locks on
909
* all the indexes too seems overly expensive. We do grab exclusive lock
910
* on the index too, just to be safe. Both locks must be held till end of
911
* transaction, else other backends will still see this index in pg_index.
913
heapId = IndexGetRelation(indexId);
914
userHeapRelation = heap_open(heapId, AccessExclusiveLock);
916
userIndexRelation = index_open(indexId, AccessExclusiveLock);
919
* Schedule physical removal of the files
921
RelationDropStorage(userIndexRelation);
924
* Close and flush the index's relcache entry, to ensure relcache doesn't
925
* try to rebuild it while we're deleting catalog entries. We keep the
928
index_close(userIndexRelation, NoLock);
930
RelationForgetRelation(indexId);
933
* fix INDEX relation, and check for expressional index
935
indexRelation = heap_open(IndexRelationId, RowExclusiveLock);
937
tuple = SearchSysCache(INDEXRELID,
938
ObjectIdGetDatum(indexId),
940
if (!HeapTupleIsValid(tuple))
941
elog(ERROR, "cache lookup failed for index %u", indexId);
943
hasexprs = !heap_attisnull(tuple, Anum_pg_index_indexprs);
945
simple_heap_delete(indexRelation, &tuple->t_self);
947
ReleaseSysCache(tuple);
948
heap_close(indexRelation, RowExclusiveLock);
951
* if it has any expression columns, we might have stored statistics about
955
RemoveStatistics(indexId, 0);
958
* fix ATTRIBUTE relation
960
DeleteAttributeTuples(indexId);
963
* fix RELATION relation
965
DeleteRelationTuple(indexId);
968
* We are presently too lazy to attempt to compute the new correct value
969
* of relhasindex (the next VACUUM will fix it if necessary). So there is
970
* no need to update the pg_class tuple for the owning relation. But we
971
* must send out a shared-cache-inval notice on the owning relation to
972
* ensure other backends update their relcache lists of indexes.
974
CacheInvalidateRelcache(userHeapRelation);
977
* Close owning rel, but keep lock
979
heap_close(userHeapRelation, NoLock);
982
/* ----------------------------------------------------------------
983
* index_build support
984
* ----------------------------------------------------------------
989
* Construct an IndexInfo record for an open index
991
* IndexInfo stores the information about the index that's needed by
992
* FormIndexDatum, which is used for both index_build() and later insertion
993
* of individual index tuples. Normally we build an IndexInfo for an index
994
* just once per command, and then use it for (potentially) many tuples.
998
BuildIndexInfo(Relation index)
1000
IndexInfo *ii = makeNode(IndexInfo);
1001
Form_pg_index indexStruct = index->rd_index;
1005
/* check the number of keys, and copy attr numbers into the IndexInfo */
1006
numKeys = indexStruct->indnatts;
1007
if (numKeys < 1 || numKeys > INDEX_MAX_KEYS)
1008
elog(ERROR, "invalid indnatts %d for index %u",
1009
numKeys, RelationGetRelid(index));
1010
ii->ii_NumIndexAttrs = numKeys;
1011
for (i = 0; i < numKeys; i++)
1012
ii->ii_KeyAttrNumbers[i] = indexStruct->indkey.values[i];
1014
/* fetch any expressions needed for expressional indexes */
1015
ii->ii_Expressions = RelationGetIndexExpressions(index);
1016
ii->ii_ExpressionsState = NIL;
1018
/* fetch index predicate if any */
1019
ii->ii_Predicate = RelationGetIndexPredicate(index);
1020
ii->ii_PredicateState = NIL;
1023
ii->ii_Unique = indexStruct->indisunique;
1024
ii->ii_ReadyForInserts = indexStruct->indisready;
1026
/* initialize index-build state to default */
1027
ii->ii_Concurrent = false;
1028
ii->ii_BrokenHotChain = false;
1035
* Construct values[] and isnull[] arrays for a new index tuple.
1037
* indexInfo Info about the index
1038
* slot Heap tuple for which we must prepare an index entry
1039
* estate executor state for evaluating any index expressions
1040
* values Array of index Datums (output area)
1041
* isnull Array of is-null indicators (output area)
1043
* When there are no index expressions, estate may be NULL. Otherwise it
1044
* must be supplied, *and* the ecxt_scantuple slot of its per-tuple expr
1045
* context must point to the heap tuple passed in.
1047
* Notice we don't actually call index_form_tuple() here; we just prepare
1048
* its input arrays values[] and isnull[]. This is because the index AM
1049
* may wish to alter the data before storage.
1053
FormIndexDatum(IndexInfo *indexInfo,
1054
TupleTableSlot *slot,
1059
ListCell *indexpr_item;
1062
if (indexInfo->ii_Expressions != NIL &&
1063
indexInfo->ii_ExpressionsState == NIL)
1065
/* First time through, set up expression evaluation state */
1066
indexInfo->ii_ExpressionsState = (List *)
1067
ExecPrepareExpr((Expr *) indexInfo->ii_Expressions,
1069
/* Check caller has set up context correctly */
1070
Assert(GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
1072
indexpr_item = list_head(indexInfo->ii_ExpressionsState);
1074
for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
1076
int keycol = indexInfo->ii_KeyAttrNumbers[i];
1083
* Plain index column; get the value we need directly from the heap tuple.
1086
iDatum = slot_getattr(slot, keycol, &isNull);
1091
* Index expression --- need to evaluate it.
1093
if (indexpr_item == NULL)
1094
elog(ERROR, "wrong number of index expressions");
1095
iDatum = ExecEvalExprSwitchContext((ExprState *) lfirst(indexpr_item),
1096
GetPerTupleExprContext(estate),
1099
indexpr_item = lnext(indexpr_item);
1105
if (indexpr_item != NULL)
1106
elog(ERROR, "wrong number of index expressions");
1111
* index_update_stats --- update pg_class entry after CREATE INDEX or REINDEX
1113
* This routine updates the pg_class row of either an index or its parent
1114
* relation after CREATE INDEX or REINDEX. Its rather bizarre API is designed
1115
* to ensure we can do all the necessary work in just one update.
1117
* hasindex: set relhasindex to this value
1118
* isprimary: if true, set relhaspkey true; else no change
1119
* reltoastidxid: if not InvalidOid, set reltoastidxid to this value; else no change
1121
* reltuples: set reltuples to this value
1123
* relpages is also updated (using RelationGetNumberOfBlocks()).
1125
* NOTE: an important side-effect of this operation is that an SI invalidation
1126
* message is sent out to all backends --- including me --- causing relcache
1127
* entries to be flushed or updated with the new data. This must happen even
1128
* if we find that no change is needed in the pg_class row. When updating
1129
* a heap entry, this ensures that other backends find out about the new
1130
* index. When updating an index, it's important because some index AMs
1131
* expect a relcache flush to occur after REINDEX.
1134
index_update_stats(Relation rel, bool hasindex, bool isprimary,
1135
Oid reltoastidxid, double reltuples)
1137
BlockNumber relpages = RelationGetNumberOfBlocks(rel);
1138
Oid relid = RelationGetRelid(rel);
1141
Form_pg_class rd_rel;
1145
* We always update the pg_class row using a non-transactional,
1146
* overwrite-in-place update. There are several reasons for this:
1148
* 1. In bootstrap mode, we have no choice --- UPDATE wouldn't work.
1150
* 2. We could be reindexing pg_class itself, in which case we can't move
1151
* its pg_class row because CatalogUpdateIndexes might not know about all
1152
* the indexes yet (see reindex_relation).
1154
* 3. Because we execute CREATE INDEX with just share lock on the parent
1155
* rel (to allow concurrent index creations), an ordinary update could
1156
* suffer a tuple-concurrently-updated failure against another CREATE
1157
* INDEX committing at about the same time. We can avoid that by having
1158
* them both do nontransactional updates (we assume they will both be
1159
* trying to change the pg_class row to the same thing, so it doesn't
1160
* matter which goes first).
1162
* 4. Even with just a single CREATE INDEX, there's a risk factor because
1163
* someone else might be trying to open the rel while we commit, and this
1164
* creates a race condition as to whether he will see both or neither of
1165
* the pg_class row versions as valid. Again, a non-transactional update
1166
* avoids the risk. It is indeterminate which state of the row the other
1167
* process will see, but it doesn't matter (if he's only taking
1168
* AccessShareLock, then it's not critical that he see relhasindex true).
1170
* It is safe to use a non-transactional update even though our
1171
* transaction could still fail before committing. Setting relhasindex
1172
* true is safe even if there are no indexes (VACUUM will eventually fix
1173
* it), and of course the relpages and reltuples counts are correct (or at
1174
* least more so than the old values) regardless.
1177
pg_class = heap_open(RelationRelationId, RowExclusiveLock);
1180
* Make a copy of the tuple to update. Normally we use the syscache, but
1181
* we can't rely on that during bootstrap or while reindexing pg_class itself.
1184
if (IsBootstrapProcessingMode() ||
1185
ReindexIsProcessingHeap(RelationRelationId))
1187
/* don't assume syscache will work */
1188
HeapScanDesc pg_class_scan;
1191
ScanKeyInit(&key[0],
1192
ObjectIdAttributeNumber,
1193
BTEqualStrategyNumber, F_OIDEQ,
1194
ObjectIdGetDatum(relid));
1196
pg_class_scan = heap_beginscan(pg_class, SnapshotNow, 1, key);
1197
tuple = heap_getnext(pg_class_scan, ForwardScanDirection);
1198
tuple = heap_copytuple(tuple);
1199
heap_endscan(pg_class_scan);
1203
/* normal case, use syscache */
1204
tuple = SearchSysCacheCopy(RELOID,
1205
ObjectIdGetDatum(relid),
1209
if (!HeapTupleIsValid(tuple))
1210
elog(ERROR, "could not find tuple for relation %u", relid);
1211
rd_rel = (Form_pg_class) GETSTRUCT(tuple);
1213
/* Apply required updates, if any, to copied tuple */
1216
if (rd_rel->relhasindex != hasindex)
1218
rd_rel->relhasindex = hasindex;
1223
if (!rd_rel->relhaspkey)
1225
rd_rel->relhaspkey = true;
1229
if (OidIsValid(reltoastidxid))
1231
Assert(rd_rel->relkind == RELKIND_TOASTVALUE);
1232
if (rd_rel->reltoastidxid != reltoastidxid)
1234
rd_rel->reltoastidxid = reltoastidxid;
1238
if (rd_rel->reltuples != (float4) reltuples)
1240
rd_rel->reltuples = (float4) reltuples;
1243
if (rd_rel->relpages != (int32) relpages)
1245
rd_rel->relpages = (int32) relpages;
1250
* If anything changed, write out the tuple
1254
heap_inplace_update(pg_class, tuple);
1255
/* the above sends a cache inval message */
1259
/* no need to change tuple, but force relcache inval anyway */
1260
CacheInvalidateRelcacheByTuple(tuple);
1263
heap_freetuple(tuple);
1265
heap_close(pg_class, RowExclusiveLock);
1269
* setNewRelfilenode - assign a new relfilenode value to the relation
1271
* Caller must already hold exclusive lock on the relation.
1273
* The relation is marked with relfrozenxid=freezeXid (InvalidTransactionId
1274
* must be passed for indexes)
1277
setNewRelfilenode(Relation relation, TransactionId freezeXid)
1280
RelFileNode newrnode;
1283
Form_pg_class rd_rel;
1285
/* Can't change relfilenode for nailed tables (indexes ok though) */
1286
Assert(!relation->rd_isnailed ||
1287
relation->rd_rel->relkind == RELKIND_INDEX);
1288
/* Can't change for shared tables or indexes */
1289
Assert(!relation->rd_rel->relisshared);
1290
/* Indexes must have Invalid frozenxid; other relations must not */
1291
Assert((relation->rd_rel->relkind == RELKIND_INDEX &&
1292
freezeXid == InvalidTransactionId) ||
1293
TransactionIdIsNormal(freezeXid));
1295
/* Allocate a new relfilenode */
1296
newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace,
1297
relation->rd_rel->relisshared,
1301
* Find the pg_class tuple for the given relation. This is not used
1302
* during bootstrap, so okay to use heap_update always.
1304
pg_class = heap_open(RelationRelationId, RowExclusiveLock);
1306
tuple = SearchSysCacheCopy(RELOID,
1307
ObjectIdGetDatum(RelationGetRelid(relation)),
1309
if (!HeapTupleIsValid(tuple))
1310
elog(ERROR, "could not find tuple for relation %u",
1311
RelationGetRelid(relation));
1312
rd_rel = (Form_pg_class) GETSTRUCT(tuple);
1315
* ... and create storage for corresponding forks in the new relfilenode.
1317
* NOTE: any conflict in relfilenode value will be caught here
1319
newrnode = relation->rd_node;
1320
newrnode.relNode = newrelfilenode;
1323
* Create the main fork, like heap_create() does, and drop the old storage.
1326
RelationCreateStorage(newrnode, relation->rd_istemp);
1327
smgrclosenode(newrnode);
1328
RelationDropStorage(relation);
1330
/* update the pg_class row */
1331
rd_rel->relfilenode = newrelfilenode;
1332
rd_rel->relpages = 0; /* it's empty until further notice */
1333
rd_rel->reltuples = 0;
1334
rd_rel->relfrozenxid = freezeXid;
1335
simple_heap_update(pg_class, &tuple->t_self, tuple);
1336
CatalogUpdateIndexes(pg_class, tuple);
1338
heap_freetuple(tuple);
1340
heap_close(pg_class, RowExclusiveLock);
1342
/* Make sure the relfilenode change is visible */
1343
CommandCounterIncrement();
1345
/* Mark the rel as having a new relfilenode in current transaction */
1346
RelationCacheMarkNewRelfilenode(relation);
1351
* index_build - invoke access-method-specific index build procedure
1353
* On entry, the index's catalog entries are valid, and its physical disk
1354
* file has been created but is empty. We call the AM-specific build
1355
* procedure to fill in the index contents. We then update the pg_class
1356
* entries of the index and heap relation as needed, using statistics
1357
* returned by ambuild as well as data passed by the caller.
1359
* Note: when reindexing an existing index, isprimary can be false;
1360
* the index is already properly marked and need not be re-marked.
1362
* Note: before Postgres 8.2, the passed-in heap and index Relations
1363
* were automatically closed by this routine. This is no longer the case.
1364
* The caller opened 'em, and the caller should close 'em.
1367
index_build(Relation heapRelation,
1368
Relation indexRelation,
1369
IndexInfo *indexInfo,
1372
RegProcedure procedure;
1373
IndexBuildResult *stats;
1375
bool save_secdefcxt;
1380
Assert(RelationIsValid(indexRelation));
1381
Assert(PointerIsValid(indexRelation->rd_am));
1383
procedure = indexRelation->rd_am->ambuild;
1384
Assert(RegProcedureIsValid(procedure));
1387
* Switch to the table owner's userid, so that any index functions are run as that user.
1390
GetUserIdAndContext(&save_userid, &save_secdefcxt);
1391
SetUserIdAndContext(heapRelation->rd_rel->relowner, true);
1394
* Call the access method's build procedure
1396
stats = (IndexBuildResult *)
1397
DatumGetPointer(OidFunctionCall3(procedure,
1398
PointerGetDatum(heapRelation),
1399
PointerGetDatum(indexRelation),
1400
PointerGetDatum(indexInfo)));
1401
Assert(PointerIsValid(stats));
1403
/* Restore userid */
1404
SetUserIdAndContext(save_userid, save_secdefcxt);
1407
* If we found any potentially broken HOT chains, mark the index as not
1408
* being usable until the current transaction is below the event horizon.
1409
* See src/backend/access/heap/README.HOT for discussion.
1411
if (indexInfo->ii_BrokenHotChain)
1413
Oid indexId = RelationGetRelid(indexRelation);
1415
HeapTuple indexTuple;
1416
Form_pg_index indexForm;
1418
pg_index = heap_open(IndexRelationId, RowExclusiveLock);
1420
indexTuple = SearchSysCacheCopy(INDEXRELID,
1421
ObjectIdGetDatum(indexId),
1423
if (!HeapTupleIsValid(indexTuple))
1424
elog(ERROR, "cache lookup failed for index %u", indexId);
1425
indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
1427
indexForm->indcheckxmin = true;
1428
simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
1429
CatalogUpdateIndexes(pg_index, indexTuple);
1431
heap_freetuple(indexTuple);
1432
heap_close(pg_index, RowExclusiveLock);
1436
* Update heap and index pg_class rows
1438
index_update_stats(heapRelation,
1441
(heapRelation->rd_rel->relkind == RELKIND_TOASTVALUE) ?
1442
RelationGetRelid(indexRelation) : InvalidOid,
1443
stats->heap_tuples);
1445
index_update_stats(indexRelation,
1449
stats->index_tuples);
1451
/* Make the updated versions visible */
1452
CommandCounterIncrement();
1457
* IndexBuildHeapScan - scan the heap relation to find tuples to be indexed
1459
* This is called back from an access-method-specific index build procedure
1460
* after the AM has done whatever setup it needs. The parent heap relation
1461
* is scanned to find tuples that should be entered into the index. Each
1462
* such tuple is passed to the AM's callback routine, which does the right
1463
* things to add it to the new index. After we return, the AM's index
1464
* build procedure does whatever cleanup is needed; in particular, it should
1465
* close the heap and index relations.
1467
* The total count of heap tuples is returned. This is for updating pg_class
1468
* statistics. (It's annoying not to be able to do that here, but we can't
1469
* do it until after the relation is closed.) Note that the index AM itself
1470
* must keep track of the number of index tuples; we don't do so here because
1471
* the AM might reject some of the tuples for its own reasons, such as being
1472
* unable to store NULLs.
1474
* A side effect is to set indexInfo->ii_BrokenHotChain to true if we detect
1475
* any potentially broken HOT chains. Currently, we set this if there are
1476
* any RECENTLY_DEAD entries in a HOT chain, without trying very hard to
1477
* detect whether they're really incompatible with the chain tip.
1480
IndexBuildHeapScan(Relation heapRelation,
1481
Relation indexRelation,
1482
IndexInfo *indexInfo,
1484
IndexBuildCallback callback,
1485
void *callback_state)
1488
HeapTuple heapTuple;
1489
Datum values[INDEX_MAX_KEYS];
1490
bool isnull[INDEX_MAX_KEYS];
1493
TupleTableSlot *slot;
1495
ExprContext *econtext;
1497
TransactionId OldestXmin;
1498
BlockNumber root_blkno = InvalidBlockNumber;
1499
OffsetNumber root_offsets[MaxHeapTuplesPerPage];
1504
Assert(OidIsValid(indexRelation->rd_rel->relam));
1507
* Need an EState for evaluation of index expressions and partial-index
1508
* predicates. Also a slot to hold the current tuple.
1510
estate = CreateExecutorState();
1511
econtext = GetPerTupleExprContext(estate);
1512
slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
1514
/* Arrange for econtext's scan tuple to be the tuple under test */
1515
econtext->ecxt_scantuple = slot;
1517
/* Set up execution state for predicate, if any. */
1518
predicate = (List *)
1519
ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
1523
* Prepare for scan of the base relation. In a normal index build, we use
1524
* SnapshotAny because we must retrieve all tuples and do our own time
1525
* qual checks (because we have to index RECENTLY_DEAD tuples). In a
1526
* concurrent build, we take a regular MVCC snapshot and index whatever's
1527
* live according to that. During bootstrap we just use SnapshotNow.
1529
if (IsBootstrapProcessingMode())
1531
snapshot = SnapshotNow;
1532
OldestXmin = InvalidTransactionId; /* not used */
1534
else if (indexInfo->ii_Concurrent)
1536
snapshot = RegisterSnapshot(GetTransactionSnapshot());
1537
OldestXmin = InvalidTransactionId; /* not used */
1541
snapshot = SnapshotAny;
1542
/* okay to ignore lazy VACUUMs here */
1543
OldestXmin = GetOldestXmin(heapRelation->rd_rel->relisshared, true);
1546
scan = heap_beginscan_strat(heapRelation, /* relation */
1547
snapshot, /* snapshot */
1548
0, /* number of keys */
1549
NULL, /* scan key */
1550
true, /* buffer access strategy OK */
1551
allow_sync); /* syncscan OK? */
1556
* Scan all tuples in the base relation.
1558
while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1562
CHECK_FOR_INTERRUPTS();
1565
* When dealing with a HOT-chain of updated tuples, we want to index
1566
* the values of the live tuple (if any), but index it under the TID
1567
* of the chain's root tuple. This approach is necessary to preserve
1568
* the HOT-chain structure in the heap. So we need to be able to find
1569
* the root item offset for every tuple that's in a HOT-chain. When
1570
* first reaching a new page of the relation, call
1571
* heap_get_root_tuples() to build a map of root item offsets on the page.
1574
* It might look unsafe to use this information across buffer
1575
* lock/unlock. However, we hold ShareLock on the table so no
1576
* ordinary insert/update/delete should occur; and we hold pin on the
1577
* buffer continuously while visiting the page, so no pruning
1578
* operation can occur either.
1580
* Note the implied assumption that there is no more than one live
1581
* tuple per HOT-chain ...
1583
if (scan->rs_cblock != root_blkno)
1585
Page page = BufferGetPage(scan->rs_cbuf);
1587
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
1588
heap_get_root_tuples(page, root_offsets);
1589
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1591
root_blkno = scan->rs_cblock;
1594
if (snapshot == SnapshotAny)
1596
/* do our own time qual check */
1602
* We could possibly get away with not locking the buffer here,
1603
* since caller should hold ShareLock on the relation, but let's
1604
* be conservative about it. (This remark is still correct even
1605
* with HOT-pruning: our pin on the buffer prevents pruning.)
1607
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
1609
switch (HeapTupleSatisfiesVacuum(heapTuple->t_data, OldestXmin,
1612
case HEAPTUPLE_DEAD:
1613
/* Definitely dead, we can ignore it */
1615
tupleIsAlive = false;
1617
case HEAPTUPLE_LIVE:
1618
/* Normal case, index and unique-check it */
1620
tupleIsAlive = true;
1622
case HEAPTUPLE_RECENTLY_DEAD:
1625
* If tuple is recently deleted then we must index it
1626
* anyway to preserve MVCC semantics. (Pre-existing
1627
* transactions could try to use the index after we finish
1628
* building it, and may need to see such tuples.)
1630
* However, if it was HOT-updated then we must only index
1631
* the live tuple at the end of the HOT-chain. Since this
1632
* breaks semantics for pre-existing snapshots, mark the
1633
* index as unusable for them.
1635
* If we've already decided that the index will be unsafe
1636
* for old snapshots, we may as well stop indexing
1637
* recently-dead tuples, since there's no longer any point.
1640
if (HeapTupleIsHotUpdated(heapTuple))
1643
/* mark the index as unsafe for old snapshots */
1644
indexInfo->ii_BrokenHotChain = true;
1646
else if (indexInfo->ii_BrokenHotChain)
1650
/* In any case, exclude the tuple from unique-checking */
1651
tupleIsAlive = false;
1653
case HEAPTUPLE_INSERT_IN_PROGRESS:
1656
* Since caller should hold ShareLock or better, we should
1657
* not see any tuples inserted by open transactions ---
1658
* unless it's our own transaction. (Consider INSERT
1659
* followed by CREATE INDEX within a transaction.) An
1660
* exception occurs when reindexing a system catalog,
1661
* because we often release lock on system catalogs before
1662
* committing. In that case we wait for the inserting
1663
* transaction to finish and check again. (We could do
1664
* that on user tables too, but since the case is not
1665
* expected it seems better to throw an error.)
1667
if (!TransactionIdIsCurrentTransactionId(
1668
HeapTupleHeaderGetXmin(heapTuple->t_data)))
1670
if (!IsSystemRelation(heapRelation))
1671
elog(ERROR, "concurrent insert in progress");
1675
* Must drop the lock on the buffer before we wait
1677
TransactionId xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
1679
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1680
XactLockTableWait(xwait);
1686
* We must index such tuples, since if the index build
1687
* commits then they're good.
1690
tupleIsAlive = true;
1692
case HEAPTUPLE_DELETE_IN_PROGRESS:
1695
* Since caller should hold ShareLock or better, we should
1696
* not see any tuples deleted by open transactions ---
1697
* unless it's our own transaction. (Consider DELETE
1698
* followed by CREATE INDEX within a transaction.) An
1699
* exception occurs when reindexing a system catalog,
1700
* because we often release lock on system catalogs before
1701
* committing. In that case we wait for the deleting
1702
* transaction to finish and check again. (We could do
1703
* that on user tables too, but since the case is not
1704
* expected it seems better to throw an error.)
1706
Assert(!(heapTuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI));
1707
if (!TransactionIdIsCurrentTransactionId(
1708
HeapTupleHeaderGetXmax(heapTuple->t_data)))
1710
if (!IsSystemRelation(heapRelation))
1711
elog(ERROR, "concurrent delete in progress");
1715
* Must drop the lock on the buffer before we wait
1717
TransactionId xwait = HeapTupleHeaderGetXmax(heapTuple->t_data);
1719
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1720
XactLockTableWait(xwait);
1726
* Otherwise, we have to treat these tuples just like
1727
* RECENTLY_DEAD ones.
1729
if (HeapTupleIsHotUpdated(heapTuple))
1732
/* mark the index as unsafe for old snapshots */
1733
indexInfo->ii_BrokenHotChain = true;
1735
else if (indexInfo->ii_BrokenHotChain)
1739
/* In any case, exclude the tuple from unique-checking */
1740
tupleIsAlive = false;
1743
elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1744
indexIt = tupleIsAlive = false; /* keep compiler quiet */
1748
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1755
/* heap_getnext did the time qual check */
1756
tupleIsAlive = true;
1761
MemoryContextReset(econtext->ecxt_per_tuple_memory);
1763
/* Set up for predicate or expression evaluation */
1764
ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
1767
* In a partial index, discard tuples that don't satisfy the predicate.
1770
if (predicate != NIL)
1772
if (!ExecQual(predicate, econtext, false))
1777
* For the current heap tuple, extract all the attributes we use in
1778
* this index, and note which are null. This also performs evaluation
1779
* of any expressions needed.
1781
FormIndexDatum(indexInfo,
1788
* You'd think we should go ahead and build the index tuple here, but
1789
* some index AMs want to do further processing on the data first. So
1790
* pass the values[] and isnull[] arrays, instead.
1793
if (HeapTupleIsHeapOnly(heapTuple))
1796
* For a heap-only tuple, pretend its TID is that of the root. See
1797
* src/backend/access/heap/README.HOT for discussion.
1799
HeapTupleData rootTuple;
1800
OffsetNumber offnum;
1802
rootTuple = *heapTuple;
1803
offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
1805
Assert(OffsetNumberIsValid(root_offsets[offnum - 1]));
1807
ItemPointerSetOffsetNumber(&rootTuple.t_self,
1808
root_offsets[offnum - 1]);
1810
/* Call the AM's callback routine to process the tuple */
1811
callback(indexRelation, &rootTuple, values, isnull, tupleIsAlive,
1816
/* Call the AM's callback routine to process the tuple */
1817
callback(indexRelation, heapTuple, values, isnull, tupleIsAlive,
1824
/* we can now forget our snapshot, if set */
1825
if (indexInfo->ii_Concurrent)
1826
UnregisterSnapshot(snapshot);
1828
ExecDropSingleTupleTableSlot(slot);
1830
FreeExecutorState(estate);
1832
/* These may have been pointing to the now-gone estate */
1833
indexInfo->ii_ExpressionsState = NIL;
1834
indexInfo->ii_PredicateState = NIL;
1841
* validate_index - support code for concurrent index builds
1843
* We do a concurrent index build by first inserting the catalog entry for the
1844
* index via index_create(), marking it not indisready and not indisvalid.
1845
* Then we commit our transaction and start a new one, then we wait for all
1846
* transactions that could have been modifying the table to terminate. Now
1847
* we know that any subsequently-started transactions will see the index and
1848
* honor its constraints on HOT updates; so while existing HOT-chains might
1849
* be broken with respect to the index, no currently live tuple will have an
1850
* incompatible HOT update done to it. We now build the index normally via
1851
* index_build(), while holding a weak lock that allows concurrent
1852
* insert/update/delete. Also, we index only tuples that are valid
1853
* as of the start of the scan (see IndexBuildHeapScan), whereas a normal
1854
* build takes care to include recently-dead tuples. This is OK because
1855
* we won't mark the index valid until all transactions that might be able
1856
* to see those tuples are gone. The reason for doing that is to avoid
1857
* bogus unique-index failures due to concurrent UPDATEs (we might see
1858
* different versions of the same row as being valid when we pass over them,
1859
* if we used HeapTupleSatisfiesVacuum). This leaves us with an index that
1860
* does not contain any tuples added to the table while we built the index.
1862
* Next, we mark the index "indisready" (but still not "indisvalid") and
1863
* commit the second transaction and start a third. Again we wait for all
1864
* transactions that could have been modifying the table to terminate. Now
1865
* we know that any subsequently-started transactions will see the index and
1866
* insert their new tuples into it. We then take a new reference snapshot
1867
* which is passed to validate_index(). Any tuples that are valid according
1868
* to this snap, but are not in the index, must be added to the index.
1869
* (Any tuples committed live after the snap will be inserted into the
1870
* index by their originating transaction. Any tuples committed dead before
1871
* the snap need not be indexed, because we will wait out all transactions
1872
* that might care about them before we mark the index valid.)
1874
* validate_index() works by first gathering all the TIDs currently in the
1875
* index, using a bulkdelete callback that just stores the TIDs and doesn't
1876
* ever say "delete it". (This should be faster than a plain indexscan;
1877
* also, not all index AMs support full-index indexscan.) Then we sort the
1878
* TIDs, and finally scan the table doing a "merge join" against the TID list
1879
* to see which tuples are missing from the index. Thus we will ensure that
1880
* all tuples valid according to the reference snapshot are in the index.
1882
* Building a unique index this way is tricky: we might try to insert a
1883
* tuple that is already dead or is in process of being deleted, and we
1884
* mustn't have a uniqueness failure against an updated version of the same
1885
* row. We could try to check the tuple to see if it's already dead and tell
1886
* index_insert() not to do the uniqueness check, but that still leaves us
1887
* with a race condition against an in-progress update. To handle that,
1888
* we expect the index AM to recheck liveness of the to-be-inserted tuple
1889
* before it declares a uniqueness error.
1891
* After completing validate_index(), we wait until all transactions that
1892
* were alive at the time of the reference snapshot are gone; this is
1893
* necessary to be sure there are none left with a serializable snapshot
1894
* older than the reference (and hence possibly able to see tuples we did
1895
* not index). Then we mark the index "indisvalid" and commit. Subsequent
1896
* transactions will be able to use it for queries.
1898
* Doing two full table scans is a brute-force strategy. We could try to be
1899
* cleverer, eg storing new tuples in a special area of the table (perhaps
1900
* making the table append-only by setting use_fsm). However that would
1901
* add yet more locking issues.
1904
validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
1906
Relation heapRelation,
1908
IndexInfo *indexInfo;
1909
IndexVacuumInfo ivinfo;
1912
bool save_secdefcxt;
1914
/* Open and lock the parent heap relation */
1915
heapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
1916
/* And the target index relation */
1917
indexRelation = index_open(indexId, RowExclusiveLock);
1920
* Fetch info needed for index_insert. (You might think this should be
1921
* passed in from DefineIndex, but its copy is long gone due to having
1922
* been built in a previous transaction.)
1924
indexInfo = BuildIndexInfo(indexRelation);
1926
/* mark build is concurrent just for consistency */
1927
indexInfo->ii_Concurrent = true;
1930
* Switch to the table owner's userid, so that any index functions are run as that user.
1933
GetUserIdAndContext(&save_userid, &save_secdefcxt);
1934
SetUserIdAndContext(heapRelation->rd_rel->relowner, true);
1937
* Scan the index and gather up all the TIDs into a tuplesort object.
1939
ivinfo.index = indexRelation;
1940
ivinfo.vacuum_full = false;
1941
ivinfo.analyze_only = false;
1942
ivinfo.message_level = DEBUG2;
1943
ivinfo.num_heap_tuples = -1;
1944
ivinfo.strategy = NULL;
1946
state.tuplesort = tuplesort_begin_datum(TIDOID,
1947
TIDLessOperator, false,
1948
maintenance_work_mem,
1950
state.htups = state.itups = state.tups_inserted = 0;
1952
(void) index_bulk_delete(&ivinfo, NULL,
1953
validate_index_callback, (void *) &state);
1955
/* Execute the sort */
1956
tuplesort_performsort(state.tuplesort);
1959
* Now scan the heap and "merge" it with the index
1961
validate_index_heapscan(heapRelation,
1967
/* Done with tuplesort object */
1968
tuplesort_end(state.tuplesort);
1971
"validate_index found %.0f heap tuples, %.0f index tuples; inserted %.0f missing tuples",
1972
state.htups, state.itups, state.tups_inserted);
1974
/* Restore userid */
1975
SetUserIdAndContext(save_userid, save_secdefcxt);
1977
/* Close rels, but keep locks */
1978
index_close(indexRelation, NoLock);
1979
heap_close(heapRelation, NoLock);
1983
* validate_index_callback - bulkdelete callback to collect the index TIDs
1986
validate_index_callback(ItemPointer itemptr, void *opaque)
1988
v_i_state *state = (v_i_state *) opaque;
1990
tuplesort_putdatum(state->tuplesort, PointerGetDatum(itemptr), false);
1992
return false; /* never actually delete anything */
1996
* validate_index_heapscan - second table scan for concurrent index build
1998
* This has much code in common with IndexBuildHeapScan, but it's enough
1999
* different that it seems cleaner to have two routines not one.
2002
validate_index_heapscan(Relation heapRelation,
2003
Relation indexRelation,
2004
IndexInfo *indexInfo,
2009
HeapTuple heapTuple;
2010
Datum values[INDEX_MAX_KEYS];
2011
bool isnull[INDEX_MAX_KEYS];
2013
TupleTableSlot *slot;
2015
ExprContext *econtext;
2016
BlockNumber root_blkno = InvalidBlockNumber;
2017
OffsetNumber root_offsets[MaxHeapTuplesPerPage];
2018
bool in_index[MaxHeapTuplesPerPage];
2020
/* state variables for the merge */
2021
ItemPointer indexcursor = NULL;
2022
bool tuplesort_empty = false;
2027
Assert(OidIsValid(indexRelation->rd_rel->relam));
2030
* Need an EState for evaluation of index expressions and partial-index
2031
* predicates. Also a slot to hold the current tuple.
2033
estate = CreateExecutorState();
2034
econtext = GetPerTupleExprContext(estate);
2035
slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
2037
/* Arrange for econtext's scan tuple to be the tuple under test */
2038
econtext->ecxt_scantuple = slot;
2040
/* Set up execution state for predicate, if any. */
2041
predicate = (List *)
2042
ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
2046
* Prepare for scan of the base relation. We need just those tuples
2047
* satisfying the passed-in reference snapshot. We must disable syncscan
2048
* here, because it's critical that we read from block zero forward to
2049
* match the sorted TIDs.
2051
scan = heap_beginscan_strat(heapRelation, /* relation */
2052
snapshot, /* snapshot */
2053
0, /* number of keys */
2054
NULL, /* scan key */
2055
true, /* buffer access strategy OK */
2056
false); /* syncscan not OK */
2059
* Scan all tuples matching the snapshot.
2061
while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
2063
ItemPointer heapcursor = &heapTuple->t_self;
2064
ItemPointerData rootTuple;
2065
OffsetNumber root_offnum;
2067
CHECK_FOR_INTERRUPTS();
2072
* As commented in IndexBuildHeapScan, we should index heap-only
2073
* tuples under the TIDs of their root tuples; so when we advance onto
2074
* a new heap page, build a map of root item offsets on the page.
2076
* This complicates merging against the tuplesort output: we will
2077
* visit the live tuples in order by their offsets, but the root
2078
* offsets that we need to compare against the index contents might be
2079
* ordered differently. So we might have to "look back" within the
2080
* tuplesort output, but only within the current page. We handle that
2081
* by keeping a bool array in_index[] showing all the
2082
* already-passed-over tuplesort output TIDs of the current page. We
2083
* clear that array here, when advancing onto a new heap page.
2085
if (scan->rs_cblock != root_blkno)
2087
Page page = BufferGetPage(scan->rs_cbuf);
2089
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
2090
heap_get_root_tuples(page, root_offsets);
2091
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2093
memset(in_index, 0, sizeof(in_index));
2095
root_blkno = scan->rs_cblock;
2098
/* Convert actual tuple TID to root TID */
2099
rootTuple = *heapcursor;
2100
root_offnum = ItemPointerGetOffsetNumber(heapcursor);
2102
if (HeapTupleIsHeapOnly(heapTuple))
2104
root_offnum = root_offsets[root_offnum - 1];
2105
Assert(OffsetNumberIsValid(root_offnum));
2106
ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
2110
* "merge" by skipping through the index tuples until we find or pass
2111
* the current root tuple.
2113
while (!tuplesort_empty &&
2115
ItemPointerCompare(indexcursor, &rootTuple) < 0))
2123
* Remember index items seen earlier on the current heap page
2125
if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
2126
in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
2130
tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
2131
&ts_val, &ts_isnull);
2132
Assert(tuplesort_empty || !ts_isnull);
2133
indexcursor = (ItemPointer) DatumGetPointer(ts_val);
2137
* If the tuplesort has overshot *and* we didn't see a match earlier,
2138
* then this tuple is missing from the index, so insert it.
2140
if ((tuplesort_empty ||
2141
ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
2142
!in_index[root_offnum - 1])
2144
MemoryContextReset(econtext->ecxt_per_tuple_memory);
2146
/* Set up for predicate or expression evaluation */
2147
ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
2150
* In a partial index, discard tuples that don't satisfy the
2153
if (predicate != NIL)
2155
if (!ExecQual(predicate, econtext, false))
2160
* For the current heap tuple, extract all the attributes we use
2161
* in this index, and note which are null. This also performs
2162
* evaluation of any expressions needed.
2164
FormIndexDatum(indexInfo,
2171
* You'd think we should go ahead and build the index tuple here,
2172
* but some index AMs want to do further processing on the data
2173
* first. So pass the values[] and isnull[] arrays, instead.
2177
* If the tuple is already committed dead, you might think we
2178
* could suppress uniqueness checking, but this is no longer true
2179
* in the presence of HOT, because the insert is actually a proxy
2180
* for a uniqueness check on the whole HOT-chain. That is, the
2181
* tuple we have here could be dead because it was already
2182
* HOT-updated, and if so the updating transaction will not have
2183
* thought it should insert index entries. The index AM will
2184
* check the whole HOT-chain and correctly detect a conflict if
2188
index_insert(indexRelation,
2193
indexInfo->ii_Unique);
2195
state->tups_inserted += 1;
2201
ExecDropSingleTupleTableSlot(slot);
2203
FreeExecutorState(estate);
2205
/* These may have been pointing to the now-gone estate */
2206
indexInfo->ii_ExpressionsState = NIL;
2207
indexInfo->ii_PredicateState = NIL;
2212
* IndexGetRelation: given an index's relation OID, get the OID of the
2213
* relation it is an index on. Uses the system cache.
2216
IndexGetRelation(Oid indexId)
2219
Form_pg_index index;
2222
tuple = SearchSysCache(INDEXRELID,
2223
ObjectIdGetDatum(indexId),
2225
if (!HeapTupleIsValid(tuple))
2226
elog(ERROR, "cache lookup failed for index %u", indexId);
2227
index = (Form_pg_index) GETSTRUCT(tuple);
2228
Assert(index->indexrelid == indexId);
2230
result = index->indrelid;
2231
ReleaseSysCache(tuple);
2236
* reindex_index - This routine is used to recreate a single index
2239
reindex_index(Oid indexId)
2246
IndexInfo *indexInfo;
2247
HeapTuple indexTuple;
2248
Form_pg_index indexForm;
2251
* Open and lock the parent heap relation. ShareLock is sufficient since
2252
* we only need to be sure no schema or data changes are going on.
2254
heapId = IndexGetRelation(indexId);
2255
heapRelation = heap_open(heapId, ShareLock);
2258
* Open the target index relation and get an exclusive lock on it, to
2259
* ensure that no one else is touching this particular index.
2261
iRel = index_open(indexId, AccessExclusiveLock);
2264
* Don't allow reindex on temp tables of other backends ... their local
2265
* buffer manager is not going to cope.
2267
if (isOtherTempNamespace(RelationGetNamespace(iRel)))
2269
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2270
errmsg("cannot reindex temporary tables of other sessions")));
2273
* Also check for active uses of the index in the current transaction;
2274
* we don't want to reindex underneath an open indexscan.
2276
CheckTableNotInUse(iRel, "REINDEX INDEX");
2279
* If it's a shared index, we must do inplace processing (because we have
2280
* no way to update relfilenode in other databases). Otherwise we can do
2281
* it the normal transaction-safe way.
2283
* Since inplace processing isn't crash-safe, we only allow it in a
2284
* standalone backend. (In the REINDEX TABLE and REINDEX DATABASE cases,
2285
* the caller should have detected this.)
2287
inplace = iRel->rd_rel->relisshared;
2289
if (inplace && IsUnderPostmaster)
2291
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
2292
errmsg("shared index \"%s\" can only be reindexed in stand-alone mode",
2293
RelationGetRelationName(iRel))));
2297
/* Suppress use of the target index while rebuilding it */
2298
SetReindexProcessing(heapId, indexId);
2300
/* Fetch info needed for index_build */
2301
indexInfo = BuildIndexInfo(iRel);
2306
* Truncate the actual file (and discard buffers).
2308
RelationTruncate(iRel, 0);
2313
* We'll build a new physical relation for the index.
2315
setNewRelfilenode(iRel, InvalidTransactionId);
2318
/* Initialize the index and rebuild */
2319
/* Note: we do not need to re-establish pkey setting */
2320
index_build(heapRelation, iRel, indexInfo, false);
2324
/* Make sure flag gets cleared on error exit */
2325
ResetReindexProcessing();
2329
ResetReindexProcessing();
2332
* If the index is marked invalid or not ready (ie, it's from a failed
2333
* CREATE INDEX CONCURRENTLY), we can now mark it valid. This allows
2334
* REINDEX to be used to clean up in such cases.
2336
* We can also reset indcheckxmin, because we have now done a
2337
* non-concurrent index build, *except* in the case where index_build
2338
* found some still-broken HOT chains.
2340
pg_index = heap_open(IndexRelationId, RowExclusiveLock);
2342
indexTuple = SearchSysCacheCopy(INDEXRELID,
2343
ObjectIdGetDatum(indexId),
2345
if (!HeapTupleIsValid(indexTuple))
2346
elog(ERROR, "cache lookup failed for index %u", indexId);
2347
indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
2349
if (!indexForm->indisvalid || !indexForm->indisready ||
2350
(indexForm->indcheckxmin && !indexInfo->ii_BrokenHotChain))
2352
indexForm->indisvalid = true;
2353
indexForm->indisready = true;
2354
if (!indexInfo->ii_BrokenHotChain)
2355
indexForm->indcheckxmin = false;
2356
simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
2357
CatalogUpdateIndexes(pg_index, indexTuple);
2359
heap_close(pg_index, RowExclusiveLock);
2361
/* Close rels, but keep locks */
2362
index_close(iRel, NoLock);
2363
heap_close(heapRelation, NoLock);
2367
* reindex_relation - This routine is used to recreate all indexes
2368
* of a relation (and optionally its toast relation too, if any).
2370
* Returns true if any indexes were rebuilt. Note that a
2371
* CommandCounterIncrement will occur after each index rebuild.
2374
reindex_relation(Oid relid, bool toast_too)
2385
* Open and lock the relation. ShareLock is sufficient since we only need
2386
* to prevent schema and data changes in it.
2388
rel = heap_open(relid, ShareLock);
2390
toast_relid = rel->rd_rel->reltoastrelid;
2393
* Get the list of index OIDs for this relation. (We trust to the
2394
* relcache to get this with a sequential scan if ignoring system
2397
indexIds = RelationGetIndexList(rel);
2400
* reindex_index will attempt to update the pg_class rows for the relation
2401
* and index. If we are processing pg_class itself, we want to make sure
2402
* that the updates do not try to insert index entries into indexes we
2403
* have not processed yet. (When we are trying to recover from corrupted
2404
* indexes, that could easily cause a crash.) We can accomplish this
2405
* because CatalogUpdateIndexes will use the relcache's index list to know
2406
* which indexes to update. We just force the index list to be only the
2407
* stuff we've processed.
2409
* It is okay to not insert entries into the indexes we have not processed
2410
* yet because all of this is transaction-safe. If we fail partway
2411
* through, the updated rows are dead and it doesn't matter whether they
2412
* have index entries. Also, a new pg_class index will be created with an
2413
* entry for its own pg_class row because we do setNewRelfilenode() before
2414
* we do index_build().
2416
* Note that we also clear pg_class's rd_oidindex until the loop is done,
2417
* so that that index can't be accessed either. This means we cannot
2418
* safely generate new relation OIDs while in the loop; shouldn't be a
2421
is_pg_class = (RelationGetRelid(rel) == RelationRelationId);
2423
/* Ensure rd_indexattr is valid; see comments for RelationSetIndexList */
2425
(void) RelationGetIndexAttrBitmap(rel);
2427
/* Reindex all the indexes. */
2429
foreach(indexId, indexIds)
2431
Oid indexOid = lfirst_oid(indexId);
2434
RelationSetIndexList(rel, doneIndexes, InvalidOid);
2436
reindex_index(indexOid);
2438
CommandCounterIncrement();
2441
doneIndexes = lappend_oid(doneIndexes, indexOid);
2445
RelationSetIndexList(rel, indexIds, ClassOidIndexId);
2448
* Close rel, but continue to hold the lock.
2450
heap_close(rel, NoLock);
2452
result = (indexIds != NIL);
2455
* If the relation has a secondary toast rel, reindex that too while we
2456
* still hold the lock on the master table.
2458
if (toast_too && OidIsValid(toast_relid))
2459
result |= reindex_relation(toast_relid, false);