~ubuntu-branches/ubuntu/hardy/postgresql-8.4/hardy-backports

« back to all changes in this revision

Viewing changes to src/backend/catalog/index.c

  • Committer: Bazaar Package Importer
  • Author(s): Martin Pitt
  • Date: 2009-03-20 12:00:13 UTC
  • Revision ID: james.westby@ubuntu.com-20090320120013-hogj7egc5mjncc5g
Tags: upstream-8.4~0cvs20090328
Import upstream version 8.4~0cvs20090328

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*-------------------------------------------------------------------------
 
2
 *
 
3
 * index.c
 
4
 *        code to create and destroy POSTGRES index relations
 
5
 *
 
6
 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
 
7
 * Portions Copyright (c) 1994, Regents of the University of California
 
8
 *
 
9
 *
 
10
 * IDENTIFICATION
 
11
 *        $PostgreSQL$
 
12
 *
 
13
 *
 
14
 * INTERFACE ROUTINES
 
15
 *              index_create()                  - Create a cataloged index relation
 
16
 *              index_drop()                    - Removes index relation from catalogs
 
17
 *              BuildIndexInfo()                - Prepare to insert index tuples
 
18
 *              FormIndexDatum()                - Construct datum vector for one index tuple
 
19
 *
 
20
 *-------------------------------------------------------------------------
 
21
 */
 
22
#include "postgres.h"
 
23
 
 
24
#include <unistd.h>
 
25
 
 
26
#include "access/genam.h"
 
27
#include "access/heapam.h"
 
28
#include "access/relscan.h"
 
29
#include "access/sysattr.h"
 
30
#include "access/transam.h"
 
31
#include "access/xact.h"
 
32
#include "bootstrap/bootstrap.h"
 
33
#include "catalog/catalog.h"
 
34
#include "catalog/dependency.h"
 
35
#include "catalog/heap.h"
 
36
#include "catalog/index.h"
 
37
#include "catalog/indexing.h"
 
38
#include "catalog/namespace.h"
 
39
#include "catalog/pg_constraint.h"
 
40
#include "catalog/pg_operator.h"
 
41
#include "catalog/pg_opclass.h"
 
42
#include "catalog/pg_tablespace.h"
 
43
#include "catalog/pg_type.h"
 
44
#include "catalog/storage.h"
 
45
#include "commands/tablecmds.h"
 
46
#include "executor/executor.h"
 
47
#include "miscadmin.h"
 
48
#include "nodes/nodeFuncs.h"
 
49
#include "optimizer/clauses.h"
 
50
#include "optimizer/var.h"
 
51
#include "storage/bufmgr.h"
 
52
#include "storage/lmgr.h"
 
53
#include "storage/procarray.h"
 
54
#include "storage/smgr.h"
 
55
#include "utils/builtins.h"
 
56
#include "utils/fmgroids.h"
 
57
#include "utils/inval.h"
 
58
#include "utils/lsyscache.h"
 
59
#include "utils/memutils.h"
 
60
#include "utils/relcache.h"
 
61
#include "utils/syscache.h"
 
62
#include "utils/tuplesort.h"
 
63
#include "utils/snapmgr.h"
 
64
#include "utils/tqual.h"
 
65
 
 
66
 
 
67
/* state info for validate_index bulkdelete callback */
 
68
typedef struct
 
69
{
 
70
        Tuplesortstate *tuplesort;      /* for sorting the index TIDs */
 
71
        /* statistics (for debug purposes only): */
 
72
        double          htups,
 
73
                                itups,
 
74
                                tups_inserted;
 
75
} v_i_state;
 
76
 
 
77
/* non-export function prototypes */
 
78
static TupleDesc ConstructTupleDescriptor(Relation heapRelation,
 
79
                                                 IndexInfo *indexInfo,
 
80
                                                 Oid accessMethodObjectId,
 
81
                                                 Oid *classObjectId);
 
82
static void InitializeAttributeOids(Relation indexRelation,
 
83
                                                int numatts, Oid indexoid);
 
84
static void AppendAttributeTuples(Relation indexRelation, int numatts);
 
85
static void UpdateIndexRelation(Oid indexoid, Oid heapoid,
 
86
                                        IndexInfo *indexInfo,
 
87
                                        Oid *classOids,
 
88
                                        int16 *coloptions,
 
89
                                        bool primary,
 
90
                                        bool isvalid);
 
91
static void index_update_stats(Relation rel, bool hasindex, bool isprimary,
 
92
                                   Oid reltoastidxid, double reltuples);
 
93
static bool validate_index_callback(ItemPointer itemptr, void *opaque);
 
94
static void validate_index_heapscan(Relation heapRelation,
 
95
                                                Relation indexRelation,
 
96
                                                IndexInfo *indexInfo,
 
97
                                                Snapshot snapshot,
 
98
                                                v_i_state *state);
 
99
static Oid      IndexGetRelation(Oid indexId);
 
100
 
 
101
 
 
102
/*
 
103
 *              ConstructTupleDescriptor
 
104
 *
 
105
 * Build an index tuple descriptor for a new index
 
106
 */
 
107
static TupleDesc
 
108
ConstructTupleDescriptor(Relation heapRelation,
 
109
                                                 IndexInfo *indexInfo,
 
110
                                                 Oid accessMethodObjectId,
 
111
                                                 Oid *classObjectId)
 
112
{
 
113
        int                     numatts = indexInfo->ii_NumIndexAttrs;
 
114
        ListCell   *indexpr_item = list_head(indexInfo->ii_Expressions);
 
115
        HeapTuple       amtuple;
 
116
        Form_pg_am      amform;
 
117
        TupleDesc       heapTupDesc;
 
118
        TupleDesc       indexTupDesc;
 
119
        int                     natts;                  /* #atts in heap rel --- for error checks */
 
120
        int                     i;
 
121
 
 
122
        /* We need access to the index AM's pg_am tuple */
 
123
        amtuple = SearchSysCache(AMOID,
 
124
                                                         ObjectIdGetDatum(accessMethodObjectId),
 
125
                                                         0, 0, 0);
 
126
        if (!HeapTupleIsValid(amtuple))
 
127
                elog(ERROR, "cache lookup failed for access method %u",
 
128
                         accessMethodObjectId);
 
129
        amform = (Form_pg_am) GETSTRUCT(amtuple);
 
130
 
 
131
        /* ... and to the table's tuple descriptor */
 
132
        heapTupDesc = RelationGetDescr(heapRelation);
 
133
        natts = RelationGetForm(heapRelation)->relnatts;
 
134
 
 
135
        /*
 
136
         * allocate the new tuple descriptor
 
137
         */
 
138
        indexTupDesc = CreateTemplateTupleDesc(numatts, false);
 
139
 
 
140
        /*
 
141
         * For simple index columns, we copy the pg_attribute row from the parent
 
142
         * relation and modify it as necessary.  For expressions we have to cons
 
143
         * up a pg_attribute row the hard way.
 
144
         */
 
145
        for (i = 0; i < numatts; i++)
 
146
        {
 
147
                AttrNumber      atnum = indexInfo->ii_KeyAttrNumbers[i];
 
148
                Form_pg_attribute to = indexTupDesc->attrs[i];
 
149
                HeapTuple       tuple;
 
150
                Form_pg_type typeTup;
 
151
                Form_pg_opclass opclassTup;
 
152
                Oid                     keyType;
 
153
 
 
154
                if (atnum != 0)
 
155
                {
 
156
                        /* Simple index column */
 
157
                        Form_pg_attribute from;
 
158
 
 
159
                        if (atnum < 0)
 
160
                        {
 
161
                                /*
 
162
                                 * here we are indexing on a system attribute (-1...-n)
 
163
                                 */
 
164
                                from = SystemAttributeDefinition(atnum,
 
165
                                                                                   heapRelation->rd_rel->relhasoids);
 
166
                        }
 
167
                        else
 
168
                        {
 
169
                                /*
 
170
                                 * here we are indexing on a normal attribute (1...n)
 
171
                                 */
 
172
                                if (atnum > natts)              /* safety check */
 
173
                                        elog(ERROR, "invalid column number %d", atnum);
 
174
                                from = heapTupDesc->attrs[AttrNumberGetAttrOffset(atnum)];
 
175
                        }
 
176
 
 
177
                        /*
 
178
                         * now that we've determined the "from", let's copy the tuple desc
 
179
                         * data...
 
180
                         */
 
181
                        memcpy(to, from, ATTRIBUTE_FIXED_PART_SIZE);
 
182
 
 
183
                        /*
 
184
                         * Fix the stuff that should not be the same as the underlying
 
185
                         * attr
 
186
                         */
 
187
                        to->attnum = i + 1;
 
188
 
 
189
                        to->attstattarget = -1;
 
190
                        to->attcacheoff = -1;
 
191
                        to->attnotnull = false;
 
192
                        to->atthasdef = false;
 
193
                        to->attislocal = true;
 
194
                        to->attinhcount = 0;
 
195
                }
 
196
                else
 
197
                {
 
198
                        /* Expressional index */
 
199
                        Node       *indexkey;
 
200
 
 
201
                        MemSet(to, 0, ATTRIBUTE_FIXED_PART_SIZE);
 
202
 
 
203
                        if (indexpr_item == NULL)       /* shouldn't happen */
 
204
                                elog(ERROR, "too few entries in indexprs list");
 
205
                        indexkey = (Node *) lfirst(indexpr_item);
 
206
                        indexpr_item = lnext(indexpr_item);
 
207
 
 
208
                        /*
 
209
                         * Make the attribute's name "pg_expresssion_nnn" (maybe think of
 
210
                         * something better later)
 
211
                         */
 
212
                        sprintf(NameStr(to->attname), "pg_expression_%d", i + 1);
 
213
 
 
214
                        /*
 
215
                         * Lookup the expression type in pg_type for the type length etc.
 
216
                         */
 
217
                        keyType = exprType(indexkey);
 
218
                        tuple = SearchSysCache(TYPEOID,
 
219
                                                                   ObjectIdGetDatum(keyType),
 
220
                                                                   0, 0, 0);
 
221
                        if (!HeapTupleIsValid(tuple))
 
222
                                elog(ERROR, "cache lookup failed for type %u", keyType);
 
223
                        typeTup = (Form_pg_type) GETSTRUCT(tuple);
 
224
 
 
225
                        /*
 
226
                         * Assign some of the attributes values. Leave the rest as 0.
 
227
                         */
 
228
                        to->attnum = i + 1;
 
229
                        to->atttypid = keyType;
 
230
                        to->attlen = typeTup->typlen;
 
231
                        to->attbyval = typeTup->typbyval;
 
232
                        to->attstorage = typeTup->typstorage;
 
233
                        to->attalign = typeTup->typalign;
 
234
                        to->attstattarget = -1;
 
235
                        to->attcacheoff = -1;
 
236
                        to->atttypmod = -1;
 
237
                        to->attislocal = true;
 
238
 
 
239
                        ReleaseSysCache(tuple);
 
240
 
 
241
                        /*
 
242
                         * Make sure the expression yields a type that's safe to store in
 
243
                         * an index.  We need this defense because we have index opclasses
 
244
                         * for pseudo-types such as "record", and the actually stored type
 
245
                         * had better be safe; eg, a named composite type is okay, an
 
246
                         * anonymous record type is not.  The test is the same as for
 
247
                         * whether a table column is of a safe type (which is why we
 
248
                         * needn't check for the non-expression case).
 
249
                         */
 
250
                        CheckAttributeType(NameStr(to->attname), to->atttypid);
 
251
                }
 
252
 
 
253
                /*
 
254
                 * We do not yet have the correct relation OID for the index, so just
 
255
                 * set it invalid for now.      InitializeAttributeOids() will fix it
 
256
                 * later.
 
257
                 */
 
258
                to->attrelid = InvalidOid;
 
259
 
 
260
                /*
 
261
                 * Check the opclass and index AM to see if either provides a keytype
 
262
                 * (overriding the attribute type).  Opclass takes precedence.
 
263
                 */
 
264
                tuple = SearchSysCache(CLAOID,
 
265
                                                           ObjectIdGetDatum(classObjectId[i]),
 
266
                                                           0, 0, 0);
 
267
                if (!HeapTupleIsValid(tuple))
 
268
                        elog(ERROR, "cache lookup failed for opclass %u",
 
269
                                 classObjectId[i]);
 
270
                opclassTup = (Form_pg_opclass) GETSTRUCT(tuple);
 
271
                if (OidIsValid(opclassTup->opckeytype))
 
272
                        keyType = opclassTup->opckeytype;
 
273
                else
 
274
                        keyType = amform->amkeytype;
 
275
                ReleaseSysCache(tuple);
 
276
 
 
277
                if (OidIsValid(keyType) && keyType != to->atttypid)
 
278
                {
 
279
                        /* index value and heap value have different types */
 
280
                        tuple = SearchSysCache(TYPEOID,
 
281
                                                                   ObjectIdGetDatum(keyType),
 
282
                                                                   0, 0, 0);
 
283
                        if (!HeapTupleIsValid(tuple))
 
284
                                elog(ERROR, "cache lookup failed for type %u", keyType);
 
285
                        typeTup = (Form_pg_type) GETSTRUCT(tuple);
 
286
 
 
287
                        to->atttypid = keyType;
 
288
                        to->atttypmod = -1;
 
289
                        to->attlen = typeTup->typlen;
 
290
                        to->attbyval = typeTup->typbyval;
 
291
                        to->attalign = typeTup->typalign;
 
292
                        to->attstorage = typeTup->typstorage;
 
293
 
 
294
                        ReleaseSysCache(tuple);
 
295
                }
 
296
        }
 
297
 
 
298
        ReleaseSysCache(amtuple);
 
299
 
 
300
        return indexTupDesc;
 
301
}
 
302
 
 
303
/* ----------------------------------------------------------------
 
304
 *              InitializeAttributeOids
 
305
 * ----------------------------------------------------------------
 
306
 */
 
307
static void
 
308
InitializeAttributeOids(Relation indexRelation,
 
309
                                                int numatts,
 
310
                                                Oid indexoid)
 
311
{
 
312
        TupleDesc       tupleDescriptor;
 
313
        int                     i;
 
314
 
 
315
        tupleDescriptor = RelationGetDescr(indexRelation);
 
316
 
 
317
        for (i = 0; i < numatts; i += 1)
 
318
                tupleDescriptor->attrs[i]->attrelid = indexoid;
 
319
}
 
320
 
 
321
/* ----------------------------------------------------------------
 
322
 *              AppendAttributeTuples
 
323
 * ----------------------------------------------------------------
 
324
 */
 
325
static void
 
326
AppendAttributeTuples(Relation indexRelation, int numatts)
 
327
{
 
328
        Relation        pg_attribute;
 
329
        CatalogIndexState indstate;
 
330
        TupleDesc       indexTupDesc;
 
331
        int                     i;
 
332
 
 
333
        /*
 
334
         * open the attribute relation and its indexes
 
335
         */
 
336
        pg_attribute = heap_open(AttributeRelationId, RowExclusiveLock);
 
337
 
 
338
        indstate = CatalogOpenIndexes(pg_attribute);
 
339
 
 
340
        /*
 
341
         * insert data from new index's tupdesc into pg_attribute
 
342
         */
 
343
        indexTupDesc = RelationGetDescr(indexRelation);
 
344
 
 
345
        for (i = 0; i < numatts; i++)
 
346
        {
 
347
                /*
 
348
                 * There used to be very grotty code here to set these fields, but I
 
349
                 * think it's unnecessary.  They should be set already.
 
350
                 */
 
351
                Assert(indexTupDesc->attrs[i]->attnum == i + 1);
 
352
                Assert(indexTupDesc->attrs[i]->attcacheoff == -1);
 
353
 
 
354
                InsertPgAttributeTuple(pg_attribute, indexTupDesc->attrs[i], indstate);
 
355
        }
 
356
 
 
357
        CatalogCloseIndexes(indstate);
 
358
 
 
359
        heap_close(pg_attribute, RowExclusiveLock);
 
360
}
 
361
 
 
362
/* ----------------------------------------------------------------
 
363
 *              UpdateIndexRelation
 
364
 *
 
365
 * Construct and insert a new entry in the pg_index catalog
 
366
 * ----------------------------------------------------------------
 
367
 */
 
368
static void
 
369
UpdateIndexRelation(Oid indexoid,
 
370
                                        Oid heapoid,
 
371
                                        IndexInfo *indexInfo,
 
372
                                        Oid *classOids,
 
373
                                        int16 *coloptions,
 
374
                                        bool primary,
 
375
                                        bool isvalid)
 
376
{
 
377
        int2vector *indkey;
 
378
        oidvector  *indclass;
 
379
        int2vector *indoption;
 
380
        Datum           exprsDatum;
 
381
        Datum           predDatum;
 
382
        Datum           values[Natts_pg_index];
 
383
        bool            nulls[Natts_pg_index];
 
384
        Relation        pg_index;
 
385
        HeapTuple       tuple;
 
386
        int                     i;
 
387
 
 
388
        /*
 
389
         * Copy the index key, opclass, and indoption info into arrays (should we
 
390
         * make the caller pass them like this to start with?)
 
391
         */
 
392
        indkey = buildint2vector(NULL, indexInfo->ii_NumIndexAttrs);
 
393
        for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
 
394
                indkey->values[i] = indexInfo->ii_KeyAttrNumbers[i];
 
395
        indclass = buildoidvector(classOids, indexInfo->ii_NumIndexAttrs);
 
396
        indoption = buildint2vector(coloptions, indexInfo->ii_NumIndexAttrs);
 
397
 
 
398
        /*
 
399
         * Convert the index expressions (if any) to a text datum
 
400
         */
 
401
        if (indexInfo->ii_Expressions != NIL)
 
402
        {
 
403
                char       *exprsString;
 
404
 
 
405
                exprsString = nodeToString(indexInfo->ii_Expressions);
 
406
                exprsDatum = CStringGetTextDatum(exprsString);
 
407
                pfree(exprsString);
 
408
        }
 
409
        else
 
410
                exprsDatum = (Datum) 0;
 
411
 
 
412
        /*
 
413
         * Convert the index predicate (if any) to a text datum.  Note we convert
 
414
         * implicit-AND format to normal explicit-AND for storage.
 
415
         */
 
416
        if (indexInfo->ii_Predicate != NIL)
 
417
        {
 
418
                char       *predString;
 
419
 
 
420
                predString = nodeToString(make_ands_explicit(indexInfo->ii_Predicate));
 
421
                predDatum = CStringGetTextDatum(predString);
 
422
                pfree(predString);
 
423
        }
 
424
        else
 
425
                predDatum = (Datum) 0;
 
426
 
 
427
        /*
 
428
         * open the system catalog index relation
 
429
         */
 
430
        pg_index = heap_open(IndexRelationId, RowExclusiveLock);
 
431
 
 
432
        /*
 
433
         * Build a pg_index tuple
 
434
         */
 
435
        MemSet(nulls, false, sizeof(nulls));
 
436
 
 
437
        values[Anum_pg_index_indexrelid - 1] = ObjectIdGetDatum(indexoid);
 
438
        values[Anum_pg_index_indrelid - 1] = ObjectIdGetDatum(heapoid);
 
439
        values[Anum_pg_index_indnatts - 1] = Int16GetDatum(indexInfo->ii_NumIndexAttrs);
 
440
        values[Anum_pg_index_indisunique - 1] = BoolGetDatum(indexInfo->ii_Unique);
 
441
        values[Anum_pg_index_indisprimary - 1] = BoolGetDatum(primary);
 
442
        values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(false);
 
443
        values[Anum_pg_index_indisvalid - 1] = BoolGetDatum(isvalid);
 
444
        values[Anum_pg_index_indcheckxmin - 1] = BoolGetDatum(false);
 
445
        /* we set isvalid and isready the same way */
 
446
        values[Anum_pg_index_indisready - 1] = BoolGetDatum(isvalid);
 
447
        values[Anum_pg_index_indkey - 1] = PointerGetDatum(indkey);
 
448
        values[Anum_pg_index_indclass - 1] = PointerGetDatum(indclass);
 
449
        values[Anum_pg_index_indoption - 1] = PointerGetDatum(indoption);
 
450
        values[Anum_pg_index_indexprs - 1] = exprsDatum;
 
451
        if (exprsDatum == (Datum) 0)
 
452
                nulls[Anum_pg_index_indexprs - 1] = true;
 
453
        values[Anum_pg_index_indpred - 1] = predDatum;
 
454
        if (predDatum == (Datum) 0)
 
455
                nulls[Anum_pg_index_indpred - 1] = true;
 
456
 
 
457
        tuple = heap_form_tuple(RelationGetDescr(pg_index), values, nulls);
 
458
 
 
459
        /*
 
460
         * insert the tuple into the pg_index catalog
 
461
         */
 
462
        simple_heap_insert(pg_index, tuple);
 
463
 
 
464
        /* update the indexes on pg_index */
 
465
        CatalogUpdateIndexes(pg_index, tuple);
 
466
 
 
467
        /*
 
468
         * close the relation and free the tuple
 
469
         */
 
470
        heap_close(pg_index, RowExclusiveLock);
 
471
        heap_freetuple(tuple);
 
472
}
 
473
 
 
474
 
 
475
/*
 
476
 * index_create
 
477
 *
 
478
 * heapRelationId: OID of table to build index on
 
479
 * indexRelationName: name to give the new index relation
 
480
 * indexRelationId: normally, pass InvalidOid to let this routine
 
481
 *              generate an OID for the index.  During bootstrap this may be
 
482
 *              nonzero to specify a preselected OID.
 
483
 * indexInfo: same info executor uses to insert into the index
 
484
 * accessMethodObjectId: OID of index AM to use
 
485
 * tableSpaceId: OID of tablespace to use
 
486
 * classObjectId: array of index opclass OIDs, one per index column
 
487
 * coloptions: array of per-index-column indoption settings
 
488
 * reloptions: AM-specific options
 
489
 * isprimary: index is a PRIMARY KEY
 
490
 * isconstraint: index is owned by a PRIMARY KEY or UNIQUE constraint
 
491
 * allow_system_table_mods: allow table to be a system catalog
 
492
 * skip_build: true to skip the index_build() step for the moment; caller
 
493
 *              must do it later (typically via reindex_index())
 
494
 * concurrent: if true, do not lock the table against writers.  The index
 
495
 *              will be marked "invalid" and the caller must take additional steps
 
496
 *              to fix it up.
 
497
 *
 
498
 * Returns OID of the created index.
 
499
 */
 
500
Oid
 
501
index_create(Oid heapRelationId,
 
502
                         const char *indexRelationName,
 
503
                         Oid indexRelationId,
 
504
                         IndexInfo *indexInfo,
 
505
                         Oid accessMethodObjectId,
 
506
                         Oid tableSpaceId,
 
507
                         Oid *classObjectId,
 
508
                         int16 *coloptions,
 
509
                         Datum reloptions,
 
510
                         bool isprimary,
 
511
                         bool isconstraint,
 
512
                         bool allow_system_table_mods,
 
513
                         bool skip_build,
 
514
                         bool concurrent)
 
515
{
 
516
        Relation        pg_class;
 
517
        Relation        heapRelation;
 
518
        Relation        indexRelation;
 
519
        TupleDesc       indexTupDesc;
 
520
        bool            shared_relation;
 
521
        Oid                     namespaceId;
 
522
        int                     i;
 
523
 
 
524
        pg_class = heap_open(RelationRelationId, RowExclusiveLock);
 
525
 
 
526
        /*
 
527
         * Only SELECT ... FOR UPDATE/SHARE are allowed while doing a standard
 
528
         * index build; but for concurrent builds we allow INSERT/UPDATE/DELETE
 
529
         * (but not VACUUM).
 
530
         */
 
531
        heapRelation = heap_open(heapRelationId,
 
532
                                                (concurrent ? ShareUpdateExclusiveLock : ShareLock));
 
533
 
 
534
        /*
 
535
         * The index will be in the same namespace as its parent table, and is
 
536
         * shared across databases if and only if the parent is.
 
537
         */
 
538
        namespaceId = RelationGetNamespace(heapRelation);
 
539
        shared_relation = heapRelation->rd_rel->relisshared;
 
540
 
 
541
        /*
 
542
         * check parameters
 
543
         */
 
544
        if (indexInfo->ii_NumIndexAttrs < 1)
 
545
                elog(ERROR, "must index at least one column");
 
546
 
 
547
        if (!allow_system_table_mods &&
 
548
                IsSystemRelation(heapRelation) &&
 
549
                IsNormalProcessingMode())
 
550
                ereport(ERROR,
 
551
                                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 
552
                                 errmsg("user-defined indexes on system catalog tables are not supported")));
 
553
 
 
554
        /*
 
555
         * concurrent index build on a system catalog is unsafe because we tend to
 
556
         * release locks before committing in catalogs
 
557
         */
 
558
        if (concurrent &&
 
559
                IsSystemRelation(heapRelation))
 
560
                ereport(ERROR,
 
561
                                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 
562
                                 errmsg("concurrent index creation on system catalog tables is not supported")));
 
563
 
 
564
        /*
 
565
         * We cannot allow indexing a shared relation after initdb (because
 
566
         * there's no way to make the entry in other databases' pg_class).
 
567
         */
 
568
        if (shared_relation && !IsBootstrapProcessingMode())
 
569
                ereport(ERROR,
 
570
                                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
 
571
                                 errmsg("shared indexes cannot be created after initdb")));
 
572
 
 
573
        /*
 
574
         * Validate shared/non-shared tablespace (must check this before doing
 
575
         * GetNewRelFileNode, to prevent Assert therein)
 
576
         */
 
577
        if (shared_relation)
 
578
        {
 
579
                if (tableSpaceId != GLOBALTABLESPACE_OID)
 
580
                        /* elog since this is not a user-facing error */
 
581
                        elog(ERROR,
 
582
                                 "shared relations must be placed in pg_global tablespace");
 
583
        }
 
584
        else
 
585
        {
 
586
                if (tableSpaceId == GLOBALTABLESPACE_OID)
 
587
                        ereport(ERROR,
 
588
                                        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 
589
                                         errmsg("only shared relations can be placed in pg_global tablespace")));
 
590
        }
 
591
 
 
592
        if (get_relname_relid(indexRelationName, namespaceId))
 
593
                ereport(ERROR,
 
594
                                (errcode(ERRCODE_DUPLICATE_TABLE),
 
595
                                 errmsg("relation \"%s\" already exists",
 
596
                                                indexRelationName)));
 
597
 
 
598
        /*
 
599
         * construct tuple descriptor for index tuples
 
600
         */
 
601
        indexTupDesc = ConstructTupleDescriptor(heapRelation,
 
602
                                                                                        indexInfo,
 
603
                                                                                        accessMethodObjectId,
 
604
                                                                                        classObjectId);
 
605
 
 
606
        /*
 
607
         * Allocate an OID for the index, unless we were told what to use.
 
608
         *
 
609
         * The OID will be the relfilenode as well, so make sure it doesn't
 
610
         * collide with either pg_class OIDs or existing physical files.
 
611
         */
 
612
        if (!OidIsValid(indexRelationId))
 
613
                indexRelationId = GetNewRelFileNode(tableSpaceId, shared_relation,
 
614
                                                                                        pg_class);
 
615
 
 
616
        /*
 
617
         * create the index relation's relcache entry and physical disk file. (If
 
618
         * we fail further down, it's the smgr's responsibility to remove the disk
 
619
         * file again.)
 
620
         */
 
621
        indexRelation = heap_create(indexRelationName,
 
622
                                                                namespaceId,
 
623
                                                                tableSpaceId,
 
624
                                                                indexRelationId,
 
625
                                                                indexTupDesc,
 
626
                                                                RELKIND_INDEX,
 
627
                                                                shared_relation,
 
628
                                                                allow_system_table_mods);
 
629
 
 
630
        Assert(indexRelationId == RelationGetRelid(indexRelation));
 
631
 
 
632
        /*
 
633
         * Obtain exclusive lock on it.  Although no other backends can see it
 
634
         * until we commit, this prevents deadlock-risk complaints from lock
 
635
         * manager in cases such as CLUSTER.
 
636
         */
 
637
        LockRelation(indexRelation, AccessExclusiveLock);
 
638
 
 
639
        /*
 
640
         * Fill in fields of the index's pg_class entry that are not set correctly
 
641
         * by heap_create.
 
642
         *
 
643
         * XXX should have a cleaner way to create cataloged indexes
 
644
         */
 
645
        indexRelation->rd_rel->relowner = heapRelation->rd_rel->relowner;
 
646
        indexRelation->rd_rel->relam = accessMethodObjectId;
 
647
        indexRelation->rd_rel->relkind = RELKIND_INDEX;
 
648
        indexRelation->rd_rel->relhasoids = false;
 
649
 
 
650
        /*
 
651
         * store index's pg_class entry
 
652
         */
 
653
        InsertPgClassTuple(pg_class, indexRelation,
 
654
                                           RelationGetRelid(indexRelation),
 
655
                                           reloptions);
 
656
 
 
657
        /* done with pg_class */
 
658
        heap_close(pg_class, RowExclusiveLock);
 
659
 
 
660
        /*
 
661
         * now update the object id's of all the attribute tuple forms in the
 
662
         * index relation's tuple descriptor
 
663
         */
 
664
        InitializeAttributeOids(indexRelation,
 
665
                                                        indexInfo->ii_NumIndexAttrs,
 
666
                                                        indexRelationId);
 
667
 
 
668
        /*
 
669
         * append ATTRIBUTE tuples for the index
 
670
         */
 
671
        AppendAttributeTuples(indexRelation, indexInfo->ii_NumIndexAttrs);
 
672
 
 
673
        /* ----------------
 
674
         *        update pg_index
 
675
         *        (append INDEX tuple)
 
676
         *
 
677
         *        Note that this stows away a representation of "predicate".
 
678
         *        (Or, could define a rule to maintain the predicate) --Nels, Feb '92
 
679
         * ----------------
 
680
         */
 
681
        UpdateIndexRelation(indexRelationId, heapRelationId, indexInfo,
 
682
                                                classObjectId, coloptions, isprimary, !concurrent);
 
683
 
 
684
        /*
 
685
         * Register constraint and dependencies for the index.
 
686
         *
 
687
         * If the index is from a CONSTRAINT clause, construct a pg_constraint
 
688
         * entry. The index is then linked to the constraint, which in turn is
 
689
         * linked to the table.  If it's not a CONSTRAINT, make the dependency
 
690
         * directly on the table.
 
691
         *
 
692
         * We don't need a dependency on the namespace, because there'll be an
 
693
         * indirect dependency via our parent table.
 
694
         *
 
695
         * During bootstrap we can't register any dependencies, and we don't try
 
696
         * to make a constraint either.
 
697
         */
 
698
        if (!IsBootstrapProcessingMode())
 
699
        {
 
700
                ObjectAddress myself,
 
701
                                        referenced;
 
702
 
 
703
                myself.classId = RelationRelationId;
 
704
                myself.objectId = indexRelationId;
 
705
                myself.objectSubId = 0;
 
706
 
 
707
                if (isconstraint)
 
708
                {
 
709
                        char            constraintType;
 
710
                        Oid                     conOid;
 
711
 
 
712
                        if (isprimary)
 
713
                                constraintType = CONSTRAINT_PRIMARY;
 
714
                        else if (indexInfo->ii_Unique)
 
715
                                constraintType = CONSTRAINT_UNIQUE;
 
716
                        else
 
717
                        {
 
718
                                elog(ERROR, "constraint must be PRIMARY or UNIQUE");
 
719
                                constraintType = 0;             /* keep compiler quiet */
 
720
                        }
 
721
 
 
722
                        /* Shouldn't have any expressions */
 
723
                        if (indexInfo->ii_Expressions)
 
724
                                elog(ERROR, "constraints cannot have index expressions");
 
725
 
 
726
                        conOid = CreateConstraintEntry(indexRelationName,
 
727
                                                                                   namespaceId,
 
728
                                                                                   constraintType,
 
729
                                                                                   false,               /* isDeferrable */
 
730
                                                                                   false,               /* isDeferred */
 
731
                                                                                   heapRelationId,
 
732
                                                                                   indexInfo->ii_KeyAttrNumbers,
 
733
                                                                                   indexInfo->ii_NumIndexAttrs,
 
734
                                                                                   InvalidOid,  /* no domain */
 
735
                                                                                   InvalidOid,  /* no foreign key */
 
736
                                                                                   NULL,
 
737
                                                                                   NULL,
 
738
                                                                                   NULL,
 
739
                                                                                   NULL,
 
740
                                                                                   0,
 
741
                                                                                   ' ',
 
742
                                                                                   ' ',
 
743
                                                                                   ' ',
 
744
                                                                                   InvalidOid,  /* no associated index */
 
745
                                                                                   NULL,                /* no check constraint */
 
746
                                                                                   NULL,
 
747
                                                                                   NULL,
 
748
                                                                                   true, /* islocal */
 
749
                                                                                   0); /* inhcount */
 
750
 
 
751
                        referenced.classId = ConstraintRelationId;
 
752
                        referenced.objectId = conOid;
 
753
                        referenced.objectSubId = 0;
 
754
 
 
755
                        recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL);
 
756
                }
 
757
                else
 
758
                {
 
759
                        bool            have_simple_col = false;
 
760
 
 
761
                        /* Create auto dependencies on simply-referenced columns */
 
762
                        for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
 
763
                        {
 
764
                                if (indexInfo->ii_KeyAttrNumbers[i] != 0)
 
765
                                {
 
766
                                        referenced.classId = RelationRelationId;
 
767
                                        referenced.objectId = heapRelationId;
 
768
                                        referenced.objectSubId = indexInfo->ii_KeyAttrNumbers[i];
 
769
 
 
770
                                        recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
 
771
 
 
772
                                        have_simple_col = true;
 
773
                                }
 
774
                        }
 
775
 
 
776
                        /*
 
777
                         * It's possible for an index to not depend on any columns of the
 
778
                         * table at all, in which case we need to give it a dependency on
 
779
                         * the table as a whole; else it won't get dropped when the table
 
780
                         * is dropped.  This edge case is not totally useless; for
 
781
                         * example, a unique index on a constant expression can serve to
 
782
                         * prevent a table from containing more than one row.
 
783
                         */
 
784
                        if (!have_simple_col &&
 
785
                         !contain_vars_of_level((Node *) indexInfo->ii_Expressions, 0) &&
 
786
                                !contain_vars_of_level((Node *) indexInfo->ii_Predicate, 0))
 
787
                        {
 
788
                                referenced.classId = RelationRelationId;
 
789
                                referenced.objectId = heapRelationId;
 
790
                                referenced.objectSubId = 0;
 
791
 
 
792
                                recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
 
793
                        }
 
794
                }
 
795
 
 
796
                /* Store dependency on operator classes */
 
797
                for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
 
798
                {
 
799
                        referenced.classId = OperatorClassRelationId;
 
800
                        referenced.objectId = classObjectId[i];
 
801
                        referenced.objectSubId = 0;
 
802
 
 
803
                        recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
 
804
                }
 
805
 
 
806
                /* Store dependencies on anything mentioned in index expressions */
 
807
                if (indexInfo->ii_Expressions)
 
808
                {
 
809
                        recordDependencyOnSingleRelExpr(&myself,
 
810
                                                                                  (Node *) indexInfo->ii_Expressions,
 
811
                                                                                        heapRelationId,
 
812
                                                                                        DEPENDENCY_NORMAL,
 
813
                                                                                        DEPENDENCY_AUTO);
 
814
                }
 
815
 
 
816
                /* Store dependencies on anything mentioned in predicate */
 
817
                if (indexInfo->ii_Predicate)
 
818
                {
 
819
                        recordDependencyOnSingleRelExpr(&myself,
 
820
                                                                                        (Node *) indexInfo->ii_Predicate,
 
821
                                                                                        heapRelationId,
 
822
                                                                                        DEPENDENCY_NORMAL,
 
823
                                                                                        DEPENDENCY_AUTO);
 
824
                }
 
825
        }
 
826
 
 
827
        /*
 
828
         * Advance the command counter so that we can see the newly-entered
 
829
         * catalog tuples for the index.
 
830
         */
 
831
        CommandCounterIncrement();
 
832
 
 
833
        /*
 
834
         * In bootstrap mode, we have to fill in the index strategy structure with
 
835
         * information from the catalogs.  If we aren't bootstrapping, then the
 
836
         * relcache entry has already been rebuilt thanks to sinval update during
 
837
         * CommandCounterIncrement.
 
838
         */
 
839
        if (IsBootstrapProcessingMode())
 
840
                RelationInitIndexAccessInfo(indexRelation);
 
841
        else
 
842
                Assert(indexRelation->rd_indexcxt != NULL);
 
843
 
 
844
        /*
 
845
         * If this is bootstrap (initdb) time, then we don't actually fill in the
 
846
         * index yet.  We'll be creating more indexes and classes later, so we
 
847
         * delay filling them in until just before we're done with bootstrapping.
 
848
         * Similarly, if the caller specified skip_build then filling the index is
 
849
         * delayed till later (ALTER TABLE can save work in some cases with this).
 
850
         * Otherwise, we call the AM routine that constructs the index.
 
851
         */
 
852
        if (IsBootstrapProcessingMode())
 
853
        {
 
854
                index_register(heapRelationId, indexRelationId, indexInfo);
 
855
        }
 
856
        else if (skip_build)
 
857
        {
 
858
                /*
 
859
                 * Caller is responsible for filling the index later on.  However,
 
860
                 * we'd better make sure that the heap relation is correctly marked as
 
861
                 * having an index.
 
862
                 */
 
863
                index_update_stats(heapRelation,
 
864
                                                   true,
 
865
                                                   isprimary,
 
866
                                                   InvalidOid,
 
867
                                                   heapRelation->rd_rel->reltuples);
 
868
                /* Make the above update visible */
 
869
                CommandCounterIncrement();
 
870
        }
 
871
        else
 
872
        {
 
873
                index_build(heapRelation, indexRelation, indexInfo, isprimary);
 
874
        }
 
875
 
 
876
        /*
 
877
         * Close the heap and index; but we keep the locks that we acquired above
 
878
         * until end of transaction.
 
879
         */
 
880
        index_close(indexRelation, NoLock);
 
881
        heap_close(heapRelation, NoLock);
 
882
 
 
883
        return indexRelationId;
 
884
}
 
885
 
 
886
/*
 
887
 *              index_drop
 
888
 *
 
889
 * NOTE: this routine should now only be called through performDeletion(),
 
890
 * else associated dependencies won't be cleaned up.
 
891
 */
 
892
void
 
893
index_drop(Oid indexId)
 
894
{
 
895
        Oid                     heapId;
 
896
        Relation        userHeapRelation;
 
897
        Relation        userIndexRelation;
 
898
        Relation        indexRelation;
 
899
        HeapTuple       tuple;
 
900
        bool            hasexprs;
 
901
 
 
902
        /*
 
903
         * To drop an index safely, we must grab exclusive lock on its parent
 
904
         * table; otherwise there could be other backends using the index!
 
905
         * Exclusive lock on the index alone is insufficient because another
 
906
         * backend might be in the midst of devising a query plan that will use
 
907
         * the index.  The parser and planner take care to hold an appropriate
 
908
         * lock on the parent table while working, but having them hold locks on
 
909
         * all the indexes too seems overly expensive.  We do grab exclusive lock
 
910
         * on the index too, just to be safe. Both locks must be held till end of
 
911
         * transaction, else other backends will still see this index in pg_index.
 
912
         */
 
913
        heapId = IndexGetRelation(indexId);
 
914
        userHeapRelation = heap_open(heapId, AccessExclusiveLock);
 
915
 
 
916
        userIndexRelation = index_open(indexId, AccessExclusiveLock);
 
917
 
 
918
        /*
 
919
         * Schedule physical removal of the files
 
920
         */
 
921
        RelationDropStorage(userIndexRelation);
 
922
 
 
923
        /*
 
924
         * Close and flush the index's relcache entry, to ensure relcache doesn't
 
925
         * try to rebuild it while we're deleting catalog entries. We keep the
 
926
         * lock though.
 
927
         */
 
928
        index_close(userIndexRelation, NoLock);
 
929
 
 
930
        RelationForgetRelation(indexId);
 
931
 
 
932
        /*
 
933
         * fix INDEX relation, and check for expressional index
 
934
         */
 
935
        indexRelation = heap_open(IndexRelationId, RowExclusiveLock);
 
936
 
 
937
        tuple = SearchSysCache(INDEXRELID,
 
938
                                                   ObjectIdGetDatum(indexId),
 
939
                                                   0, 0, 0);
 
940
        if (!HeapTupleIsValid(tuple))
 
941
                elog(ERROR, "cache lookup failed for index %u", indexId);
 
942
 
 
943
        hasexprs = !heap_attisnull(tuple, Anum_pg_index_indexprs);
 
944
 
 
945
        simple_heap_delete(indexRelation, &tuple->t_self);
 
946
 
 
947
        ReleaseSysCache(tuple);
 
948
        heap_close(indexRelation, RowExclusiveLock);
 
949
 
 
950
        /*
 
951
         * if it has any expression columns, we might have stored statistics about
 
952
         * them.
 
953
         */
 
954
        if (hasexprs)
 
955
                RemoveStatistics(indexId, 0);
 
956
 
 
957
        /*
 
958
         * fix ATTRIBUTE relation
 
959
         */
 
960
        DeleteAttributeTuples(indexId);
 
961
 
 
962
        /*
 
963
         * fix RELATION relation
 
964
         */
 
965
        DeleteRelationTuple(indexId);
 
966
 
 
967
        /*
 
968
         * We are presently too lazy to attempt to compute the new correct value
 
969
         * of relhasindex (the next VACUUM will fix it if necessary). So there is
 
970
         * no need to update the pg_class tuple for the owning relation. But we
 
971
         * must send out a shared-cache-inval notice on the owning relation to
 
972
         * ensure other backends update their relcache lists of indexes.
 
973
         */
 
974
        CacheInvalidateRelcache(userHeapRelation);
 
975
 
 
976
        /*
 
977
         * Close owning rel, but keep lock
 
978
         */
 
979
        heap_close(userHeapRelation, NoLock);
 
980
}
 
981
 
 
982
/* ----------------------------------------------------------------
 
983
 *                                              index_build support
 
984
 * ----------------------------------------------------------------
 
985
 */
 
986
 
 
987
/* ----------------
 
988
 *              BuildIndexInfo
 
989
 *                      Construct an IndexInfo record for an open index
 
990
 *
 
991
 * IndexInfo stores the information about the index that's needed by
 
992
 * FormIndexDatum, which is used for both index_build() and later insertion
 
993
 * of individual index tuples.  Normally we build an IndexInfo for an index
 
994
 * just once per command, and then use it for (potentially) many tuples.
 
995
 * ----------------
 
996
 */
 
997
IndexInfo *
 
998
BuildIndexInfo(Relation index)
 
999
{
 
1000
        IndexInfo  *ii = makeNode(IndexInfo);
 
1001
        Form_pg_index indexStruct = index->rd_index;
 
1002
        int                     i;
 
1003
        int                     numKeys;
 
1004
 
 
1005
        /* check the number of keys, and copy attr numbers into the IndexInfo */
 
1006
        numKeys = indexStruct->indnatts;
 
1007
        if (numKeys < 1 || numKeys > INDEX_MAX_KEYS)
 
1008
                elog(ERROR, "invalid indnatts %d for index %u",
 
1009
                         numKeys, RelationGetRelid(index));
 
1010
        ii->ii_NumIndexAttrs = numKeys;
 
1011
        for (i = 0; i < numKeys; i++)
 
1012
                ii->ii_KeyAttrNumbers[i] = indexStruct->indkey.values[i];
 
1013
 
 
1014
        /* fetch any expressions needed for expressional indexes */
 
1015
        ii->ii_Expressions = RelationGetIndexExpressions(index);
 
1016
        ii->ii_ExpressionsState = NIL;
 
1017
 
 
1018
        /* fetch index predicate if any */
 
1019
        ii->ii_Predicate = RelationGetIndexPredicate(index);
 
1020
        ii->ii_PredicateState = NIL;
 
1021
 
 
1022
        /* other info */
 
1023
        ii->ii_Unique = indexStruct->indisunique;
 
1024
        ii->ii_ReadyForInserts = indexStruct->indisready;
 
1025
 
 
1026
        /* initialize index-build state to default */
 
1027
        ii->ii_Concurrent = false;
 
1028
        ii->ii_BrokenHotChain = false;
 
1029
 
 
1030
        return ii;
 
1031
}
 
1032
 
 
1033
/* ----------------
 
1034
 *              FormIndexDatum
 
1035
 *                      Construct values[] and isnull[] arrays for a new index tuple.
 
1036
 *
 
1037
 *      indexInfo               Info about the index
 
1038
 *      slot                    Heap tuple for which we must prepare an index entry
 
1039
 *      estate                  executor state for evaluating any index expressions
 
1040
 *      values                  Array of index Datums (output area)
 
1041
 *      isnull                  Array of is-null indicators (output area)
 
1042
 *
 
1043
 * When there are no index expressions, estate may be NULL.  Otherwise it
 
1044
 * must be supplied, *and* the ecxt_scantuple slot of its per-tuple expr
 
1045
 * context must point to the heap tuple passed in.
 
1046
 *
 
1047
 * Notice we don't actually call index_form_tuple() here; we just prepare
 
1048
 * its input arrays values[] and isnull[].      This is because the index AM
 
1049
 * may wish to alter the data before storage.
 
1050
 * ----------------
 
1051
 */
 
1052
void
 
1053
FormIndexDatum(IndexInfo *indexInfo,
 
1054
                           TupleTableSlot *slot,
 
1055
                           EState *estate,
 
1056
                           Datum *values,
 
1057
                           bool *isnull)
 
1058
{
 
1059
        ListCell   *indexpr_item;
 
1060
        int                     i;
 
1061
 
 
1062
        if (indexInfo->ii_Expressions != NIL &&
 
1063
                indexInfo->ii_ExpressionsState == NIL)
 
1064
        {
 
1065
                /* First time through, set up expression evaluation state */
 
1066
                indexInfo->ii_ExpressionsState = (List *)
 
1067
                        ExecPrepareExpr((Expr *) indexInfo->ii_Expressions,
 
1068
                                                        estate);
 
1069
                /* Check caller has set up context correctly */
 
1070
                Assert(GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
 
1071
        }
 
1072
        indexpr_item = list_head(indexInfo->ii_ExpressionsState);
 
1073
 
 
1074
        for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
 
1075
        {
 
1076
                int                     keycol = indexInfo->ii_KeyAttrNumbers[i];
 
1077
                Datum           iDatum;
 
1078
                bool            isNull;
 
1079
 
 
1080
                if (keycol != 0)
 
1081
                {
 
1082
                        /*
 
1083
                         * Plain index column; get the value we need directly from the
 
1084
                         * heap tuple.
 
1085
                         */
 
1086
                        iDatum = slot_getattr(slot, keycol, &isNull);
 
1087
                }
 
1088
                else
 
1089
                {
 
1090
                        /*
 
1091
                         * Index expression --- need to evaluate it.
 
1092
                         */
 
1093
                        if (indexpr_item == NULL)
 
1094
                                elog(ERROR, "wrong number of index expressions");
 
1095
                        iDatum = ExecEvalExprSwitchContext((ExprState *) lfirst(indexpr_item),
 
1096
                                                                                           GetPerTupleExprContext(estate),
 
1097
                                                                                           &isNull,
 
1098
                                                                                           NULL);
 
1099
                        indexpr_item = lnext(indexpr_item);
 
1100
                }
 
1101
                values[i] = iDatum;
 
1102
                isnull[i] = isNull;
 
1103
        }
 
1104
 
 
1105
        if (indexpr_item != NULL)
 
1106
                elog(ERROR, "wrong number of index expressions");
 
1107
}
 
1108
 
 
1109
 
 
1110
/*
 
1111
 * index_update_stats --- update pg_class entry after CREATE INDEX or REINDEX
 
1112
 *
 
1113
 * This routine updates the pg_class row of either an index or its parent
 
1114
 * relation after CREATE INDEX or REINDEX.      Its rather bizarre API is designed
 
1115
 * to ensure we can do all the necessary work in just one update.
 
1116
 *
 
1117
 * hasindex: set relhasindex to this value
 
1118
 * isprimary: if true, set relhaspkey true; else no change
 
1119
 * reltoastidxid: if not InvalidOid, set reltoastidxid to this value;
 
1120
 *              else no change
 
1121
 * reltuples: set reltuples to this value
 
1122
 *
 
1123
 * relpages is also updated (using RelationGetNumberOfBlocks()).
 
1124
 *
 
1125
 * NOTE: an important side-effect of this operation is that an SI invalidation
 
1126
 * message is sent out to all backends --- including me --- causing relcache
 
1127
 * entries to be flushed or updated with the new data.  This must happen even
 
1128
 * if we find that no change is needed in the pg_class row.  When updating
 
1129
 * a heap entry, this ensures that other backends find out about the new
 
1130
 * index.  When updating an index, it's important because some index AMs
 
1131
 * expect a relcache flush to occur after REINDEX.
 
1132
 */
 
1133
static void
 
1134
index_update_stats(Relation rel, bool hasindex, bool isprimary,
 
1135
                                   Oid reltoastidxid, double reltuples)
 
1136
{
 
1137
        BlockNumber relpages = RelationGetNumberOfBlocks(rel);
 
1138
        Oid                     relid = RelationGetRelid(rel);
 
1139
        Relation        pg_class;
 
1140
        HeapTuple       tuple;
 
1141
        Form_pg_class rd_rel;
 
1142
        bool            dirty;
 
1143
 
 
1144
        /*
 
1145
         * We always update the pg_class row using a non-transactional,
 
1146
         * overwrite-in-place update.  There are several reasons for this:
 
1147
         *
 
1148
         * 1. In bootstrap mode, we have no choice --- UPDATE wouldn't work.
 
1149
         *
 
1150
         * 2. We could be reindexing pg_class itself, in which case we can't move
 
1151
         * its pg_class row because CatalogUpdateIndexes might not know about all
 
1152
         * the indexes yet (see reindex_relation).
 
1153
         *
 
1154
         * 3. Because we execute CREATE INDEX with just share lock on the parent
 
1155
         * rel (to allow concurrent index creations), an ordinary update could
 
1156
         * suffer a tuple-concurrently-updated failure against another CREATE
 
1157
         * INDEX committing at about the same time.  We can avoid that by having
 
1158
         * them both do nontransactional updates (we assume they will both be
 
1159
         * trying to change the pg_class row to the same thing, so it doesn't
 
1160
         * matter which goes first).
 
1161
         *
 
1162
         * 4. Even with just a single CREATE INDEX, there's a risk factor because
 
1163
         * someone else might be trying to open the rel while we commit, and this
 
1164
         * creates a race condition as to whether he will see both or neither of
 
1165
         * the pg_class row versions as valid.  Again, a non-transactional update
 
1166
         * avoids the risk.  It is indeterminate which state of the row the other
 
1167
         * process will see, but it doesn't matter (if he's only taking
 
1168
         * AccessShareLock, then it's not critical that he see relhasindex true).
 
1169
         *
 
1170
         * It is safe to use a non-transactional update even though our
 
1171
         * transaction could still fail before committing.      Setting relhasindex
 
1172
         * true is safe even if there are no indexes (VACUUM will eventually fix
 
1173
         * it), and of course the relpages and reltuples counts are correct (or at
 
1174
         * least more so than the old values) regardless.
 
1175
         */
 
1176
 
 
1177
        pg_class = heap_open(RelationRelationId, RowExclusiveLock);
 
1178
 
 
1179
        /*
 
1180
         * Make a copy of the tuple to update.  Normally we use the syscache, but
 
1181
         * we can't rely on that during bootstrap or while reindexing pg_class
 
1182
         * itself.
 
1183
         */
 
1184
        if (IsBootstrapProcessingMode() ||
 
1185
                ReindexIsProcessingHeap(RelationRelationId))
 
1186
        {
 
1187
                /* don't assume syscache will work */
 
1188
                HeapScanDesc pg_class_scan;
 
1189
                ScanKeyData key[1];
 
1190
 
 
1191
                ScanKeyInit(&key[0],
 
1192
                                        ObjectIdAttributeNumber,
 
1193
                                        BTEqualStrategyNumber, F_OIDEQ,
 
1194
                                        ObjectIdGetDatum(relid));
 
1195
 
 
1196
                pg_class_scan = heap_beginscan(pg_class, SnapshotNow, 1, key);
 
1197
                tuple = heap_getnext(pg_class_scan, ForwardScanDirection);
 
1198
                tuple = heap_copytuple(tuple);
 
1199
                heap_endscan(pg_class_scan);
 
1200
        }
 
1201
        else
 
1202
        {
 
1203
                /* normal case, use syscache */
 
1204
                tuple = SearchSysCacheCopy(RELOID,
 
1205
                                                                   ObjectIdGetDatum(relid),
 
1206
                                                                   0, 0, 0);
 
1207
        }
 
1208
 
 
1209
        if (!HeapTupleIsValid(tuple))
 
1210
                elog(ERROR, "could not find tuple for relation %u", relid);
 
1211
        rd_rel = (Form_pg_class) GETSTRUCT(tuple);
 
1212
 
 
1213
        /* Apply required updates, if any, to copied tuple */
 
1214
 
 
1215
        dirty = false;
 
1216
        if (rd_rel->relhasindex != hasindex)
 
1217
        {
 
1218
                rd_rel->relhasindex = hasindex;
 
1219
                dirty = true;
 
1220
        }
 
1221
        if (isprimary)
 
1222
        {
 
1223
                if (!rd_rel->relhaspkey)
 
1224
                {
 
1225
                        rd_rel->relhaspkey = true;
 
1226
                        dirty = true;
 
1227
                }
 
1228
        }
 
1229
        if (OidIsValid(reltoastidxid))
 
1230
        {
 
1231
                Assert(rd_rel->relkind == RELKIND_TOASTVALUE);
 
1232
                if (rd_rel->reltoastidxid != reltoastidxid)
 
1233
                {
 
1234
                        rd_rel->reltoastidxid = reltoastidxid;
 
1235
                        dirty = true;
 
1236
                }
 
1237
        }
 
1238
        if (rd_rel->reltuples != (float4) reltuples)
 
1239
        {
 
1240
                rd_rel->reltuples = (float4) reltuples;
 
1241
                dirty = true;
 
1242
        }
 
1243
        if (rd_rel->relpages != (int32) relpages)
 
1244
        {
 
1245
                rd_rel->relpages = (int32) relpages;
 
1246
                dirty = true;
 
1247
        }
 
1248
 
 
1249
        /*
 
1250
         * If anything changed, write out the tuple
 
1251
         */
 
1252
        if (dirty)
 
1253
        {
 
1254
                heap_inplace_update(pg_class, tuple);
 
1255
                /* the above sends a cache inval message */
 
1256
        }
 
1257
        else
 
1258
        {
 
1259
                /* no need to change tuple, but force relcache inval anyway */
 
1260
                CacheInvalidateRelcacheByTuple(tuple);
 
1261
        }
 
1262
 
 
1263
        heap_freetuple(tuple);
 
1264
 
 
1265
        heap_close(pg_class, RowExclusiveLock);
 
1266
}
 
1267
 
 
1268
/*
 
1269
 * setNewRelfilenode            - assign a new relfilenode value to the relation
 
1270
 *
 
1271
 * Caller must already hold exclusive lock on the relation.
 
1272
 *
 
1273
 * The relation is marked with relfrozenxid=freezeXid (InvalidTransactionId
 
1274
 * must be passed for indexes)
 
1275
 */
 
1276
void
 
1277
setNewRelfilenode(Relation relation, TransactionId freezeXid)
 
1278
{
 
1279
        Oid                     newrelfilenode;
 
1280
        RelFileNode newrnode;
 
1281
        Relation        pg_class;
 
1282
        HeapTuple       tuple;
 
1283
        Form_pg_class rd_rel;
 
1284
 
 
1285
        /* Can't change relfilenode for nailed tables (indexes ok though) */
 
1286
        Assert(!relation->rd_isnailed ||
 
1287
                   relation->rd_rel->relkind == RELKIND_INDEX);
 
1288
        /* Can't change for shared tables or indexes */
 
1289
        Assert(!relation->rd_rel->relisshared);
 
1290
        /* Indexes must have Invalid frozenxid; other relations must not */
 
1291
        Assert((relation->rd_rel->relkind == RELKIND_INDEX &&
 
1292
                        freezeXid == InvalidTransactionId) ||
 
1293
                   TransactionIdIsNormal(freezeXid));
 
1294
 
 
1295
        /* Allocate a new relfilenode */
 
1296
        newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace,
 
1297
                                                                           relation->rd_rel->relisshared,
 
1298
                                                                           NULL);
 
1299
 
 
1300
        /*
 
1301
         * Find the pg_class tuple for the given relation.      This is not used
 
1302
         * during bootstrap, so okay to use heap_update always.
 
1303
         */
 
1304
        pg_class = heap_open(RelationRelationId, RowExclusiveLock);
 
1305
 
 
1306
        tuple = SearchSysCacheCopy(RELOID,
 
1307
                                                           ObjectIdGetDatum(RelationGetRelid(relation)),
 
1308
                                                           0, 0, 0);
 
1309
        if (!HeapTupleIsValid(tuple))
 
1310
                elog(ERROR, "could not find tuple for relation %u",
 
1311
                         RelationGetRelid(relation));
 
1312
        rd_rel = (Form_pg_class) GETSTRUCT(tuple);
 
1313
 
 
1314
        /*
 
1315
         * ... and create storage for corresponding forks in the new relfilenode.
 
1316
         *
 
1317
         * NOTE: any conflict in relfilenode value will be caught here
 
1318
         */
 
1319
        newrnode = relation->rd_node;
 
1320
        newrnode.relNode = newrelfilenode;
 
1321
 
 
1322
        /*
 
1323
         * Create the main fork, like heap_create() does, and drop the old
 
1324
         * storage.
 
1325
         */
 
1326
        RelationCreateStorage(newrnode, relation->rd_istemp);
 
1327
        smgrclosenode(newrnode);
 
1328
        RelationDropStorage(relation);
 
1329
 
 
1330
        /* update the pg_class row */
 
1331
        rd_rel->relfilenode = newrelfilenode;
 
1332
        rd_rel->relpages = 0;           /* it's empty until further notice */
 
1333
        rd_rel->reltuples = 0;
 
1334
        rd_rel->relfrozenxid = freezeXid;
 
1335
        simple_heap_update(pg_class, &tuple->t_self, tuple);
 
1336
        CatalogUpdateIndexes(pg_class, tuple);
 
1337
 
 
1338
        heap_freetuple(tuple);
 
1339
 
 
1340
        heap_close(pg_class, RowExclusiveLock);
 
1341
 
 
1342
        /* Make sure the relfilenode change is visible */
 
1343
        CommandCounterIncrement();
 
1344
 
 
1345
        /* Mark the rel as having a new relfilenode in current transaction */
 
1346
        RelationCacheMarkNewRelfilenode(relation);
 
1347
}
 
1348
 
 
1349
 
 
1350
/*
 
1351
 * index_build - invoke access-method-specific index build procedure
 
1352
 *
 
1353
 * On entry, the index's catalog entries are valid, and its physical disk
 
1354
 * file has been created but is empty.  We call the AM-specific build
 
1355
 * procedure to fill in the index contents.  We then update the pg_class
 
1356
 * entries of the index and heap relation as needed, using statistics
 
1357
 * returned by ambuild as well as data passed by the caller.
 
1358
 *
 
1359
 * Note: when reindexing an existing index, isprimary can be false;
 
1360
 * the index is already properly marked and need not be re-marked.
 
1361
 *
 
1362
 * Note: before Postgres 8.2, the passed-in heap and index Relations
 
1363
 * were automatically closed by this routine.  This is no longer the case.
 
1364
 * The caller opened 'em, and the caller should close 'em.
 
1365
 */
 
1366
void
 
1367
index_build(Relation heapRelation,
 
1368
                        Relation indexRelation,
 
1369
                        IndexInfo *indexInfo,
 
1370
                        bool isprimary)
 
1371
{
 
1372
        RegProcedure procedure;
 
1373
        IndexBuildResult *stats;
 
1374
        Oid                     save_userid;
 
1375
        bool            save_secdefcxt;
 
1376
 
 
1377
        /*
 
1378
         * sanity checks
 
1379
         */
 
1380
        Assert(RelationIsValid(indexRelation));
 
1381
        Assert(PointerIsValid(indexRelation->rd_am));
 
1382
 
 
1383
        procedure = indexRelation->rd_am->ambuild;
 
1384
        Assert(RegProcedureIsValid(procedure));
 
1385
 
 
1386
        /*
 
1387
         * Switch to the table owner's userid, so that any index functions are
 
1388
         * run as that user.
 
1389
         */
 
1390
        GetUserIdAndContext(&save_userid, &save_secdefcxt);
 
1391
        SetUserIdAndContext(heapRelation->rd_rel->relowner, true);
 
1392
 
 
1393
        /*
 
1394
         * Call the access method's build procedure
 
1395
         */
 
1396
        stats = (IndexBuildResult *)
 
1397
                DatumGetPointer(OidFunctionCall3(procedure,
 
1398
                                                                                 PointerGetDatum(heapRelation),
 
1399
                                                                                 PointerGetDatum(indexRelation),
 
1400
                                                                                 PointerGetDatum(indexInfo)));
 
1401
        Assert(PointerIsValid(stats));
 
1402
 
 
1403
        /* Restore userid */
 
1404
        SetUserIdAndContext(save_userid, save_secdefcxt);
 
1405
 
 
1406
        /*
 
1407
         * If we found any potentially broken HOT chains, mark the index as not
 
1408
         * being usable until the current transaction is below the event horizon.
 
1409
         * See src/backend/access/heap/README.HOT for discussion.
 
1410
         */
 
1411
        if (indexInfo->ii_BrokenHotChain)
 
1412
        {
 
1413
                Oid                     indexId = RelationGetRelid(indexRelation);
 
1414
                Relation        pg_index;
 
1415
                HeapTuple       indexTuple;
 
1416
                Form_pg_index indexForm;
 
1417
 
 
1418
                pg_index = heap_open(IndexRelationId, RowExclusiveLock);
 
1419
 
 
1420
                indexTuple = SearchSysCacheCopy(INDEXRELID,
 
1421
                                                                                ObjectIdGetDatum(indexId),
 
1422
                                                                                0, 0, 0);
 
1423
                if (!HeapTupleIsValid(indexTuple))
 
1424
                        elog(ERROR, "cache lookup failed for index %u", indexId);
 
1425
                indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
 
1426
 
 
1427
                indexForm->indcheckxmin = true;
 
1428
                simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
 
1429
                CatalogUpdateIndexes(pg_index, indexTuple);
 
1430
 
 
1431
                heap_freetuple(indexTuple);
 
1432
                heap_close(pg_index, RowExclusiveLock);
 
1433
        }
 
1434
 
 
1435
        /*
 
1436
         * Update heap and index pg_class rows
 
1437
         */
 
1438
        index_update_stats(heapRelation,
 
1439
                                           true,
 
1440
                                           isprimary,
 
1441
                                           (heapRelation->rd_rel->relkind == RELKIND_TOASTVALUE) ?
 
1442
                                           RelationGetRelid(indexRelation) : InvalidOid,
 
1443
                                           stats->heap_tuples);
 
1444
 
 
1445
        index_update_stats(indexRelation,
 
1446
                                           false,
 
1447
                                           false,
 
1448
                                           InvalidOid,
 
1449
                                           stats->index_tuples);
 
1450
 
 
1451
        /* Make the updated versions visible */
 
1452
        CommandCounterIncrement();
 
1453
}
 
1454
 
 
1455
 
 
1456
/*
 
1457
 * IndexBuildHeapScan - scan the heap relation to find tuples to be indexed
 
1458
 *
 
1459
 * This is called back from an access-method-specific index build procedure
 
1460
 * after the AM has done whatever setup it needs.  The parent heap relation
 
1461
 * is scanned to find tuples that should be entered into the index.  Each
 
1462
 * such tuple is passed to the AM's callback routine, which does the right
 
1463
 * things to add it to the new index.  After we return, the AM's index
 
1464
 * build procedure does whatever cleanup is needed; in particular, it should
 
1465
 * close the heap and index relations.
 
1466
 *
 
1467
 * The total count of heap tuples is returned.  This is for updating pg_class
 
1468
 * statistics.  (It's annoying not to be able to do that here, but we can't
 
1469
 * do it until after the relation is closed.)  Note that the index AM itself
 
1470
 * must keep track of the number of index tuples; we don't do so here because
 
1471
 * the AM might reject some of the tuples for its own reasons, such as being
 
1472
 * unable to store NULLs.
 
1473
 *
 
1474
 * A side effect is to set indexInfo->ii_BrokenHotChain to true if we detect
 
1475
 * any potentially broken HOT chains.  Currently, we set this if there are
 
1476
 * any RECENTLY_DEAD entries in a HOT chain, without trying very hard to
 
1477
 * detect whether they're really incompatible with the chain tip.
 
1478
 */
 
1479
double
 
1480
IndexBuildHeapScan(Relation heapRelation,
 
1481
                                   Relation indexRelation,
 
1482
                                   IndexInfo *indexInfo,
 
1483
                                   bool allow_sync,
 
1484
                                   IndexBuildCallback callback,
 
1485
                                   void *callback_state)
 
1486
{
 
1487
        HeapScanDesc scan;
 
1488
        HeapTuple       heapTuple;
 
1489
        Datum           values[INDEX_MAX_KEYS];
 
1490
        bool            isnull[INDEX_MAX_KEYS];
 
1491
        double          reltuples;
 
1492
        List       *predicate;
 
1493
        TupleTableSlot *slot;
 
1494
        EState     *estate;
 
1495
        ExprContext *econtext;
 
1496
        Snapshot        snapshot;
 
1497
        TransactionId OldestXmin;
 
1498
        BlockNumber root_blkno = InvalidBlockNumber;
 
1499
        OffsetNumber root_offsets[MaxHeapTuplesPerPage];
 
1500
 
 
1501
        /*
 
1502
         * sanity checks
 
1503
         */
 
1504
        Assert(OidIsValid(indexRelation->rd_rel->relam));
 
1505
 
 
1506
        /*
 
1507
         * Need an EState for evaluation of index expressions and partial-index
 
1508
         * predicates.  Also a slot to hold the current tuple.
 
1509
         */
 
1510
        estate = CreateExecutorState();
 
1511
        econtext = GetPerTupleExprContext(estate);
 
1512
        slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
 
1513
 
 
1514
        /* Arrange for econtext's scan tuple to be the tuple under test */
 
1515
        econtext->ecxt_scantuple = slot;
 
1516
 
 
1517
        /* Set up execution state for predicate, if any. */
 
1518
        predicate = (List *)
 
1519
                ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
 
1520
                                                estate);
 
1521
 
 
1522
        /*
 
1523
         * Prepare for scan of the base relation.  In a normal index build, we use
 
1524
         * SnapshotAny because we must retrieve all tuples and do our own time
 
1525
         * qual checks (because we have to index RECENTLY_DEAD tuples). In a
 
1526
         * concurrent build, we take a regular MVCC snapshot and index whatever's
 
1527
         * live according to that.      During bootstrap we just use SnapshotNow.
 
1528
         */
 
1529
        if (IsBootstrapProcessingMode())
 
1530
        {
 
1531
                snapshot = SnapshotNow;
 
1532
                OldestXmin = InvalidTransactionId;              /* not used */
 
1533
        }
 
1534
        else if (indexInfo->ii_Concurrent)
 
1535
        {
 
1536
                snapshot = RegisterSnapshot(GetTransactionSnapshot());
 
1537
                OldestXmin = InvalidTransactionId;              /* not used */
 
1538
        }
 
1539
        else
 
1540
        {
 
1541
                snapshot = SnapshotAny;
 
1542
                /* okay to ignore lazy VACUUMs here */
 
1543
                OldestXmin = GetOldestXmin(heapRelation->rd_rel->relisshared, true);
 
1544
        }
 
1545
 
 
1546
        scan = heap_beginscan_strat(heapRelation,       /* relation */
 
1547
                                                                snapshot,               /* snapshot */
 
1548
                                                                0,                              /* number of keys */
 
1549
                                                                NULL,                   /* scan key */
 
1550
                                                                true,                   /* buffer access strategy OK */
 
1551
                                                                allow_sync);    /* syncscan OK? */
 
1552
 
 
1553
        reltuples = 0;
 
1554
 
 
1555
        /*
 
1556
         * Scan all tuples in the base relation.
 
1557
         */
 
1558
        while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
 
1559
        {
 
1560
                bool            tupleIsAlive;
 
1561
 
 
1562
                CHECK_FOR_INTERRUPTS();
 
1563
 
 
1564
                /*
 
1565
                 * When dealing with a HOT-chain of updated tuples, we want to index
 
1566
                 * the values of the live tuple (if any), but index it under the TID
 
1567
                 * of the chain's root tuple.  This approach is necessary to preserve
 
1568
                 * the HOT-chain structure in the heap. So we need to be able to find
 
1569
                 * the root item offset for every tuple that's in a HOT-chain.  When
 
1570
                 * first reaching a new page of the relation, call
 
1571
                 * heap_get_root_tuples() to build a map of root item offsets on the
 
1572
                 * page.
 
1573
                 *
 
1574
                 * It might look unsafe to use this information across buffer
 
1575
                 * lock/unlock.  However, we hold ShareLock on the table so no
 
1576
                 * ordinary insert/update/delete should occur; and we hold pin on the
 
1577
                 * buffer continuously while visiting the page, so no pruning
 
1578
                 * operation can occur either.
 
1579
                 *
 
1580
                 * Note the implied assumption that there is no more than one live
 
1581
                 * tuple per HOT-chain ...
 
1582
                 */
 
1583
                if (scan->rs_cblock != root_blkno)
 
1584
                {
 
1585
                        Page            page = BufferGetPage(scan->rs_cbuf);
 
1586
 
 
1587
                        LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
 
1588
                        heap_get_root_tuples(page, root_offsets);
 
1589
                        LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
 
1590
 
 
1591
                        root_blkno = scan->rs_cblock;
 
1592
                }
 
1593
 
 
1594
                if (snapshot == SnapshotAny)
 
1595
                {
 
1596
                        /* do our own time qual check */
 
1597
                        bool            indexIt;
 
1598
 
 
1599
        recheck:
 
1600
 
 
1601
                        /*
 
1602
                         * We could possibly get away with not locking the buffer here,
 
1603
                         * since caller should hold ShareLock on the relation, but let's
 
1604
                         * be conservative about it.  (This remark is still correct even
 
1605
                         * with HOT-pruning: our pin on the buffer prevents pruning.)
 
1606
                         */
 
1607
                        LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
 
1608
 
 
1609
                        switch (HeapTupleSatisfiesVacuum(heapTuple->t_data, OldestXmin,
 
1610
                                                                                         scan->rs_cbuf))
 
1611
                        {
 
1612
                                case HEAPTUPLE_DEAD:
 
1613
                                        /* Definitely dead, we can ignore it */
 
1614
                                        indexIt = false;
 
1615
                                        tupleIsAlive = false;
 
1616
                                        break;
 
1617
                                case HEAPTUPLE_LIVE:
 
1618
                                        /* Normal case, index and unique-check it */
 
1619
                                        indexIt = true;
 
1620
                                        tupleIsAlive = true;
 
1621
                                        break;
 
1622
                                case HEAPTUPLE_RECENTLY_DEAD:
 
1623
 
 
1624
                                        /*
 
1625
                                         * If tuple is recently deleted then we must index it
 
1626
                                         * anyway to preserve MVCC semantics.  (Pre-existing
 
1627
                                         * transactions could try to use the index after we finish
 
1628
                                         * building it, and may need to see such tuples.)
 
1629
                                         *
 
1630
                                         * However, if it was HOT-updated then we must only index
 
1631
                                         * the live tuple at the end of the HOT-chain.  Since this
 
1632
                                         * breaks semantics for pre-existing snapshots, mark the
 
1633
                                         * index as unusable for them.
 
1634
                                         *
 
1635
                                         * If we've already decided that the index will be unsafe
 
1636
                                         * for old snapshots, we may as well stop indexing
 
1637
                                         * recently-dead tuples, since there's no longer any
 
1638
                                         * point.
 
1639
                                         */
 
1640
                                        if (HeapTupleIsHotUpdated(heapTuple))
 
1641
                                        {
 
1642
                                                indexIt = false;
 
1643
                                                /* mark the index as unsafe for old snapshots */
 
1644
                                                indexInfo->ii_BrokenHotChain = true;
 
1645
                                        }
 
1646
                                        else if (indexInfo->ii_BrokenHotChain)
 
1647
                                                indexIt = false;
 
1648
                                        else
 
1649
                                                indexIt = true;
 
1650
                                        /* In any case, exclude the tuple from unique-checking */
 
1651
                                        tupleIsAlive = false;
 
1652
                                        break;
 
1653
                                case HEAPTUPLE_INSERT_IN_PROGRESS:
 
1654
 
 
1655
                                        /*
 
1656
                                         * Since caller should hold ShareLock or better, we should
 
1657
                                         * not see any tuples inserted by open transactions ---
 
1658
                                         * unless it's our own transaction. (Consider INSERT
 
1659
                                         * followed by CREATE INDEX within a transaction.)      An
 
1660
                                         * exception occurs when reindexing a system catalog,
 
1661
                                         * because we often release lock on system catalogs before
 
1662
                                         * committing.  In that case we wait for the inserting
 
1663
                                         * transaction to finish and check again.  (We could do
 
1664
                                         * that on user tables too, but since the case is not
 
1665
                                         * expected it seems better to throw an error.)
 
1666
                                         */
 
1667
                                        if (!TransactionIdIsCurrentTransactionId(
 
1668
                                                                  HeapTupleHeaderGetXmin(heapTuple->t_data)))
 
1669
                                        {
 
1670
                                                if (!IsSystemRelation(heapRelation))
 
1671
                                                        elog(ERROR, "concurrent insert in progress");
 
1672
                                                else
 
1673
                                                {
 
1674
                                                        /*
 
1675
                                                         * Must drop the lock on the buffer before we wait
 
1676
                                                         */
 
1677
                                                        TransactionId xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
 
1678
 
 
1679
                                                        LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
 
1680
                                                        XactLockTableWait(xwait);
 
1681
                                                        goto recheck;
 
1682
                                                }
 
1683
                                        }
 
1684
 
 
1685
                                        /*
 
1686
                                         * We must index such tuples, since if the index build
 
1687
                                         * commits then they're good.
 
1688
                                         */
 
1689
                                        indexIt = true;
 
1690
                                        tupleIsAlive = true;
 
1691
                                        break;
 
1692
                                case HEAPTUPLE_DELETE_IN_PROGRESS:
 
1693
 
 
1694
                                        /*
 
1695
                                         * Since caller should hold ShareLock or better, we should
 
1696
                                         * not see any tuples deleted by open transactions ---
 
1697
                                         * unless it's our own transaction. (Consider DELETE
 
1698
                                         * followed by CREATE INDEX within a transaction.)      An
 
1699
                                         * exception occurs when reindexing a system catalog,
 
1700
                                         * because we often release lock on system catalogs before
 
1701
                                         * committing.  In that case we wait for the deleting
 
1702
                                         * transaction to finish and check again.  (We could do
 
1703
                                         * that on user tables too, but since the case is not
 
1704
                                         * expected it seems better to throw an error.)
 
1705
                                         */
 
1706
                                        Assert(!(heapTuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI));
 
1707
                                        if (!TransactionIdIsCurrentTransactionId(
 
1708
                                                                  HeapTupleHeaderGetXmax(heapTuple->t_data)))
 
1709
                                        {
 
1710
                                                if (!IsSystemRelation(heapRelation))
 
1711
                                                        elog(ERROR, "concurrent delete in progress");
 
1712
                                                else
 
1713
                                                {
 
1714
                                                        /*
 
1715
                                                         * Must drop the lock on the buffer before we wait
 
1716
                                                         */
 
1717
                                                        TransactionId xwait = HeapTupleHeaderGetXmax(heapTuple->t_data);
 
1718
 
 
1719
                                                        LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
 
1720
                                                        XactLockTableWait(xwait);
 
1721
                                                        goto recheck;
 
1722
                                                }
 
1723
                                        }
 
1724
 
 
1725
                                        /*
 
1726
                                         * Otherwise, we have to treat these tuples just like
 
1727
                                         * RECENTLY_DELETED ones.
 
1728
                                         */
 
1729
                                        if (HeapTupleIsHotUpdated(heapTuple))
 
1730
                                        {
 
1731
                                                indexIt = false;
 
1732
                                                /* mark the index as unsafe for old snapshots */
 
1733
                                                indexInfo->ii_BrokenHotChain = true;
 
1734
                                        }
 
1735
                                        else if (indexInfo->ii_BrokenHotChain)
 
1736
                                                indexIt = false;
 
1737
                                        else
 
1738
                                                indexIt = true;
 
1739
                                        /* In any case, exclude the tuple from unique-checking */
 
1740
                                        tupleIsAlive = false;
 
1741
                                        break;
 
1742
                                default:
 
1743
                                        elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
 
1744
                                        indexIt = tupleIsAlive = false;         /* keep compiler quiet */
 
1745
                                        break;
 
1746
                        }
 
1747
 
 
1748
                        LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
 
1749
 
 
1750
                        if (!indexIt)
 
1751
                                continue;
 
1752
                }
 
1753
                else
 
1754
                {
 
1755
                        /* heap_getnext did the time qual check */
 
1756
                        tupleIsAlive = true;
 
1757
                }
 
1758
 
 
1759
                reltuples += 1;
 
1760
 
 
1761
                MemoryContextReset(econtext->ecxt_per_tuple_memory);
 
1762
 
 
1763
                /* Set up for predicate or expression evaluation */
 
1764
                ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
 
1765
 
 
1766
                /*
 
1767
                 * In a partial index, discard tuples that don't satisfy the
 
1768
                 * predicate.
 
1769
                 */
 
1770
                if (predicate != NIL)
 
1771
                {
 
1772
                        if (!ExecQual(predicate, econtext, false))
 
1773
                                continue;
 
1774
                }
 
1775
 
 
1776
                /*
 
1777
                 * For the current heap tuple, extract all the attributes we use in
 
1778
                 * this index, and note which are null.  This also performs evaluation
 
1779
                 * of any expressions needed.
 
1780
                 */
 
1781
                FormIndexDatum(indexInfo,
 
1782
                                           slot,
 
1783
                                           estate,
 
1784
                                           values,
 
1785
                                           isnull);
 
1786
 
 
1787
                /*
 
1788
                 * You'd think we should go ahead and build the index tuple here, but
 
1789
                 * some index AMs want to do further processing on the data first.      So
 
1790
                 * pass the values[] and isnull[] arrays, instead.
 
1791
                 */
 
1792
 
 
1793
                if (HeapTupleIsHeapOnly(heapTuple))
 
1794
                {
 
1795
                        /*
 
1796
                         * For a heap-only tuple, pretend its TID is that of the root. See
 
1797
                         * src/backend/access/heap/README.HOT for discussion.
 
1798
                         */
 
1799
                        HeapTupleData rootTuple;
 
1800
                        OffsetNumber offnum;
 
1801
 
 
1802
                        rootTuple = *heapTuple;
 
1803
                        offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
 
1804
 
 
1805
                        Assert(OffsetNumberIsValid(root_offsets[offnum - 1]));
 
1806
 
 
1807
                        ItemPointerSetOffsetNumber(&rootTuple.t_self,
 
1808
                                                                           root_offsets[offnum - 1]);
 
1809
 
 
1810
                        /* Call the AM's callback routine to process the tuple */
 
1811
                        callback(indexRelation, &rootTuple, values, isnull, tupleIsAlive,
 
1812
                                         callback_state);
 
1813
                }
 
1814
                else
 
1815
                {
 
1816
                        /* Call the AM's callback routine to process the tuple */
 
1817
                        callback(indexRelation, heapTuple, values, isnull, tupleIsAlive,
 
1818
                                         callback_state);
 
1819
                }
 
1820
        }
 
1821
 
 
1822
        heap_endscan(scan);
 
1823
 
 
1824
        /* we can now forget our snapshot, if set */
 
1825
        if (indexInfo->ii_Concurrent)
 
1826
                UnregisterSnapshot(snapshot);
 
1827
 
 
1828
        ExecDropSingleTupleTableSlot(slot);
 
1829
 
 
1830
        FreeExecutorState(estate);
 
1831
 
 
1832
        /* These may have been pointing to the now-gone estate */
 
1833
        indexInfo->ii_ExpressionsState = NIL;
 
1834
        indexInfo->ii_PredicateState = NIL;
 
1835
 
 
1836
        return reltuples;
 
1837
}
 
1838
 
 
1839
 
 
1840
/*
 
1841
 * validate_index - support code for concurrent index builds
 
1842
 *
 
1843
 * We do a concurrent index build by first inserting the catalog entry for the
 
1844
 * index via index_create(), marking it not indisready and not indisvalid.
 
1845
 * Then we commit our transaction and start a new one, then we wait for all
 
1846
 * transactions that could have been modifying the table to terminate.  Now
 
1847
 * we know that any subsequently-started transactions will see the index and
 
1848
 * honor its constraints on HOT updates; so while existing HOT-chains might
 
1849
 * be broken with respect to the index, no currently live tuple will have an
 
1850
 * incompatible HOT update done to it.  We now build the index normally via
 
1851
 * index_build(), while holding a weak lock that allows concurrent
 
1852
 * insert/update/delete.  Also, we index only tuples that are valid
 
1853
 * as of the start of the scan (see IndexBuildHeapScan), whereas a normal
 
1854
 * build takes care to include recently-dead tuples.  This is OK because
 
1855
 * we won't mark the index valid until all transactions that might be able
 
1856
 * to see those tuples are gone.  The reason for doing that is to avoid
 
1857
 * bogus unique-index failures due to concurrent UPDATEs (we might see
 
1858
 * different versions of the same row as being valid when we pass over them,
 
1859
 * if we used HeapTupleSatisfiesVacuum).  This leaves us with an index that
 
1860
 * does not contain any tuples added to the table while we built the index.
 
1861
 *
 
1862
 * Next, we mark the index "indisready" (but still not "indisvalid") and
 
1863
 * commit the second transaction and start a third.  Again we wait for all
 
1864
 * transactions that could have been modifying the table to terminate.  Now
 
1865
 * we know that any subsequently-started transactions will see the index and
 
1866
 * insert their new tuples into it.  We then take a new reference snapshot
 
1867
 * which is passed to validate_index().  Any tuples that are valid according
 
1868
 * to this snap, but are not in the index, must be added to the index.
 
1869
 * (Any tuples committed live after the snap will be inserted into the
 
1870
 * index by their originating transaction.      Any tuples committed dead before
 
1871
 * the snap need not be indexed, because we will wait out all transactions
 
1872
 * that might care about them before we mark the index valid.)
 
1873
 *
 
1874
 * validate_index() works by first gathering all the TIDs currently in the
 
1875
 * index, using a bulkdelete callback that just stores the TIDs and doesn't
 
1876
 * ever say "delete it".  (This should be faster than a plain indexscan;
 
1877
 * also, not all index AMs support full-index indexscan.)  Then we sort the
 
1878
 * TIDs, and finally scan the table doing a "merge join" against the TID list
 
1879
 * to see which tuples are missing from the index.      Thus we will ensure that
 
1880
 * all tuples valid according to the reference snapshot are in the index.
 
1881
 *
 
1882
 * Building a unique index this way is tricky: we might try to insert a
 
1883
 * tuple that is already dead or is in process of being deleted, and we
 
1884
 * mustn't have a uniqueness failure against an updated version of the same
 
1885
 * row.  We could try to check the tuple to see if it's already dead and tell
 
1886
 * index_insert() not to do the uniqueness check, but that still leaves us
 
1887
 * with a race condition against an in-progress update.  To handle that,
 
1888
 * we expect the index AM to recheck liveness of the to-be-inserted tuple
 
1889
 * before it declares a uniqueness error.
 
1890
 *
 
1891
 * After completing validate_index(), we wait until all transactions that
 
1892
 * were alive at the time of the reference snapshot are gone; this is
 
1893
 * necessary to be sure there are none left with a serializable snapshot
 
1894
 * older than the reference (and hence possibly able to see tuples we did
 
1895
 * not index).  Then we mark the index "indisvalid" and commit.  Subsequent
 
1896
 * transactions will be able to use it for queries.
 
1897
 *
 
1898
 * Doing two full table scans is a brute-force strategy.  We could try to be
 
1899
 * cleverer, eg storing new tuples in a special area of the table (perhaps
 
1900
 * making the table append-only by setting use_fsm).  However that would
 
1901
 * add yet more locking issues.
 
1902
 */
 
1903
void
 
1904
validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
 
1905
{
 
1906
        Relation        heapRelation,
 
1907
                                indexRelation;
 
1908
        IndexInfo  *indexInfo;
 
1909
        IndexVacuumInfo ivinfo;
 
1910
        v_i_state       state;
 
1911
        Oid                     save_userid;
 
1912
        bool            save_secdefcxt;
 
1913
 
 
1914
        /* Open and lock the parent heap relation */
 
1915
        heapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
 
1916
        /* And the target index relation */
 
1917
        indexRelation = index_open(indexId, RowExclusiveLock);
 
1918
 
 
1919
        /*
 
1920
         * Fetch info needed for index_insert.  (You might think this should be
 
1921
         * passed in from DefineIndex, but its copy is long gone due to having
 
1922
         * been built in a previous transaction.)
 
1923
         */
 
1924
        indexInfo = BuildIndexInfo(indexRelation);
 
1925
 
 
1926
        /* mark build is concurrent just for consistency */
 
1927
        indexInfo->ii_Concurrent = true;
 
1928
 
 
1929
        /*
 
1930
         * Switch to the table owner's userid, so that any index functions are
 
1931
         * run as that user.
 
1932
         */
 
1933
        GetUserIdAndContext(&save_userid, &save_secdefcxt);
 
1934
        SetUserIdAndContext(heapRelation->rd_rel->relowner, true);
 
1935
 
 
1936
        /*
 
1937
         * Scan the index and gather up all the TIDs into a tuplesort object.
 
1938
         */
 
1939
        ivinfo.index = indexRelation;
 
1940
        ivinfo.vacuum_full = false;
 
1941
        ivinfo.analyze_only = false;
 
1942
        ivinfo.message_level = DEBUG2;
 
1943
        ivinfo.num_heap_tuples = -1;
 
1944
        ivinfo.strategy = NULL;
 
1945
 
 
1946
        state.tuplesort = tuplesort_begin_datum(TIDOID,
 
1947
                                                                                        TIDLessOperator, false,
 
1948
                                                                                        maintenance_work_mem,
 
1949
                                                                                        false);
 
1950
        state.htups = state.itups = state.tups_inserted = 0;
 
1951
 
 
1952
        (void) index_bulk_delete(&ivinfo, NULL,
 
1953
                                                         validate_index_callback, (void *) &state);
 
1954
 
 
1955
        /* Execute the sort */
 
1956
        tuplesort_performsort(state.tuplesort);
 
1957
 
 
1958
        /*
 
1959
         * Now scan the heap and "merge" it with the index
 
1960
         */
 
1961
        validate_index_heapscan(heapRelation,
 
1962
                                                        indexRelation,
 
1963
                                                        indexInfo,
 
1964
                                                        snapshot,
 
1965
                                                        &state);
 
1966
 
 
1967
        /* Done with tuplesort object */
 
1968
        tuplesort_end(state.tuplesort);
 
1969
 
 
1970
        elog(DEBUG2,
 
1971
                 "validate_index found %.0f heap tuples, %.0f index tuples; inserted %.0f missing tuples",
 
1972
                 state.htups, state.itups, state.tups_inserted);
 
1973
 
 
1974
        /* Restore userid */
 
1975
        SetUserIdAndContext(save_userid, save_secdefcxt);
 
1976
 
 
1977
        /* Close rels, but keep locks */
 
1978
        index_close(indexRelation, NoLock);
 
1979
        heap_close(heapRelation, NoLock);
 
1980
}
 
1981
 
 
1982
/*
 
1983
 * validate_index_callback - bulkdelete callback to collect the index TIDs
 
1984
 */
 
1985
static bool
 
1986
validate_index_callback(ItemPointer itemptr, void *opaque)
 
1987
{
 
1988
        v_i_state  *state = (v_i_state *) opaque;
 
1989
 
 
1990
        tuplesort_putdatum(state->tuplesort, PointerGetDatum(itemptr), false);
 
1991
        state->itups += 1;
 
1992
        return false;                           /* never actually delete anything */
 
1993
}
 
1994
 
 
1995
/*
 
1996
 * validate_index_heapscan - second table scan for concurrent index build
 
1997
 *
 
1998
 * This has much code in common with IndexBuildHeapScan, but it's enough
 
1999
 * different that it seems cleaner to have two routines not one.
 
2000
 */
 
2001
static void
 
2002
validate_index_heapscan(Relation heapRelation,
 
2003
                                                Relation indexRelation,
 
2004
                                                IndexInfo *indexInfo,
 
2005
                                                Snapshot snapshot,
 
2006
                                                v_i_state *state)
 
2007
{
 
2008
        HeapScanDesc scan;
 
2009
        HeapTuple       heapTuple;
 
2010
        Datum           values[INDEX_MAX_KEYS];
 
2011
        bool            isnull[INDEX_MAX_KEYS];
 
2012
        List       *predicate;
 
2013
        TupleTableSlot *slot;
 
2014
        EState     *estate;
 
2015
        ExprContext *econtext;
 
2016
        BlockNumber root_blkno = InvalidBlockNumber;
 
2017
        OffsetNumber root_offsets[MaxHeapTuplesPerPage];
 
2018
        bool            in_index[MaxHeapTuplesPerPage];
 
2019
 
 
2020
        /* state variables for the merge */
 
2021
        ItemPointer indexcursor = NULL;
 
2022
        bool            tuplesort_empty = false;
 
2023
 
 
2024
        /*
 
2025
         * sanity checks
 
2026
         */
 
2027
        Assert(OidIsValid(indexRelation->rd_rel->relam));
 
2028
 
 
2029
        /*
 
2030
         * Need an EState for evaluation of index expressions and partial-index
 
2031
         * predicates.  Also a slot to hold the current tuple.
 
2032
         */
 
2033
        estate = CreateExecutorState();
 
2034
        econtext = GetPerTupleExprContext(estate);
 
2035
        slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
 
2036
 
 
2037
        /* Arrange for econtext's scan tuple to be the tuple under test */
 
2038
        econtext->ecxt_scantuple = slot;
 
2039
 
 
2040
        /* Set up execution state for predicate, if any. */
 
2041
        predicate = (List *)
 
2042
                ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
 
2043
                                                estate);
 
2044
 
 
2045
        /*
 
2046
         * Prepare for scan of the base relation.  We need just those tuples
 
2047
         * satisfying the passed-in reference snapshot.  We must disable syncscan
 
2048
         * here, because it's critical that we read from block zero forward to
 
2049
         * match the sorted TIDs.
 
2050
         */
 
2051
        scan = heap_beginscan_strat(heapRelation,       /* relation */
 
2052
                                                                snapshot,               /* snapshot */
 
2053
                                                                0,                              /* number of keys */
 
2054
                                                                NULL,                   /* scan key */
 
2055
                                                                true,                   /* buffer access strategy OK */
 
2056
                                                                false);                 /* syncscan not OK */
 
2057
 
 
2058
        /*
 
2059
         * Scan all tuples matching the snapshot.
 
2060
         */
 
2061
        while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
 
2062
        {
 
2063
                ItemPointer heapcursor = &heapTuple->t_self;
 
2064
                ItemPointerData rootTuple;
 
2065
                OffsetNumber root_offnum;
 
2066
 
 
2067
                CHECK_FOR_INTERRUPTS();
 
2068
 
 
2069
                state->htups += 1;
 
2070
 
 
2071
                /*
 
2072
                 * As commented in IndexBuildHeapScan, we should index heap-only
 
2073
                 * tuples under the TIDs of their root tuples; so when we advance onto
 
2074
                 * a new heap page, build a map of root item offsets on the page.
 
2075
                 *
 
2076
                 * This complicates merging against the tuplesort output: we will
 
2077
                 * visit the live tuples in order by their offsets, but the root
 
2078
                 * offsets that we need to compare against the index contents might be
 
2079
                 * ordered differently.  So we might have to "look back" within the
 
2080
                 * tuplesort output, but only within the current page.  We handle that
 
2081
                 * by keeping a bool array in_index[] showing all the
 
2082
                 * already-passed-over tuplesort output TIDs of the current page. We
 
2083
                 * clear that array here, when advancing onto a new heap page.
 
2084
                 */
 
2085
                if (scan->rs_cblock != root_blkno)
 
2086
                {
 
2087
                        Page            page = BufferGetPage(scan->rs_cbuf);
 
2088
 
 
2089
                        LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
 
2090
                        heap_get_root_tuples(page, root_offsets);
 
2091
                        LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
 
2092
 
 
2093
                        memset(in_index, 0, sizeof(in_index));
 
2094
 
 
2095
                        root_blkno = scan->rs_cblock;
 
2096
                }
 
2097
 
 
2098
                /* Convert actual tuple TID to root TID */
 
2099
                rootTuple = *heapcursor;
 
2100
                root_offnum = ItemPointerGetOffsetNumber(heapcursor);
 
2101
 
 
2102
                if (HeapTupleIsHeapOnly(heapTuple))
 
2103
                {
 
2104
                        root_offnum = root_offsets[root_offnum - 1];
 
2105
                        Assert(OffsetNumberIsValid(root_offnum));
 
2106
                        ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
 
2107
                }
 
2108
 
 
2109
                /*
 
2110
                 * "merge" by skipping through the index tuples until we find or pass
 
2111
                 * the current root tuple.
 
2112
                 */
 
2113
                while (!tuplesort_empty &&
 
2114
                           (!indexcursor ||
 
2115
                                ItemPointerCompare(indexcursor, &rootTuple) < 0))
 
2116
                {
 
2117
                        Datum           ts_val;
 
2118
                        bool            ts_isnull;
 
2119
 
 
2120
                        if (indexcursor)
 
2121
                        {
 
2122
                                /*
 
2123
                                 * Remember index items seen earlier on the current heap page
 
2124
                                 */
 
2125
                                if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
 
2126
                                        in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
 
2127
                                pfree(indexcursor);
 
2128
                        }
 
2129
 
 
2130
                        tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
 
2131
                                                                                                  &ts_val, &ts_isnull);
 
2132
                        Assert(tuplesort_empty || !ts_isnull);
 
2133
                        indexcursor = (ItemPointer) DatumGetPointer(ts_val);
 
2134
                }
 
2135
 
 
2136
                /*
 
2137
                 * If the tuplesort has overshot *and* we didn't see a match earlier,
 
2138
                 * then this tuple is missing from the index, so insert it.
 
2139
                 */
 
2140
                if ((tuplesort_empty ||
 
2141
                         ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
 
2142
                        !in_index[root_offnum - 1])
 
2143
                {
 
2144
                        MemoryContextReset(econtext->ecxt_per_tuple_memory);
 
2145
 
 
2146
                        /* Set up for predicate or expression evaluation */
 
2147
                        ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
 
2148
 
 
2149
                        /*
 
2150
                         * In a partial index, discard tuples that don't satisfy the
 
2151
                         * predicate.
 
2152
                         */
 
2153
                        if (predicate != NIL)
 
2154
                        {
 
2155
                                if (!ExecQual(predicate, econtext, false))
 
2156
                                        continue;
 
2157
                        }
 
2158
 
 
2159
                        /*
 
2160
                         * For the current heap tuple, extract all the attributes we use
 
2161
                         * in this index, and note which are null.      This also performs
 
2162
                         * evaluation of any expressions needed.
 
2163
                         */
 
2164
                        FormIndexDatum(indexInfo,
 
2165
                                                   slot,
 
2166
                                                   estate,
 
2167
                                                   values,
 
2168
                                                   isnull);
 
2169
 
 
2170
                        /*
 
2171
                         * You'd think we should go ahead and build the index tuple here,
 
2172
                         * but some index AMs want to do further processing on the data
 
2173
                         * first. So pass the values[] and isnull[] arrays, instead.
 
2174
                         */
 
2175
 
 
2176
                        /*
 
2177
                         * If the tuple is already committed dead, you might think we
 
2178
                         * could suppress uniqueness checking, but this is no longer true
 
2179
                         * in the presence of HOT, because the insert is actually a proxy
 
2180
                         * for a uniqueness check on the whole HOT-chain.  That is, the
 
2181
                         * tuple we have here could be dead because it was already
 
2182
                         * HOT-updated, and if so the updating transaction will not have
 
2183
                         * thought it should insert index entries.      The index AM will
 
2184
                         * check the whole HOT-chain and correctly detect a conflict if
 
2185
                         * there is one.
 
2186
                         */
 
2187
 
 
2188
                        index_insert(indexRelation,
 
2189
                                                 values,
 
2190
                                                 isnull,
 
2191
                                                 &rootTuple,
 
2192
                                                 heapRelation,
 
2193
                                                 indexInfo->ii_Unique);
 
2194
 
 
2195
                        state->tups_inserted += 1;
 
2196
                }
 
2197
        }
 
2198
 
 
2199
        heap_endscan(scan);
 
2200
 
 
2201
        ExecDropSingleTupleTableSlot(slot);
 
2202
 
 
2203
        FreeExecutorState(estate);
 
2204
 
 
2205
        /* These may have been pointing to the now-gone estate */
 
2206
        indexInfo->ii_ExpressionsState = NIL;
 
2207
        indexInfo->ii_PredicateState = NIL;
 
2208
}
 
2209
 
 
2210
 
 
2211
/*
 
2212
 * IndexGetRelation: given an index's relation OID, get the OID of the
 
2213
 * relation it is an index on.  Uses the system cache.
 
2214
 */
 
2215
static Oid
 
2216
IndexGetRelation(Oid indexId)
 
2217
{
 
2218
        HeapTuple       tuple;
 
2219
        Form_pg_index index;
 
2220
        Oid                     result;
 
2221
 
 
2222
        tuple = SearchSysCache(INDEXRELID,
 
2223
                                                   ObjectIdGetDatum(indexId),
 
2224
                                                   0, 0, 0);
 
2225
        if (!HeapTupleIsValid(tuple))
 
2226
                elog(ERROR, "cache lookup failed for index %u", indexId);
 
2227
        index = (Form_pg_index) GETSTRUCT(tuple);
 
2228
        Assert(index->indexrelid == indexId);
 
2229
 
 
2230
        result = index->indrelid;
 
2231
        ReleaseSysCache(tuple);
 
2232
        return result;
 
2233
}
 
2234
 
 
2235
/*
 
2236
 * reindex_index - This routine is used to recreate a single index
 
2237
 */
 
2238
void
 
2239
reindex_index(Oid indexId)
 
2240
{
 
2241
        Relation        iRel,
 
2242
                                heapRelation,
 
2243
                                pg_index;
 
2244
        Oid                     heapId;
 
2245
        bool            inplace;
 
2246
        IndexInfo  *indexInfo;
 
2247
        HeapTuple       indexTuple;
 
2248
        Form_pg_index indexForm;
 
2249
 
 
2250
        /*
 
2251
         * Open and lock the parent heap relation.      ShareLock is sufficient since
 
2252
         * we only need to be sure no schema or data changes are going on.
 
2253
         */
 
2254
        heapId = IndexGetRelation(indexId);
 
2255
        heapRelation = heap_open(heapId, ShareLock);
 
2256
 
 
2257
        /*
 
2258
         * Open the target index relation and get an exclusive lock on it, to
 
2259
         * ensure that no one else is touching this particular index.
 
2260
         */
 
2261
        iRel = index_open(indexId, AccessExclusiveLock);
 
2262
 
 
2263
        /*
 
2264
         * Don't allow reindex on temp tables of other backends ... their local
 
2265
         * buffer manager is not going to cope.
 
2266
         */
 
2267
        if (isOtherTempNamespace(RelationGetNamespace(iRel)))
 
2268
                ereport(ERROR,
 
2269
                                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 
2270
                                 errmsg("cannot reindex temporary tables of other sessions")));
 
2271
 
 
2272
        /*
 
2273
         * Also check for active uses of the index in the current transaction;
 
2274
         * we don't want to reindex underneath an open indexscan.
 
2275
         */
 
2276
        CheckTableNotInUse(iRel, "REINDEX INDEX");
 
2277
 
 
2278
        /*
 
2279
         * If it's a shared index, we must do inplace processing (because we have
 
2280
         * no way to update relfilenode in other databases).  Otherwise we can do
 
2281
         * it the normal transaction-safe way.
 
2282
         *
 
2283
         * Since inplace processing isn't crash-safe, we only allow it in a
 
2284
         * standalone backend.  (In the REINDEX TABLE and REINDEX DATABASE cases,
 
2285
         * the caller should have detected this.)
 
2286
         */
 
2287
        inplace = iRel->rd_rel->relisshared;
 
2288
 
 
2289
        if (inplace && IsUnderPostmaster)
 
2290
                ereport(ERROR,
 
2291
                                (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
 
2292
                                 errmsg("shared index \"%s\" can only be reindexed in stand-alone mode",
 
2293
                                                RelationGetRelationName(iRel))));
 
2294
 
 
2295
        PG_TRY();
 
2296
        {
 
2297
                /* Suppress use of the target index while rebuilding it */
 
2298
                SetReindexProcessing(heapId, indexId);
 
2299
 
 
2300
                /* Fetch info needed for index_build */
 
2301
                indexInfo = BuildIndexInfo(iRel);
 
2302
 
 
2303
                if (inplace)
 
2304
                {
 
2305
                        /*
 
2306
                         * Truncate the actual file (and discard buffers).
 
2307
                         */
 
2308
                        RelationTruncate(iRel, 0);
 
2309
                }
 
2310
                else
 
2311
                {
 
2312
                        /*
 
2313
                         * We'll build a new physical relation for the index.
 
2314
                         */
 
2315
                        setNewRelfilenode(iRel, InvalidTransactionId);
 
2316
                }
 
2317
 
 
2318
                /* Initialize the index and rebuild */
 
2319
                /* Note: we do not need to re-establish pkey setting */
 
2320
                index_build(heapRelation, iRel, indexInfo, false);
 
2321
        }
 
2322
        PG_CATCH();
 
2323
        {
 
2324
                /* Make sure flag gets cleared on error exit */
 
2325
                ResetReindexProcessing();
 
2326
                PG_RE_THROW();
 
2327
        }
 
2328
        PG_END_TRY();
 
2329
        ResetReindexProcessing();
 
2330
 
 
2331
        /*
 
2332
         * If the index is marked invalid or not ready (ie, it's from a failed
 
2333
         * CREATE INDEX CONCURRENTLY), we can now mark it valid.  This allows
 
2334
         * REINDEX to be used to clean up in such cases.
 
2335
         *
 
2336
         * We can also reset indcheckxmin, because we have now done a
 
2337
         * non-concurrent index build, *except* in the case where index_build
 
2338
         * found some still-broken HOT chains.
 
2339
         */
 
2340
        pg_index = heap_open(IndexRelationId, RowExclusiveLock);
 
2341
 
 
2342
        indexTuple = SearchSysCacheCopy(INDEXRELID,
 
2343
                                                                        ObjectIdGetDatum(indexId),
 
2344
                                                                        0, 0, 0);
 
2345
        if (!HeapTupleIsValid(indexTuple))
 
2346
                elog(ERROR, "cache lookup failed for index %u", indexId);
 
2347
        indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
 
2348
 
 
2349
        if (!indexForm->indisvalid || !indexForm->indisready ||
 
2350
                (indexForm->indcheckxmin && !indexInfo->ii_BrokenHotChain))
 
2351
        {
 
2352
                indexForm->indisvalid = true;
 
2353
                indexForm->indisready = true;
 
2354
                if (!indexInfo->ii_BrokenHotChain)
 
2355
                        indexForm->indcheckxmin = false;
 
2356
                simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
 
2357
                CatalogUpdateIndexes(pg_index, indexTuple);
 
2358
        }
 
2359
        heap_close(pg_index, RowExclusiveLock);
 
2360
 
 
2361
        /* Close rels, but keep locks */
 
2362
        index_close(iRel, NoLock);
 
2363
        heap_close(heapRelation, NoLock);
 
2364
}
 
2365
 
 
2366
/*
 
2367
 * reindex_relation - This routine is used to recreate all indexes
 
2368
 * of a relation (and optionally its toast relation too, if any).
 
2369
 *
 
2370
 * Returns true if any indexes were rebuilt.  Note that a
 
2371
 * CommandCounterIncrement will occur after each index rebuild.
 
2372
 */
 
2373
bool
 
2374
reindex_relation(Oid relid, bool toast_too)
 
2375
{
 
2376
        Relation        rel;
 
2377
        Oid                     toast_relid;
 
2378
        bool            is_pg_class;
 
2379
        bool            result;
 
2380
        List       *indexIds,
 
2381
                           *doneIndexes;
 
2382
        ListCell   *indexId;
 
2383
 
 
2384
        /*
 
2385
         * Open and lock the relation.  ShareLock is sufficient since we only need
 
2386
         * to prevent schema and data changes in it.
 
2387
         */
 
2388
        rel = heap_open(relid, ShareLock);
 
2389
 
 
2390
        toast_relid = rel->rd_rel->reltoastrelid;
 
2391
 
 
2392
        /*
 
2393
         * Get the list of index OIDs for this relation.  (We trust to the
 
2394
         * relcache to get this with a sequential scan if ignoring system
 
2395
         * indexes.)
 
2396
         */
 
2397
        indexIds = RelationGetIndexList(rel);
 
2398
 
 
2399
        /*
 
2400
         * reindex_index will attempt to update the pg_class rows for the relation
 
2401
         * and index.  If we are processing pg_class itself, we want to make sure
 
2402
         * that the updates do not try to insert index entries into indexes we
 
2403
         * have not processed yet.      (When we are trying to recover from corrupted
 
2404
         * indexes, that could easily cause a crash.) We can accomplish this
 
2405
         * because CatalogUpdateIndexes will use the relcache's index list to know
 
2406
         * which indexes to update. We just force the index list to be only the
 
2407
         * stuff we've processed.
 
2408
         *
 
2409
         * It is okay to not insert entries into the indexes we have not processed
 
2410
         * yet because all of this is transaction-safe.  If we fail partway
 
2411
         * through, the updated rows are dead and it doesn't matter whether they
 
2412
         * have index entries.  Also, a new pg_class index will be created with an
 
2413
         * entry for its own pg_class row because we do setNewRelfilenode() before
 
2414
         * we do index_build().
 
2415
         *
 
2416
         * Note that we also clear pg_class's rd_oidindex until the loop is done,
 
2417
         * so that that index can't be accessed either.  This means we cannot
 
2418
         * safely generate new relation OIDs while in the loop; shouldn't be a
 
2419
         * problem.
 
2420
         */
 
2421
        is_pg_class = (RelationGetRelid(rel) == RelationRelationId);
 
2422
 
 
2423
        /* Ensure rd_indexattr is valid; see comments for RelationSetIndexList */
 
2424
        if (is_pg_class)
 
2425
                (void) RelationGetIndexAttrBitmap(rel);
 
2426
 
 
2427
        /* Reindex all the indexes. */
 
2428
        doneIndexes = NIL;
 
2429
        foreach(indexId, indexIds)
 
2430
        {
 
2431
                Oid                     indexOid = lfirst_oid(indexId);
 
2432
 
 
2433
                if (is_pg_class)
 
2434
                        RelationSetIndexList(rel, doneIndexes, InvalidOid);
 
2435
 
 
2436
                reindex_index(indexOid);
 
2437
 
 
2438
                CommandCounterIncrement();
 
2439
 
 
2440
                if (is_pg_class)
 
2441
                        doneIndexes = lappend_oid(doneIndexes, indexOid);
 
2442
        }
 
2443
 
 
2444
        if (is_pg_class)
 
2445
                RelationSetIndexList(rel, indexIds, ClassOidIndexId);
 
2446
 
 
2447
        /*
 
2448
         * Close rel, but continue to hold the lock.
 
2449
         */
 
2450
        heap_close(rel, NoLock);
 
2451
 
 
2452
        result = (indexIds != NIL);
 
2453
 
 
2454
        /*
 
2455
         * If the relation has a secondary toast rel, reindex that too while we
 
2456
         * still hold the lock on the master table.
 
2457
         */
 
2458
        if (toast_too && OidIsValid(toast_relid))
 
2459
                result |= reindex_relation(toast_relid, false);
 
2460
 
 
2461
        return result;
 
2462
}