2
* contrib/pageinspect/btreefuncs.c
7
* Copyright (c) 2006 Satoshi Nagayasu <nagayasus@nttdata.co.jp>
9
* Permission to use, copy, modify, and distribute this software and
10
* its documentation for any purpose, without fee, and without a
11
* written agreement is hereby granted, provided that the above
12
* copyright notice and this paragraph and the following two
13
* paragraphs appear in all copies.
15
* IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT,
16
* INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
17
* LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
18
* DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED
19
* OF THE POSSIBILITY OF SUCH DAMAGE.
21
* THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
22
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23
* A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS
24
* IS" BASIS, AND THE AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE,
25
* SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
30
#include "access/heapam.h"
31
#include "access/nbtree.h"
32
#include "catalog/namespace.h"
33
#include "catalog/pg_type.h"
35
#include "miscadmin.h"
36
#include "storage/bufmgr.h"
37
#include "utils/builtins.h"
40
/*
 * Entry points defined in this file.  Each prototype is immediately
 * followed by its PG_FUNCTION_INFO_V1() registration.
 */
extern Datum bt_metap(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(bt_metap);

extern Datum bt_page_items(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(bt_page_items);

extern Datum bt_page_stats(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(bt_page_stats);
48
/* Nonzero when r->rd_rel->relkind equals RELKIND_INDEX, i.e. r is an index. */
#define IS_INDEX(r) ((r)->rd_rel->relkind == RELKIND_INDEX)
49
/* Nonzero when r->rd_rel->relam equals BTREE_AM_OID, i.e. r uses the btree AM. */
#define IS_BTREE(r) ((r)->rd_rel->relam == BTREE_AM_OID)
51
/*
 * CHECK_PAGE_OFFSET_RANGE(pg, offnum)
 *
 * Raise an ERROR unless offnum lies within
 * FirstOffsetNumber .. PageGetMaxOffsetNumber(pg), inclusive.
 *
 * The body is wrapped in do { } while (0) so that the macro plus its
 * trailing semicolon forms exactly one statement; a bare { } block would
 * break an unbraced "if (...) CHECK_...; else ..." at the call site.
 * Note that offnum is still evaluated twice, so pass a side-effect-free
 * expression.
 */
#define CHECK_PAGE_OFFSET_RANGE(pg, offnum) \
	do { \
		if (!(FirstOffsetNumber <= (offnum) && \
			  (offnum) <= PageGetMaxOffsetNumber(pg))) \
			elog(ERROR, "page offset number out of range"); \
	} while (0)
56
/*
 * CHECK_RELATION_BLOCK_RANGE(rel, blkno)
 *
 * Raise an ERROR unless blkno is smaller than the relation's current
 * number of blocks, as reported by RelationGetNumberOfBlocks(rel).
 *
 * note: BlockNumber is unsigned, hence can't be negative
 *
 * The body is wrapped in do { } while (0) so that the macro plus its
 * trailing semicolon forms exactly one statement and is safe inside an
 * unbraced if/else.
 */
#define CHECK_RELATION_BLOCK_RANGE(rel, blkno) \
	do { \
		if (RelationGetNumberOfBlocks(rel) <= (BlockNumber) (blkno)) \
			elog(ERROR, "block number out of range"); \
	} while (0)
61
/* ------------------------------------------------
62
* structure for single btree page statistics
63
* ------------------------------------------------
65
typedef struct BTPageStat
77
BlockNumber btpo_prev;
78
BlockNumber btpo_next;
85
BTCycleId btpo_cycleid;
89
/* -------------------------------------------------
90
* GetBTPageStatistics()
92
* Collect statistics of single b-tree page
93
* -------------------------------------------------
96
GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat *stat)
98
Page page = BufferGetPage(buffer);
99
PageHeader phdr = (PageHeader) page;
100
OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
101
BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
107
stat->max_avail = BLCKSZ - (BLCKSZ - phdr->pd_special + SizeOfPageHeaderData);
109
stat->dead_items = stat->live_items = 0;
111
stat->page_size = PageGetPageSize(page);
113
/* page type (flags) */
114
if (P_ISDELETED(opaque))
117
stat->btpo.xact = opaque->btpo.xact;
120
else if (P_IGNORE(opaque))
122
else if (P_ISLEAF(opaque))
124
else if (P_ISROOT(opaque))
129
/* btpage opaque data */
130
stat->btpo_prev = opaque->btpo_prev;
131
stat->btpo_next = opaque->btpo_next;
132
stat->btpo.level = opaque->btpo.level;
133
stat->btpo_flags = opaque->btpo_flags;
134
stat->btpo_cycleid = opaque->btpo_cycleid;
136
/* count live and dead tuples, and free space */
137
for (off = FirstOffsetNumber; off <= maxoff; off++)
141
ItemId id = PageGetItemId(page, off);
143
itup = (IndexTuple) PageGetItem(page, id);
145
item_size += IndexTupleSize(itup);
147
if (!ItemIdIsDead(id))
152
stat->free_size = PageGetFreeSpace(page);
154
if ((stat->live_items + stat->dead_items) > 0)
155
stat->avg_item_size = item_size / (stat->live_items + stat->dead_items);
157
stat->avg_item_size = 0;
160
/* -----------------------------------------------
163
* Usage: SELECT * FROM bt_page('t1_pkey', 1);
164
* -----------------------------------------------
167
bt_page_stats(PG_FUNCTION_ARGS)
169
text *relname = PG_GETARG_TEXT_P(0);
170
uint32 blkno = PG_GETARG_UINT32(1);
183
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
184
(errmsg("must be superuser to use pageinspect functions"))));
186
relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
187
rel = relation_openrv(relrv, AccessShareLock);
189
if (!IS_INDEX(rel) || !IS_BTREE(rel))
190
elog(ERROR, "relation \"%s\" is not a btree index",
191
RelationGetRelationName(rel));
194
* Reject attempts to read non-local temporary relations; we would be
195
* likely to get wrong data since we have no visibility into the owning
196
* session's local buffers.
198
if (RELATION_IS_OTHER_TEMP(rel))
200
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
201
errmsg("cannot access temporary tables of other sessions")));
204
elog(ERROR, "block 0 is a meta page");
206
CHECK_RELATION_BLOCK_RANGE(rel, blkno);
208
buffer = ReadBuffer(rel, blkno);
210
/* keep compiler quiet */
211
stat.btpo_prev = stat.btpo_next = InvalidBlockNumber;
212
stat.btpo_flags = stat.free_size = stat.avg_item_size = 0;
214
GetBTPageStatistics(blkno, buffer, &stat);
216
/* Build a tuple descriptor for our result type */
217
if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
218
elog(ERROR, "return type must be a row type");
221
values[j] = palloc(32);
222
snprintf(values[j++], 32, "%d", stat.blkno);
223
values[j] = palloc(32);
224
snprintf(values[j++], 32, "%c", stat.type);
225
values[j] = palloc(32);
226
snprintf(values[j++], 32, "%d", stat.live_items);
227
values[j] = palloc(32);
228
snprintf(values[j++], 32, "%d", stat.dead_items);
229
values[j] = palloc(32);
230
snprintf(values[j++], 32, "%d", stat.avg_item_size);
231
values[j] = palloc(32);
232
snprintf(values[j++], 32, "%d", stat.page_size);
233
values[j] = palloc(32);
234
snprintf(values[j++], 32, "%d", stat.free_size);
235
values[j] = palloc(32);
236
snprintf(values[j++], 32, "%d", stat.btpo_prev);
237
values[j] = palloc(32);
238
snprintf(values[j++], 32, "%d", stat.btpo_next);
239
values[j] = palloc(32);
240
if (stat.type == 'd')
241
snprintf(values[j++], 32, "%d", stat.btpo.xact);
243
snprintf(values[j++], 32, "%d", stat.btpo.level);
244
values[j] = palloc(32);
245
snprintf(values[j++], 32, "%d", stat.btpo_flags);
247
tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
250
result = HeapTupleGetDatum(tuple);
252
ReleaseBuffer(buffer);
254
relation_close(rel, AccessShareLock);
256
PG_RETURN_DATUM(result);
259
/*-------------------------------------------------------
262
* Get IndexTupleData set in a btree page
264
* Usage: SELECT * FROM bt_page_items('t1_pkey', 1);
265
*-------------------------------------------------------
269
* cross-call data structure for SRF
278
bt_page_items(PG_FUNCTION_ARGS)
280
text *relname = PG_GETARG_TEXT_P(0);
281
uint32 blkno = PG_GETARG_UINT32(1);
285
FuncCallContext *fctx;
287
struct user_args *uargs;
291
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
292
(errmsg("must be superuser to use pageinspect functions"))));
294
if (SRF_IS_FIRSTCALL())
302
fctx = SRF_FIRSTCALL_INIT();
304
relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
305
rel = relation_openrv(relrv, AccessShareLock);
307
if (!IS_INDEX(rel) || !IS_BTREE(rel))
308
elog(ERROR, "relation \"%s\" is not a btree index",
309
RelationGetRelationName(rel));
312
* Reject attempts to read non-local temporary relations; we would be
313
* likely to get wrong data since we have no visibility into the
314
* owning session's local buffers.
316
if (RELATION_IS_OTHER_TEMP(rel))
318
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
319
errmsg("cannot access temporary tables of other sessions")));
322
elog(ERROR, "block 0 is a meta page");
324
CHECK_RELATION_BLOCK_RANGE(rel, blkno);
326
buffer = ReadBuffer(rel, blkno);
329
* We copy the page into local storage to avoid holding pin on the
330
* buffer longer than we must, and possibly failing to release it at
331
* all if the calling query doesn't fetch all rows.
333
mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
335
uargs = palloc(sizeof(struct user_args));
337
uargs->page = palloc(BLCKSZ);
338
memcpy(uargs->page, BufferGetPage(buffer), BLCKSZ);
340
ReleaseBuffer(buffer);
341
relation_close(rel, AccessShareLock);
343
uargs->offset = FirstOffsetNumber;
345
opaque = (BTPageOpaque) PageGetSpecialPointer(uargs->page);
347
if (P_ISDELETED(opaque))
348
elog(NOTICE, "page is deleted");
350
fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
352
/* Build a tuple descriptor for our result type */
353
if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
354
elog(ERROR, "return type must be a row type");
356
fctx->attinmeta = TupleDescGetAttInMetadata(tupleDesc);
358
fctx->user_fctx = uargs;
360
MemoryContextSwitchTo(mctx);
363
fctx = SRF_PERCALL_SETUP();
364
uargs = fctx->user_fctx;
366
if (fctx->call_cntr < fctx->max_calls)
376
id = PageGetItemId(uargs->page, uargs->offset);
378
if (!ItemIdIsValid(id))
379
elog(ERROR, "invalid ItemId");
381
itup = (IndexTuple) PageGetItem(uargs->page, id);
384
values[j] = palloc(32);
385
snprintf(values[j++], 32, "%d", uargs->offset);
386
values[j] = palloc(32);
387
snprintf(values[j++], 32, "(%u,%u)",
388
BlockIdGetBlockNumber(&(itup->t_tid.ip_blkid)),
389
itup->t_tid.ip_posid);
390
values[j] = palloc(32);
391
snprintf(values[j++], 32, "%d", (int) IndexTupleSize(itup));
392
values[j] = palloc(32);
393
snprintf(values[j++], 32, "%c", IndexTupleHasNulls(itup) ? 't' : 'f');
394
values[j] = palloc(32);
395
snprintf(values[j++], 32, "%c", IndexTupleHasVarwidths(itup) ? 't' : 'f');
397
ptr = (char *) itup + IndexInfoFindDataOffset(itup->t_info);
398
dlen = IndexTupleSize(itup) - IndexInfoFindDataOffset(itup->t_info);
399
dump = palloc0(dlen * 3 + 1);
401
for (off = 0; off < dlen; off++)
405
sprintf(dump, "%02x", *(ptr + off) & 0xff);
409
tuple = BuildTupleFromCStrings(fctx->attinmeta, values);
410
result = HeapTupleGetDatum(tuple);
412
uargs->offset = uargs->offset + 1;
414
SRF_RETURN_NEXT(fctx, result);
420
SRF_RETURN_DONE(fctx);
425
/* ------------------------------------------------
428
* Get a btree's meta-page information
430
* Usage: SELECT * FROM bt_metap('t1_pkey')
431
* ------------------------------------------------
434
bt_metap(PG_FUNCTION_ARGS)
436
text *relname = PG_GETARG_TEXT_P(0);
440
BTMetaPageData *metad;
450
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
451
(errmsg("must be superuser to use pageinspect functions"))));
453
relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
454
rel = relation_openrv(relrv, AccessShareLock);
456
if (!IS_INDEX(rel) || !IS_BTREE(rel))
457
elog(ERROR, "relation \"%s\" is not a btree index",
458
RelationGetRelationName(rel));
461
* Reject attempts to read non-local temporary relations; we would be
462
* likely to get wrong data since we have no visibility into the owning
463
* session's local buffers.
465
if (RELATION_IS_OTHER_TEMP(rel))
467
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
468
errmsg("cannot access temporary tables of other sessions")));
470
buffer = ReadBuffer(rel, 0);
471
page = BufferGetPage(buffer);
472
metad = BTPageGetMeta(page);
474
/* Build a tuple descriptor for our result type */
475
if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
476
elog(ERROR, "return type must be a row type");
479
values[j] = palloc(32);
480
snprintf(values[j++], 32, "%d", metad->btm_magic);
481
values[j] = palloc(32);
482
snprintf(values[j++], 32, "%d", metad->btm_version);
483
values[j] = palloc(32);
484
snprintf(values[j++], 32, "%d", metad->btm_root);
485
values[j] = palloc(32);
486
snprintf(values[j++], 32, "%d", metad->btm_level);
487
values[j] = palloc(32);
488
snprintf(values[j++], 32, "%d", metad->btm_fastroot);
489
values[j] = palloc(32);
490
snprintf(values[j++], 32, "%d", metad->btm_fastlevel);
492
tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
495
result = HeapTupleGetDatum(tuple);
497
ReleaseBuffer(buffer);
499
relation_close(rel, AccessShareLock);
501
PG_RETURN_DATUM(result);