4
* All the statistics collector stuff hacked up in one big, ugly file.
6
* TODO: - Separate collector, postmaster and backend stuff
7
* into different files.
9
* - Add some automatic call for pgstat vacuuming.
11
* - Add a pgstat config column to pg_database, so this
12
* entire thing can be enabled/disabled on a per db basis.
14
* Copyright (c) 2001-2005, PostgreSQL Global Development Group
16
* $PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.87.4.2 2005-03-31 23:21:09 tgl Exp $
23
#include <sys/param.h>
25
#include <sys/socket.h>
27
#include <netinet/in.h>
28
#include <arpa/inet.h>
34
#include "access/heapam.h"
35
#include "access/xact.h"
36
#include "catalog/catname.h"
37
#include "catalog/pg_database.h"
38
#include "catalog/pg_shadow.h"
39
#include "libpq/libpq.h"
40
#include "libpq/pqsignal.h"
41
#include "mb/pg_wchar.h"
42
#include "miscadmin.h"
43
#include "postmaster/postmaster.h"
44
#include "storage/backendid.h"
45
#include "storage/fd.h"
46
#include "storage/ipc.h"
47
#include "storage/pg_shmem.h"
48
#include "storage/pmsignal.h"
49
#include "tcop/tcopprot.h"
50
#include "utils/hsearch.h"
51
#include "utils/memutils.h"
52
#include "utils/ps_status.h"
53
#include "utils/rel.h"
54
#include "utils/syscache.h"
58
* Paths for the statistics files. The %s is replaced with the
59
* installation's $PGDATA.
62
#define PGSTAT_STAT_FILENAME "%s/global/pgstat.stat"
63
#define PGSTAT_STAT_TMPFILE "%s/global/pgstat.tmp.%d"
69
#define PGSTAT_STAT_INTERVAL 500 /* How often to write the status
70
* file; in milliseconds. */
72
#define PGSTAT_DESTROY_DELAY 10000 /* How long to keep destroyed
73
* objects known, to give delayed
74
* UDP packets time to arrive; in
77
#define PGSTAT_DESTROY_COUNT (PGSTAT_DESTROY_DELAY / PGSTAT_STAT_INTERVAL)
79
#define PGSTAT_RESTART_INTERVAL 60 /* How often to attempt to restart
80
* a failed statistics collector;
84
* Amount of space reserved in pgstat_recvbuffer().
87
#define PGSTAT_RECVBUFFERSZ ((int) (1024 * sizeof(PgStat_Msg)))
90
* The initial size hints for the hash tables used in the collector.
93
#define PGSTAT_DB_HASH_SIZE 16
94
#define PGSTAT_BE_HASH_SIZE 512
95
#define PGSTAT_TAB_HASH_SIZE 512
102
bool pgstat_collect_startcollector = true;
103
bool pgstat_collect_resetonpmstart = true;
104
bool pgstat_collect_querystring = false;
105
bool pgstat_collect_tuplelevel = false;
106
bool pgstat_collect_blocklevel = false;
112
NON_EXEC_STATIC int pgStatSock = -1;
113
NON_EXEC_STATIC int pgStatPipe[2] = {-1,-1};
114
static struct sockaddr_storage pgStatAddr;
115
static pid_t pgStatCollectorPid = 0;
117
static time_t last_pgstat_start_time;
119
static long pgStatNumMessages = 0;
121
static bool pgStatRunningInCollector = FALSE;
123
static int pgStatTabstatAlloc = 0;
124
static int pgStatTabstatUsed = 0;
125
static PgStat_MsgTabstat **pgStatTabstatMessages = NULL;
127
#define TABSTAT_QUANTUM 4 /* we alloc this many at a time */
129
static int pgStatXactCommit = 0;
130
static int pgStatXactRollback = 0;
132
static TransactionId pgStatDBHashXact = InvalidTransactionId;
133
static HTAB *pgStatDBHash = NULL;
134
static HTAB *pgStatBeDead = NULL;
135
static PgStat_StatBeEntry *pgStatBeTable = NULL;
136
static int pgStatNumBackends = 0;
138
static char pgStat_fname[MAXPGPATH];
139
static char pgStat_tmpfname[MAXPGPATH];
143
* Local function forward declarations
148
typedef enum STATS_PROCESS_TYPE
152
} STATS_PROCESS_TYPE;
154
static pid_t pgstat_forkexec(STATS_PROCESS_TYPE procType);
155
static void pgstat_parseArgs(int argc, char *argv[]);
158
NON_EXEC_STATIC void PgstatBufferMain(int argc, char *argv[]);
159
NON_EXEC_STATIC void PgstatCollectorMain(int argc, char *argv[]);
160
static void pgstat_recvbuffer(void);
161
static void pgstat_exit(SIGNAL_ARGS);
162
static void pgstat_die(SIGNAL_ARGS);
163
static void pgstat_beshutdown_hook(int code, Datum arg);
165
static int pgstat_add_backend(PgStat_MsgHdr *msg);
166
static void pgstat_sub_backend(int procpid);
167
static void pgstat_drop_database(Oid databaseid);
168
static void pgstat_write_statsfile(void);
169
static void pgstat_read_statsfile(HTAB **dbhash, Oid onlydb,
170
PgStat_StatBeEntry **betab,
172
static void backend_read_statsfile(void);
174
static void pgstat_setheader(PgStat_MsgHdr *hdr, int mtype);
175
static void pgstat_send(void *msg, int len);
177
static void pgstat_recv_bestart(PgStat_MsgBestart *msg, int len);
178
static void pgstat_recv_beterm(PgStat_MsgBeterm *msg, int len);
179
static void pgstat_recv_activity(PgStat_MsgActivity *msg, int len);
180
static void pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len);
181
static void pgstat_recv_tabpurge(PgStat_MsgTabpurge *msg, int len);
182
static void pgstat_recv_dropdb(PgStat_MsgDropdb *msg, int len);
183
static void pgstat_recv_resetcounter(PgStat_MsgResetcounter *msg, int len);
186
/* ------------------------------------------------------------
187
* Public functions called from postmaster follow
188
* ------------------------------------------------------------
194
* Called from postmaster at startup. Create the resources required
195
* by the statistics collector process. If unable to do so, do not
196
* fail --- better to let the postmaster start with stats collection
203
ACCEPT_TYPE_ARG3 alen;
204
struct addrinfo *addrs = NULL,
213
#define TESTBYTEVAL ((char) 199)
216
* Force start of collector daemon if something to collect
218
if (pgstat_collect_querystring ||
219
pgstat_collect_tuplelevel ||
220
pgstat_collect_blocklevel)
221
pgstat_collect_startcollector = true;
224
* Initialize the filename for the status reports. (In the
225
* EXEC_BACKEND case, this only sets the value in the postmaster. The
226
* collector subprocess will recompute the value for itself, and
227
* individual backends must do so also if they want to access the
230
snprintf(pgStat_fname, MAXPGPATH, PGSTAT_STAT_FILENAME, DataDir);
233
* If we don't have to start a collector or should reset the collected
234
* statistics on postmaster start, simply remove the file.
236
if (!pgstat_collect_startcollector || pgstat_collect_resetonpmstart)
237
unlink(pgStat_fname);
240
* Nothing else required if collector will not get started
242
if (!pgstat_collect_startcollector)
246
* Create the UDP socket for sending and receiving statistic messages
248
hints.ai_flags = AI_PASSIVE;
249
hints.ai_family = PF_UNSPEC;
250
hints.ai_socktype = SOCK_DGRAM;
251
hints.ai_protocol = 0;
252
hints.ai_addrlen = 0;
253
hints.ai_addr = NULL;
254
hints.ai_canonname = NULL;
255
hints.ai_next = NULL;
256
ret = getaddrinfo_all("localhost", NULL, &hints, &addrs);
260
(errmsg("could not resolve \"localhost\": %s",
261
gai_strerror(ret))));
266
* On some platforms, getaddrinfo_all() may return multiple addresses
267
* only one of which will actually work (eg, both IPv6 and IPv4
268
* addresses when kernel will reject IPv6). Worse, the failure may
269
* occur at the bind() or perhaps even connect() stage. So we must
270
* loop through the results till we find a working combination. We
271
* will generate LOG messages, but no error, for bogus combinations.
273
for (addr = addrs; addr; addr = addr->ai_next)
275
#ifdef HAVE_UNIX_SOCKETS
276
/* Ignore AF_UNIX sockets, if any are returned. */
277
if (addr->ai_family == AF_UNIX)
284
if ((pgStatSock = socket(addr->ai_family, SOCK_DGRAM, 0)) < 0)
287
(errcode_for_socket_access(),
288
errmsg("could not create socket for statistics collector: %m")));
293
* Bind it to a kernel assigned port on localhost and get the
294
* assigned port via getsockname().
296
if (bind(pgStatSock, addr->ai_addr, addr->ai_addrlen) < 0)
299
(errcode_for_socket_access(),
300
errmsg("could not bind socket for statistics collector: %m")));
301
closesocket(pgStatSock);
306
alen = sizeof(pgStatAddr);
307
if (getsockname(pgStatSock, (struct sockaddr *) & pgStatAddr, &alen) < 0)
310
(errcode_for_socket_access(),
311
errmsg("could not get address of socket for statistics collector: %m")));
312
closesocket(pgStatSock);
318
* Connect the socket to its own address. This saves a few cycles
319
* by not having to respecify the target address on every send.
320
* This also provides a kernel-level check that only packets from
321
* this same address will be received.
323
if (connect(pgStatSock, (struct sockaddr *) & pgStatAddr, alen) < 0)
326
(errcode_for_socket_access(),
327
errmsg("could not connect socket for statistics collector: %m")));
328
closesocket(pgStatSock);
334
* Try to send and receive a one-byte test message on the socket.
335
* This is to catch situations where the socket can be created but
336
* will not actually pass data (for instance, because kernel
337
* packet filtering rules prevent it).
339
test_byte = TESTBYTEVAL;
340
if (send(pgStatSock, &test_byte, 1, 0) != 1)
343
(errcode_for_socket_access(),
344
errmsg("could not send test message on socket for statistics collector: %m")));
345
closesocket(pgStatSock);
351
* There could possibly be a little delay before the message can
352
* be received. We arbitrarily allow up to half a second before
353
* deciding it's broken.
355
for (;;) /* need a loop to handle EINTR */
358
FD_SET(pgStatSock, &rset);
361
sel_res = select(pgStatSock + 1, &rset, NULL, NULL, &tv);
362
if (sel_res >= 0 || errno != EINTR)
368
(errcode_for_socket_access(),
369
errmsg("select() failed in statistics collector: %m")));
370
closesocket(pgStatSock);
374
if (sel_res == 0 || !FD_ISSET(pgStatSock, &rset))
377
* This is the case we actually think is likely, so take pains
378
* to give a specific message for it.
380
* errno will not be set meaningfully here, so don't use it.
383
(errcode(ERRCODE_CONNECTION_FAILURE),
384
errmsg("test message did not get through on socket for statistics collector")));
385
closesocket(pgStatSock);
390
test_byte++; /* just make sure variable is changed */
392
if (recv(pgStatSock, &test_byte, 1, 0) != 1)
395
(errcode_for_socket_access(),
396
errmsg("could not receive test message on socket for statistics collector: %m")));
397
closesocket(pgStatSock);
402
if (test_byte != TESTBYTEVAL) /* strictly paranoia ... */
405
(errcode(ERRCODE_INTERNAL_ERROR),
406
errmsg("incorrect test message transmission on socket for statistics collector")));
407
closesocket(pgStatSock);
412
/* If we get here, we have a working socket */
416
/* Did we find a working address? */
417
if (!addr || pgStatSock < 0)
421
* Set the socket to non-blocking IO. This ensures that if the
422
* collector falls behind (despite the buffering process), statistics
423
* messages will be discarded; backends won't block waiting to send
424
* messages to the collector.
426
if (!pg_set_noblock(pgStatSock))
429
(errcode_for_socket_access(),
430
errmsg("could not set statistics collector socket to nonblocking mode: %m")));
434
freeaddrinfo_all(hints.ai_family, addrs);
440
(errmsg("disabling statistics collector for lack of working socket")));
443
freeaddrinfo_all(hints.ai_family, addrs);
446
closesocket(pgStatSock);
449
/* Adjust GUC variables to suppress useless activity */
450
pgstat_collect_startcollector = false;
451
pgstat_collect_querystring = false;
452
pgstat_collect_tuplelevel = false;
453
pgstat_collect_blocklevel = false;
460
* pgstat_forkexec() -
462
* Format up the arglist for, then fork and exec, statistics
463
* (buffer and collector) processes
466
pgstat_forkexec(STATS_PROCESS_TYPE procType)
472
char pgstatBuf[2][32];
474
av[ac++] = "postgres";
478
case STAT_PROC_BUFFER:
479
av[ac++] = "-forkbuf";
482
case STAT_PROC_COLLECTOR:
483
av[ac++] = "-forkcol";
490
av[ac++] = NULL; /* filled in by postmaster_forkexec */
492
/* postgres_exec_path is not passed by write_backend_variables */
493
av[ac++] = postgres_exec_path;
495
/* Add to the arg list */
496
Assert(bufc <= lengthof(pgstatBuf));
497
for (i = 0; i < bufc; i++)
498
av[ac++] = pgstatBuf[i];
501
Assert(ac < lengthof(av));
503
return postmaster_forkexec(ac, av);
508
* pgstat_parseArgs() -
510
* Extract data from the arglist for exec'ed statistics
511
* (buffer and collector) processes
514
pgstat_parseArgs(int argc, char *argv[])
519
StrNCpy(postgres_exec_path, argv[argc++], MAXPGPATH);
521
#endif /* EXEC_BACKEND */
527
* Called from postmaster at startup or after an existing collector
528
* died. Attempt to fire up a fresh statistics collector.
530
* Returns PID of child process, or 0 if fail.
532
* Note: if fail, we will be called again from the postmaster main loop.
542
* Do nothing if no collector needed
544
if (!pgstat_collect_startcollector)
548
* Do nothing if too soon since last collector start. This is a
549
* safety valve to protect against continuous respawn attempts if the
550
* collector is dying immediately at launch. Note that since we will
551
* be re-called from the postmaster main loop, we will get another
554
curtime = time(NULL);
555
if ((unsigned int) (curtime - last_pgstat_start_time) <
556
(unsigned int) PGSTAT_RESTART_INTERVAL)
558
last_pgstat_start_time = curtime;
561
* Check that the socket is there, else pgstat_init failed.
566
(errmsg("statistics collector startup skipped")));
569
* We can only get here if someone tries to manually turn
570
* pgstat_collect_startcollector on after it had been off.
572
pgstat_collect_startcollector = false;
577
* Okay, fork off the collector.
584
/* Specific beos actions before backend startup */
585
beos_before_backend_startup();
589
switch ((pgStatPid = pgstat_forkexec(STAT_PROC_BUFFER)))
591
switch ((pgStatPid = fork()))
596
/* Specific beos actions */
597
beos_backend_startup_failed();
600
(errmsg("could not fork statistics buffer: %m")));
605
/* in postmaster child ... */
607
/* Specific beos actions after backend startup */
608
beos_backend_startup();
610
/* Close the postmaster's sockets */
611
ClosePostmasterPorts(false);
613
/* Drop our connection to postmaster's shared memory, as well */
614
PGSharedMemoryDetach();
616
PgstatBufferMain(0, NULL);
621
return (int) pgStatPid;
624
/* shouldn't get here */
632
* Called from postmaster to tell collector a backend terminated.
636
pgstat_beterm(int pid)
638
PgStat_MsgBeterm msg;
643
MemSet(&(msg.m_hdr), 0, sizeof(msg.m_hdr));
644
msg.m_hdr.m_type = PGSTAT_MTYPE_BETERM;
645
msg.m_hdr.m_procpid = pid;
647
pgstat_send(&msg, sizeof(msg));
651
/* ------------------------------------------------------------
652
* Public functions used by backends follow
653
*------------------------------------------------------------
660
* Tell the collector that this new backend is soon ready to process
661
* queries. Called from tcop/postgres.c before entering the mainloop.
667
PgStat_MsgBestart msg;
672
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_BESTART);
673
pgstat_send(&msg, sizeof(msg));
676
* Set up a process-exit hook to ensure we flush the last batch of
677
* statistics to the collector.
679
on_proc_exit(pgstat_beshutdown_hook, 0);
683
* Flush any remaining statistics counts out to the collector at process
684
* exit. Without this, operations triggered during backend exit (such as
685
* temp table deletions) won't be counted. This is an on_proc_exit hook,
686
* not on_shmem_exit, so that everything interesting must have happened
690
pgstat_beshutdown_hook(int code, Datum arg)
692
pgstat_report_tabstat();
697
* pgstat_report_activity() -
699
* Called from tcop/postgres.c to tell the collector what the backend
700
* is actually doing (usually "<IDLE>" or the start of the query to
705
pgstat_report_activity(const char *what)
707
PgStat_MsgActivity msg;
710
if (!pgstat_collect_querystring || pgStatSock < 0)
714
len = pg_mbcliplen((const unsigned char *) what, len,
715
PGSTAT_ACTIVITY_SIZE - 1);
717
memcpy(msg.m_what, what, len);
718
msg.m_what[len] = '\0';
719
len += offsetof(PgStat_MsgActivity, m_what) +1;
721
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_ACTIVITY);
722
pgstat_send(&msg, len);
727
* pgstat_report_tabstat() -
729
* Called from tcop/postgres.c to send the so far collected
730
* per table access statistics to the collector.
734
pgstat_report_tabstat(void)
738
if (pgStatSock < 0 ||
739
!(pgstat_collect_querystring ||
740
pgstat_collect_tuplelevel ||
741
pgstat_collect_blocklevel))
743
/* Not reporting stats, so just flush whatever we have */
744
pgStatTabstatUsed = 0;
749
* For each message buffer used during the last query set the header
750
* fields and send it out.
752
for (i = 0; i < pgStatTabstatUsed; i++)
754
PgStat_MsgTabstat *tsmsg = pgStatTabstatMessages[i];
758
n = tsmsg->m_nentries;
759
len = offsetof(PgStat_MsgTabstat, m_entry[0]) +
760
n * sizeof(PgStat_TableEntry);
762
tsmsg->m_xact_commit = pgStatXactCommit;
763
tsmsg->m_xact_rollback = pgStatXactRollback;
764
pgStatXactCommit = 0;
765
pgStatXactRollback = 0;
767
pgstat_setheader(&tsmsg->m_hdr, PGSTAT_MTYPE_TABSTAT);
768
pgstat_send(tsmsg, len);
771
pgStatTabstatUsed = 0;
776
* pgstat_vacuum_tabstat() -
778
* Will tell the collector about objects it can get rid of.
782
pgstat_vacuum_tabstat(void)
790
HASH_SEQ_STATUS hstat;
791
PgStat_StatDBEntry *dbentry;
792
PgStat_StatTabEntry *tabentry;
795
PgStat_MsgTabpurge msg;
803
* If not done for this transaction, read the statistics collector
804
* stats file into some hash tables.
806
backend_read_statsfile();
809
* Lookup our own database entry
811
dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
812
(void *) &MyDatabaseId,
817
if (dbentry->tables == NULL)
821
* Initialize our messages table counter to zero
826
* Check for all tables if they still exist.
828
hash_seq_init(&hstat, dbentry->tables);
829
while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&hstat)) != NULL)
832
* Check if this relation is still alive by looking up its
833
* pg_class tuple in the system catalog cache.
835
reltup = SearchSysCache(RELOID,
836
ObjectIdGetDatum(tabentry->tableid),
838
if (HeapTupleIsValid(reltup))
840
ReleaseSysCache(reltup);
845
* Add this table's Oid to the message
847
msg.m_tableid[msg.m_nentries++] = tabentry->tableid;
851
* If the message is full, send it out and reinitialize to zero
853
if (msg.m_nentries >= PGSTAT_NUM_TABPURGE)
855
len = offsetof(PgStat_MsgTabpurge, m_tableid[0])
856
+msg.m_nentries * sizeof(Oid);
858
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_TABPURGE);
859
pgstat_send(&msg, len);
868
if (msg.m_nentries > 0)
870
len = offsetof(PgStat_MsgTabpurge, m_tableid[0])
871
+msg.m_nentries * sizeof(Oid);
873
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_TABPURGE);
874
pgstat_send(&msg, len);
878
* Read pg_database and remember the Oid's of all existing databases
882
dbidlist = (Oid *) palloc(sizeof(Oid) * dbidalloc);
884
dbrel = heap_openr(DatabaseRelationName, AccessShareLock);
885
dbscan = heap_beginscan(dbrel, SnapshotNow, 0, NULL);
886
while ((dbtup = heap_getnext(dbscan, ForwardScanDirection)) != NULL)
888
if (dbidused >= dbidalloc)
891
dbidlist = (Oid *) repalloc((char *) dbidlist,
892
sizeof(Oid) * dbidalloc);
894
dbidlist[dbidused++] = HeapTupleGetOid(dbtup);
896
heap_endscan(dbscan);
897
heap_close(dbrel, AccessShareLock);
900
* Search the database hash table for dead databases and tell the
901
* collector to drop them as well.
903
hash_seq_init(&hstat, pgStatDBHash);
904
while ((dbentry = (PgStat_StatDBEntry *) hash_seq_search(&hstat)) != NULL)
906
Oid dbid = dbentry->databaseid;
908
for (i = 0; i < dbidused; i++)
910
if (dbidlist[i] == dbid)
917
if (dbid != InvalidOid)
920
pgstat_drop_database(dbid);
925
* Free the dbid list.
930
* Tell the caller how many removable objects we found
937
* pgstat_drop_database() -
939
* Tell the collector that we just dropped a database.
940
* This is the only message that shouldn't get lost in space. Otherwise
941
* the collector will keep the statistics for the dead DB until its
942
* stats file got removed while the postmaster is down.
946
pgstat_drop_database(Oid databaseid)
948
PgStat_MsgDropdb msg;
953
msg.m_databaseid = databaseid;
955
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_DROPDB);
956
pgstat_send(&msg, sizeof(msg));
961
* pgstat_reset_counters() -
963
* Tell the statistics collector to reset counters for our database.
967
pgstat_reset_counters(void)
969
PgStat_MsgResetcounter msg;
976
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
977
errmsg("must be superuser to reset statistics counters")));
979
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_RESETCOUNTER);
980
pgstat_send(&msg, sizeof(msg));
987
* Send some junk data to the collector to increase traffic.
998
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_DUMMY);
999
pgstat_send(&msg, sizeof(msg));
1003
* Create or enlarge the pgStatTabstatMessages array
1006
more_tabstat_space(void)
1008
PgStat_MsgTabstat *newMessages;
1009
PgStat_MsgTabstat **msgArray;
1010
int newAlloc = pgStatTabstatAlloc + TABSTAT_QUANTUM;
1013
/* Create (another) quantum of message buffers */
1014
newMessages = (PgStat_MsgTabstat *)
1015
MemoryContextAllocZero(TopMemoryContext,
1016
sizeof(PgStat_MsgTabstat) * TABSTAT_QUANTUM);
1018
/* Create or enlarge the pointer array */
1019
if (pgStatTabstatMessages == NULL)
1020
msgArray = (PgStat_MsgTabstat **)
1021
MemoryContextAlloc(TopMemoryContext,
1022
sizeof(PgStat_MsgTabstat *) * newAlloc);
1024
msgArray = (PgStat_MsgTabstat **)
1025
repalloc(pgStatTabstatMessages,
1026
sizeof(PgStat_MsgTabstat *) * newAlloc);
1028
for (i = 0; i < TABSTAT_QUANTUM; i++)
1029
msgArray[pgStatTabstatAlloc + i] = newMessages++;
1030
pgStatTabstatMessages = msgArray;
1031
pgStatTabstatAlloc = newAlloc;
1033
Assert(pgStatTabstatUsed < pgStatTabstatAlloc);
1037
* pgstat_initstats() -
1039
* Called from various places usually dealing with initialization
1040
* of Relation or Scan structures. The data placed into these
1041
* structures from here tell where later to count for buffer reads,
1042
* scans and tuples fetched.
1046
pgstat_initstats(PgStat_Info *stats, Relation rel)
1048
Oid rel_id = rel->rd_id;
1049
PgStat_TableEntry *useent;
1050
PgStat_MsgTabstat *tsmsg;
1055
* Initialize data not to count at all.
1057
stats->tabentry = NULL;
1058
stats->no_stats = FALSE;
1059
stats->heap_scan_counted = FALSE;
1060
stats->index_scan_counted = FALSE;
1062
if (pgStatSock < 0 ||
1063
!(pgstat_collect_tuplelevel ||
1064
pgstat_collect_blocklevel))
1066
stats->no_stats = TRUE;
1071
* Search the already-used message slots for this relation.
1073
for (mb = 0; mb < pgStatTabstatUsed; mb++)
1075
tsmsg = pgStatTabstatMessages[mb];
1077
for (i = tsmsg->m_nentries; --i >= 0;)
1079
if (tsmsg->m_entry[i].t_id == rel_id)
1081
stats->tabentry = (void *) &(tsmsg->m_entry[i]);
1086
if (tsmsg->m_nentries >= PGSTAT_NUM_TABENTRIES)
1090
* Not found, but found a message buffer with an empty slot
1091
* instead. Fine, let's use this one.
1093
i = tsmsg->m_nentries++;
1094
useent = &tsmsg->m_entry[i];
1095
MemSet(useent, 0, sizeof(PgStat_TableEntry));
1096
useent->t_id = rel_id;
1097
stats->tabentry = (void *) useent;
1102
* If we ran out of message buffers, we just allocate more.
1104
if (pgStatTabstatUsed >= pgStatTabstatAlloc)
1105
more_tabstat_space();
1108
* Use the first entry of the next message buffer.
1110
mb = pgStatTabstatUsed++;
1111
tsmsg = pgStatTabstatMessages[mb];
1112
tsmsg->m_nentries = 1;
1113
useent = &tsmsg->m_entry[0];
1114
MemSet(useent, 0, sizeof(PgStat_TableEntry));
1115
useent->t_id = rel_id;
1116
stats->tabentry = (void *) useent;
1121
* pgstat_count_xact_commit() -
1123
* Called from access/transam/xact.c to count transaction commits.
1127
pgstat_count_xact_commit(void)
1129
if (!(pgstat_collect_querystring ||
1130
pgstat_collect_tuplelevel ||
1131
pgstat_collect_blocklevel))
1137
* If there was no relation activity yet, just make one existing
1138
* message buffer used without slots, causing the next report to tell
1139
* new xact-counters.
1141
if (pgStatTabstatAlloc == 0)
1142
more_tabstat_space();
1144
if (pgStatTabstatUsed == 0)
1146
pgStatTabstatUsed++;
1147
pgStatTabstatMessages[0]->m_nentries = 0;
1153
* pgstat_count_xact_rollback() -
1155
* Called from access/transam/xact.c to count transaction rollbacks.
1159
pgstat_count_xact_rollback(void)
1161
if (!(pgstat_collect_querystring ||
1162
pgstat_collect_tuplelevel ||
1163
pgstat_collect_blocklevel))
1166
pgStatXactRollback++;
1169
* If there was no relation activity yet, just make one existing
1170
* message buffer used without slots, causing the next report to tell
1171
* new xact-counters.
1173
if (pgStatTabstatAlloc == 0)
1174
more_tabstat_space();
1176
if (pgStatTabstatUsed == 0)
1178
pgStatTabstatUsed++;
1179
pgStatTabstatMessages[0]->m_nentries = 0;
1185
* pgstat_fetch_stat_dbentry() -
1187
* Support function for the SQL-callable pgstat* functions. Returns
1188
* the collected statistics for one database or NULL. NULL doesn't mean
1189
* that the database doesn't exist, it is just not yet known by the
1190
* collector, so the caller is better off to report ZERO instead.
1193
PgStat_StatDBEntry *
1194
pgstat_fetch_stat_dbentry(Oid dbid)
1196
PgStat_StatDBEntry *dbentry;
1199
* If not done for this transaction, read the statistics collector
1200
* stats file into some hash tables.
1202
backend_read_statsfile();
1205
* Lookup the requested database
1207
dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
1210
if (dbentry == NULL)
1218
* pgstat_fetch_stat_tabentry() -
1220
* Support function for the SQL-callable pgstat* functions. Returns
1221
* the collected statistics for one table or NULL. NULL doesn't mean
1222
* that the table doesn't exist, it is just not yet known by the
1223
* collector, so the caller is better off to report ZERO instead.
1226
PgStat_StatTabEntry *
1227
pgstat_fetch_stat_tabentry(Oid relid)
1229
PgStat_StatDBEntry *dbentry;
1230
PgStat_StatTabEntry *tabentry;
1233
* If not done for this transaction, read the statistics collector
1234
* stats file into some hash tables.
1236
backend_read_statsfile();
1239
* Lookup our database.
1241
dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
1242
(void *) &MyDatabaseId,
1244
if (dbentry == NULL)
1248
* Now inside the DB's table hash table lookup the requested one.
1250
if (dbentry->tables == NULL)
1252
tabentry = (PgStat_StatTabEntry *) hash_search(dbentry->tables,
1255
if (tabentry == NULL)
1263
* pgstat_fetch_stat_beentry() -
1265
* Support function for the SQL-callable pgstat* functions. Returns
1266
* the actual activity slot of one active backend. The caller is
1267
* responsible for a check if the actual user is permitted to see
1268
* that info (especially the querystring).
1271
PgStat_StatBeEntry *
1272
pgstat_fetch_stat_beentry(int beid)
1274
backend_read_statsfile();
1276
if (beid < 1 || beid > pgStatNumBackends)
1279
return &pgStatBeTable[beid - 1];
1284
* pgstat_fetch_stat_numbackends() -
1286
* Support function for the SQL-callable pgstat* functions. Returns
1287
* the maximum current backend id.
1291
pgstat_fetch_stat_numbackends(void)
1293
backend_read_statsfile();
1295
return pgStatNumBackends;
1300
/* ------------------------------------------------------------
1301
* Local support functions follow
1302
* ------------------------------------------------------------
1307
* pgstat_setheader() -
1309
* Set common header fields in a statistics message
1313
pgstat_setheader(PgStat_MsgHdr *hdr, int mtype)
1315
hdr->m_type = mtype;
1316
hdr->m_backendid = MyBackendId;
1317
hdr->m_procpid = MyProcPid;
1318
hdr->m_databaseid = MyDatabaseId;
1319
hdr->m_userid = GetSessionUserId();
1326
* Send out one statistics message to the collector
1330
pgstat_send(void *msg, int len)
1335
((PgStat_MsgHdr *) msg)->m_size = len;
1337
send(pgStatSock, msg, len, 0);
1338
/* We deliberately ignore any error from send() */
1343
* PgstatBufferMain() -
1345
* Start up the statistics buffer process. This is the body of the
1346
* postmaster child process.
1348
* The argc/argv parameters are valid only in EXEC_BACKEND case.
1351
NON_EXEC_STATIC void
1352
PgstatBufferMain(int argc, char *argv[])
1354
IsUnderPostmaster = true; /* we are a postmaster subprocess now */
1356
MyProcPid = getpid(); /* reset MyProcPid */
1358
/* Lose the postmaster's on-exit routines */
1362
* Ignore all signals usually bound to some action in the postmaster,
1363
* except for SIGCHLD and SIGQUIT --- see pgstat_recvbuffer.
1365
pqsignal(SIGHUP, SIG_IGN);
1366
pqsignal(SIGINT, SIG_IGN);
1367
pqsignal(SIGTERM, SIG_IGN);
1368
pqsignal(SIGQUIT, pgstat_exit);
1369
pqsignal(SIGALRM, SIG_IGN);
1370
pqsignal(SIGPIPE, SIG_IGN);
1371
pqsignal(SIGUSR1, SIG_IGN);
1372
pqsignal(SIGUSR2, SIG_IGN);
1373
pqsignal(SIGCHLD, pgstat_die);
1374
pqsignal(SIGTTIN, SIG_DFL);
1375
pqsignal(SIGTTOU, SIG_DFL);
1376
pqsignal(SIGCONT, SIG_DFL);
1377
pqsignal(SIGWINCH, SIG_DFL);
1378
/* unblock will happen in pgstat_recvbuffer */
1381
pgstat_parseArgs(argc, argv);
1385
* Start a buffering process to read from the socket, so we have a
1386
* little more time to process incoming messages.
1388
* NOTE: the process structure is: postmaster is parent of buffer process
1389
* is parent of collector process. This way, the buffer can detect
1390
* collector failure via SIGCHLD, whereas otherwise it wouldn't notice
1391
* collector failure until it tried to write on the pipe. That would
1392
* mean that after the postmaster started a new collector, we'd have
1393
* two buffer processes competing to read from the UDP socket --- not
1396
if (pgpipe(pgStatPipe) < 0)
1398
(errcode_for_socket_access(),
1399
errmsg("could not create pipe for statistics buffer: %m")));
1401
/* child becomes collector process */
1403
pgStatCollectorPid = pgstat_forkexec(STAT_PROC_COLLECTOR);
1405
pgStatCollectorPid = fork();
1407
switch (pgStatCollectorPid)
1411
(errmsg("could not fork statistics collector: %m")));
1413
#ifndef EXEC_BACKEND
1415
/* child becomes collector process */
1416
PgstatCollectorMain(0, NULL);
1421
/* parent becomes buffer process */
1422
closesocket(pgStatPipe[0]);
1423
pgstat_recvbuffer();
1430
* PgstatCollectorMain() -
1432
* Start up the statistics collector itself. This is the body of the
1433
* postmaster grandchild process.
1435
* The argc/argv parameters are valid only in EXEC_BACKEND case.
1438
NON_EXEC_STATIC void
1439
PgstatCollectorMain(int argc, char *argv[])
1446
struct timeval timeout;
1447
struct timeval next_statwrite;
1448
bool need_statwrite;
1451
MyProcPid = getpid(); /* reset MyProcPid */
1454
* Reset signal handling. With the exception of restoring default
1455
* SIGCHLD and SIGQUIT handling, this is a no-op in the
1456
* non-EXEC_BACKEND case because we'll have inherited these settings
1457
* from the buffer process; but it's not a no-op for EXEC_BACKEND.
1459
pqsignal(SIGHUP, SIG_IGN);
1460
pqsignal(SIGINT, SIG_IGN);
1461
pqsignal(SIGTERM, SIG_IGN);
1463
pqsignal(SIGQUIT, SIG_IGN);
1465
/* kluge to allow buffer process to kill collector; FIXME */
1466
pqsignal(SIGQUIT, pgstat_exit);
1468
pqsignal(SIGALRM, SIG_IGN);
1469
pqsignal(SIGPIPE, SIG_IGN);
1470
pqsignal(SIGUSR1, SIG_IGN);
1471
pqsignal(SIGUSR2, SIG_IGN);
1472
pqsignal(SIGCHLD, SIG_DFL);
1473
pqsignal(SIGTTIN, SIG_DFL);
1474
pqsignal(SIGTTOU, SIG_DFL);
1475
pqsignal(SIGCONT, SIG_DFL);
1476
pqsignal(SIGWINCH, SIG_DFL);
1477
PG_SETMASK(&UnBlockSig);
1480
pgstat_parseArgs(argc, argv);
1483
/* Close unwanted files */
1484
closesocket(pgStatPipe[1]);
1485
closesocket(pgStatSock);
1488
* Identify myself via ps
1490
init_ps_display("stats collector process", "", "");
1494
* Initialize filenames needed for status reports.
1496
snprintf(pgStat_fname, MAXPGPATH, PGSTAT_STAT_FILENAME, DataDir);
1497
/* tmpfname need only be set correctly in this process */
1498
snprintf(pgStat_tmpfname, MAXPGPATH, PGSTAT_STAT_TMPFILE,
1499
DataDir, (int)getpid());
1502
* Arrange to write the initial status file right away
1504
gettimeofday(&next_statwrite, NULL);
1505
need_statwrite = TRUE;
1508
* Read in an existing statistics stats file or initialize the stats
1511
pgStatRunningInCollector = TRUE;
1512
pgstat_read_statsfile(&pgStatDBHash, InvalidOid, NULL, NULL);
1515
* Create the dead backend hashtable
1517
memset(&hash_ctl, 0, sizeof(hash_ctl));
1518
hash_ctl.keysize = sizeof(int);
1519
hash_ctl.entrysize = sizeof(PgStat_StatBeDead);
1520
hash_ctl.hash = tag_hash;
1521
pgStatBeDead = hash_create("Dead Backends", PGSTAT_BE_HASH_SIZE,
1522
&hash_ctl, HASH_ELEM | HASH_FUNCTION);
1525
* Create the known backends table
1527
pgStatBeTable = (PgStat_StatBeEntry *) palloc0(
1528
sizeof(PgStat_StatBeEntry) * MaxBackends);
1530
readPipe = pgStatPipe[0];
1533
* Process incoming messages and handle all the reporting stuff until
1534
* there are no more messages.
1539
* If we need to write the status file again (there have been
1540
* changes in the statistics since we wrote it last) calculate the
1541
* timeout until we have to do so.
1547
gettimeofday(&now, NULL);
1548
/* avoid assuming that tv_sec is signed */
1549
if (now.tv_sec > next_statwrite.tv_sec ||
1550
(now.tv_sec == next_statwrite.tv_sec &&
1551
now.tv_usec >= next_statwrite.tv_usec))
1554
timeout.tv_usec = 0;
1558
timeout.tv_sec = next_statwrite.tv_sec - now.tv_sec;
1559
timeout.tv_usec = next_statwrite.tv_usec - now.tv_usec;
1560
if (timeout.tv_usec < 0)
1563
timeout.tv_usec += 1000000;
1569
* Set up the descriptor set for select(2)
1572
FD_SET(readPipe, &rfds);
1575
* Now wait for something to do.
1577
nready = select(readPipe + 1, &rfds, NULL, NULL,
1578
(need_statwrite) ? &timeout : NULL);
1584
(errcode_for_socket_access(),
1585
errmsg("select() failed in statistics collector: %m")));
1589
* If there are no descriptors ready, our timeout for writing the
1590
* stats file happened.
1594
pgstat_write_statsfile();
1595
need_statwrite = FALSE;
1601
* Check if there is a new statistics message to collect.
1603
if (FD_ISSET(readPipe, &rfds))
1606
* We may need to issue multiple read calls in case the buffer
1607
* process didn't write the message in a single write, which
1608
* is possible since it dumps its buffer bytewise. In any
1609
* case, we'd need two reads since we don't know the message
1613
int targetlen = sizeof(PgStat_MsgHdr); /* initial */
1614
bool pipeEOF = false;
1616
while (nread < targetlen)
1618
len = piperead(readPipe, ((char *) &msg) + nread,
1625
(errcode_for_socket_access(),
1626
errmsg("could not read from statistics collector pipe: %m")));
1628
if (len == 0) /* EOF on the pipe! */
1634
if (nread == sizeof(PgStat_MsgHdr))
1636
/* we have the header, compute actual msg length */
1637
targetlen = msg.msg_hdr.m_size;
1638
if (targetlen < (int) sizeof(PgStat_MsgHdr) ||
1639
targetlen > (int) sizeof(msg))
1642
* Bogus message length implies that we got out of
1643
* sync with the buffer process somehow. Abort so
1644
* that we can restart both processes.
1647
(errmsg("invalid statistics message length")));
1653
* EOF on the pipe implies that the buffer process exited.
1654
* Fall out of outer loop.
1660
* Distribute the message to the specific function handling
1663
switch (msg.msg_hdr.m_type)
1665
case PGSTAT_MTYPE_DUMMY:
1668
case PGSTAT_MTYPE_BESTART:
1669
pgstat_recv_bestart((PgStat_MsgBestart *) &msg, nread);
1672
case PGSTAT_MTYPE_BETERM:
1673
pgstat_recv_beterm((PgStat_MsgBeterm *) &msg, nread);
1676
case PGSTAT_MTYPE_TABSTAT:
1677
pgstat_recv_tabstat((PgStat_MsgTabstat *) &msg, nread);
1680
case PGSTAT_MTYPE_TABPURGE:
1681
pgstat_recv_tabpurge((PgStat_MsgTabpurge *) &msg, nread);
1684
case PGSTAT_MTYPE_ACTIVITY:
1685
pgstat_recv_activity((PgStat_MsgActivity *) &msg, nread);
1688
case PGSTAT_MTYPE_DROPDB:
1689
pgstat_recv_dropdb((PgStat_MsgDropdb *) &msg, nread);
1692
case PGSTAT_MTYPE_RESETCOUNTER:
1693
pgstat_recv_resetcounter((PgStat_MsgResetcounter *) &msg,
1702
* Globally count messages.
1704
pgStatNumMessages++;
1707
* If this is the first message after we wrote the stats file
1708
* the last time, setup the timeout that it'd be written.
1710
if (!need_statwrite)
1712
gettimeofday(&next_statwrite, NULL);
1713
next_statwrite.tv_usec += ((PGSTAT_STAT_INTERVAL) * 1000);
1714
next_statwrite.tv_sec += (next_statwrite.tv_usec / 1000000);
1715
next_statwrite.tv_usec %= 1000000;
1716
need_statwrite = TRUE;
1721
* Note that we do NOT check for postmaster exit inside the loop;
1722
* only EOF on the buffer pipe causes us to fall out. This
1723
* ensures we don't exit prematurely if there are still a few
1724
* messages in the buffer or pipe at postmaster shutdown.
1729
* Okay, we saw EOF on the buffer pipe, so there are no more messages
1730
* to process. If the buffer process quit because of postmaster
1731
* shutdown, we want to save the final stats to reuse at next startup.
1732
* But if the buffer process failed, it seems best not to (there may
1733
* even now be a new collector firing up, and we don't want it to read
1734
* a partially-rewritten stats file).
1736
if (!PostmasterIsAlive(false))
1737
pgstat_write_statsfile();
1742
* pgstat_recvbuffer() -
1744
* This is the body of the separate buffering process. Its only
1745
* purpose is to receive messages from the UDP socket as fast as
1746
* possible and forward them over a pipe into the collector itself.
1747
* If the collector is slow to absorb messages, they are buffered here.
1751
pgstat_recvbuffer(void)
1755
struct timeval timeout;
1756
int writePipe = pgStatPipe[1];
1762
PgStat_Msg input_buffer;
1764
int msg_send = 0; /* next send index in buffer */
1765
int msg_recv = 0; /* next receive index */
1766
int msg_have = 0; /* number of bytes stored */
1767
bool overflow = false;
1770
* Identify myself via ps
1772
init_ps_display("stats buffer process", "", "");
1776
* We want to die if our child collector process does. There are two
1777
* ways we might notice that it has died: receive SIGCHLD, or get a
1778
* write failure on the pipe leading to the child. We can set SIGPIPE
1779
* to kill us here. Our SIGCHLD handler was already set up before we
1780
* forked (must do it that way, else it's a race condition).
1782
pqsignal(SIGPIPE, SIG_DFL);
1783
PG_SETMASK(&UnBlockSig);
1786
* Set the write pipe to nonblock mode, so that we cannot block when
1787
* the collector falls behind.
1789
if (!pg_set_noblock(writePipe))
1791
(errcode_for_socket_access(),
1792
errmsg("could not set statistics collector pipe to nonblocking mode: %m")));
1795
* Allocate the message buffer
1797
msgbuffer = (char *) palloc(PGSTAT_RECVBUFFERSZ);
1809
* As long as we have buffer space we add the socket to the read
1812
if (msg_have <= (int) (PGSTAT_RECVBUFFERSZ - sizeof(PgStat_Msg)))
1814
FD_SET(pgStatSock, &rfds);
1823
(errmsg("statistics buffer is full")));
1829
* If we have messages to write out, we add the pipe to the write
1834
FD_SET(writePipe, &wfds);
1835
if (writePipe > maxfd)
1840
* Wait for some work to do; but not for more than 10 seconds.
1841
* (This determines how quickly we will shut down after an
1842
* ungraceful postmaster termination; so it needn't be very fast.)
1844
timeout.tv_sec = 10;
1845
timeout.tv_usec = 0;
1847
nready = select(maxfd + 1, &rfds, &wfds, NULL, &timeout);
1853
(errcode_for_socket_access(),
1854
errmsg("select() failed in statistics buffer: %m")));
1858
* If there is a message on the socket, read it and check for
1861
if (FD_ISSET(pgStatSock, &rfds))
1863
len = recv(pgStatSock, (char *) &input_buffer,
1864
sizeof(PgStat_Msg), 0);
1867
(errcode_for_socket_access(),
1868
errmsg("could not read statistics message: %m")));
1871
* We ignore messages that are smaller than our common header
1873
if (len < sizeof(PgStat_MsgHdr))
1877
* The received length must match the length in the header
1879
if (input_buffer.msg_hdr.m_size != len)
1883
* O.K. - we accept this message. Copy it to the circular
1889
xfr = PGSTAT_RECVBUFFERSZ - msg_recv;
1893
memcpy(msgbuffer + msg_recv,
1894
((char *) &input_buffer) + frm,
1897
if (msg_recv == PGSTAT_RECVBUFFERSZ)
1906
* If the collector is ready to receive, write some data into its
1907
* pipe. We may or may not be able to write all that we have.
1909
* NOTE: if what we have is less than PIPE_BUF bytes but more than
1910
* the space available in the pipe buffer, most kernels will
1911
* refuse to write any of it, and will return EAGAIN. This means
1912
* we will busy-loop until the situation changes (either because
1913
* the collector caught up, or because more data arrives so that
1914
* we have more than PIPE_BUF bytes buffered). This is not good,
1915
* but is there any way around it? We have no way to tell when
1916
* the collector has caught up...
1918
if (FD_ISSET(writePipe, &wfds))
1920
xfr = PGSTAT_RECVBUFFERSZ - msg_send;
1924
len = pipewrite(writePipe, msgbuffer + msg_send, xfr);
1927
if (errno == EINTR || errno == EAGAIN)
1928
continue; /* not enough space in pipe */
1930
(errcode_for_socket_access(),
1931
errmsg("could not write to statistics collector pipe: %m")));
1933
/* NB: len < xfr is okay */
1935
if (msg_send == PGSTAT_RECVBUFFERSZ)
1941
* Make sure we forwarded all messages before we check for
1942
* postmaster termination.
1944
if (msg_have != 0 || FD_ISSET(pgStatSock, &rfds))
1948
* If the postmaster has terminated, we die too. (This is no
1949
* longer the normal exit path, however.)
1951
if (!PostmasterIsAlive(true))
1956
/* SIGQUIT signal handler for buffer process */
1958
pgstat_exit(SIGNAL_ARGS)
1961
* For now, we just nail the doors shut and get out of town. It might
1962
* be cleaner to allow any pending messages to be sent, but that
1963
* creates a tradeoff against speed of exit.
1967
* If running in bufferer, kill our collector as well. On some broken
1968
* win32 systems, it does not shut down automatically because of issues
1969
* with socket inheritance. XXX so why not fix the socket inheritance...
1972
if (pgStatCollectorPid > 0)
1973
kill(pgStatCollectorPid, SIGQUIT);
1978
/* SIGCHLD signal handler for buffer process */
1980
pgstat_die(SIGNAL_ARGS)
1987
* pgstat_add_backend() -
1989
* Support function to keep our backend list up to date.
1993
pgstat_add_backend(PgStat_MsgHdr *msg)
1995
PgStat_StatDBEntry *dbentry;
1996
PgStat_StatBeEntry *beentry;
1997
PgStat_StatBeDead *deadbe;
2001
* Check that the backend ID is valid
2003
if (msg->m_backendid < 1 || msg->m_backendid > MaxBackends)
2006
(errmsg("invalid server process ID %d", msg->m_backendid)));
2011
* Get the slot for this backendid.
2013
beentry = &pgStatBeTable[msg->m_backendid - 1];
2014
if (beentry->databaseid != InvalidOid)
2017
* If the slot contains the PID of this backend, everything is
2018
* fine and we got nothing to do.
2020
if (beentry->procpid == msg->m_procpid)
2025
* Check whether this backend is known to be dead. That can happen when
2026
* messages arrive out of order - e.g. the postmaster's BETERM message
2027
* might have arrived before we received all of the backend's stats
2028
* messages, or a new backend with the same backendid might have been
2029
* faster in sending its BESTART.
2031
* If the backend is known to be dead, we ignore this add.
2033
deadbe = (PgStat_StatBeDead *) hash_search(pgStatBeDead,
2034
(void *) &(msg->m_procpid),
2040
* Backend isn't known to be dead. If its slot is currently used, we
2041
* have to kick out the old backend.
2043
if (beentry->databaseid != InvalidOid)
2044
pgstat_sub_backend(beentry->procpid);
2047
* Put this new backend into the slot.
2049
beentry->databaseid = msg->m_databaseid;
2050
beentry->procpid = msg->m_procpid;
2051
beentry->userid = msg->m_userid;
2052
beentry->activity_start_sec = 0;
2053
beentry->activity_start_usec = 0;
2054
MemSet(beentry->activity, 0, PGSTAT_ACTIVITY_SIZE);
2057
* Look up or create the database entry for this backend's DB.
2059
dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
2060
(void *) &(msg->m_databaseid),
2061
HASH_ENTER, &found);
2062
if (dbentry == NULL)
2064
(errcode(ERRCODE_OUT_OF_MEMORY),
2065
errmsg("out of memory in statistics collector --- abort")));
2068
* If not found, initialize the new one.
2074
dbentry->tables = NULL;
2075
dbentry->n_xact_commit = 0;
2076
dbentry->n_xact_rollback = 0;
2077
dbentry->n_blocks_fetched = 0;
2078
dbentry->n_blocks_hit = 0;
2079
dbentry->n_connects = 0;
2080
dbentry->destroy = 0;
2082
memset(&hash_ctl, 0, sizeof(hash_ctl));
2083
hash_ctl.keysize = sizeof(Oid);
2084
hash_ctl.entrysize = sizeof(PgStat_StatTabEntry);
2085
hash_ctl.hash = tag_hash;
2086
dbentry->tables = hash_create("Per-database table",
2087
PGSTAT_TAB_HASH_SIZE,
2089
HASH_ELEM | HASH_FUNCTION);
2093
* Count number of connects to the database
2095
dbentry->n_connects++;
2102
* pgstat_sub_backend() -
2104
* Remove a backend from the actual backends list.
2108
pgstat_sub_backend(int procpid)
2111
PgStat_StatBeDead *deadbe;
2115
* Search in the known-backends table for the slot containing this
2118
for (i = 0; i < MaxBackends; i++)
2120
if (pgStatBeTable[i].databaseid != InvalidOid &&
2121
pgStatBeTable[i].procpid == procpid)
2124
* That's him. Add an entry to the known to be dead backends.
2125
* Because UDP packets may arrive out of order, it's
2126
* possible that even if we know the backend is dead, there
2127
* could still be messages queued that arrive later. Those
2128
* messages must not cause our number of backends statistics
2129
* to get screwed up, so we remember for a couple of seconds
2130
* that this PID is dead and ignore them (only the counting of
2131
* backends, not the table access stats they sent).
2133
deadbe = (PgStat_StatBeDead *) hash_search(pgStatBeDead,
2139
(errcode(ERRCODE_OUT_OF_MEMORY),
2140
errmsg("out of memory in statistics collector --- abort")));
2144
deadbe->backendid = i + 1;
2145
deadbe->destroy = PGSTAT_DESTROY_COUNT;
2149
* Declare the backend slot empty.
2151
pgStatBeTable[i].databaseid = InvalidOid;
2157
* No big problem if not found. This can happen if UDP messages arrive
2158
* out of order here.
2164
* pgstat_write_statsfile() -
2170
pgstat_write_statsfile(void)
2172
HASH_SEQ_STATUS hstat;
2173
HASH_SEQ_STATUS tstat;
2174
PgStat_StatDBEntry *dbentry;
2175
PgStat_StatTabEntry *tabentry;
2176
PgStat_StatBeDead *deadbe;
2181
* Open the statistics temp file to write out the current values.
2183
fpout = fopen(pgStat_tmpfname, PG_BINARY_W);
2187
(errcode_for_file_access(),
2188
errmsg("could not open temporary statistics file \"%s\": %m",
2194
* Walk through the database table.
2196
hash_seq_init(&hstat, pgStatDBHash);
2197
while ((dbentry = (PgStat_StatDBEntry *) hash_seq_search(&hstat)) != NULL)
2200
* If this database is marked destroyed, count down and do so if
2203
if (dbentry->destroy > 0)
2205
if (--(dbentry->destroy) == 0)
2207
if (dbentry->tables != NULL)
2208
hash_destroy(dbentry->tables);
2210
if (hash_search(pgStatDBHash,
2211
(void *) &(dbentry->databaseid),
2212
HASH_REMOVE, NULL) == NULL)
2214
(errmsg("database hash table corrupted "
2215
"during cleanup --- abort")));
2219
* Don't include statistics for it.
2225
* Write out the DB line including the number of live backends.
2228
fwrite(dbentry, sizeof(PgStat_StatDBEntry), 1, fpout);
2231
* Walk through the database's access stats per table.
2233
hash_seq_init(&tstat, dbentry->tables);
2234
while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&tstat)) != NULL)
2237
* If table entry marked for destruction, same as above for
2238
* the database entry.
2240
if (tabentry->destroy > 0)
2242
if (--(tabentry->destroy) == 0)
2244
if (hash_search(dbentry->tables,
2245
(void *) &(tabentry->tableid),
2246
HASH_REMOVE, NULL) == NULL)
2249
(errmsg("tables hash table for "
2250
"database %u corrupted during "
2251
"cleanup --- abort",
2252
dbentry->databaseid)));
2259
* At least we think this is still a live table. Print its
2263
fwrite(tabentry, sizeof(PgStat_StatTabEntry), 1, fpout);
2267
* Mark the end of this DB
2273
* Write out the known running backends to the stats file.
2277
fwrite(&i, sizeof(i), 1, fpout);
2279
for (i = 0; i < MaxBackends; i++)
2281
if (pgStatBeTable[i].databaseid != InvalidOid)
2284
fwrite(&pgStatBeTable[i], sizeof(PgStat_StatBeEntry), 1, fpout);
2289
* No more output to be done. Close the temp file and replace the old
2290
* pgstat.stat with it.
2293
if (fclose(fpout) < 0)
2296
(errcode_for_file_access(),
2297
errmsg("could not close temporary statistics file \"%s\": %m",
2302
if (rename(pgStat_tmpfname, pgStat_fname) < 0)
2305
(errcode_for_file_access(),
2306
errmsg("could not rename temporary statistics file \"%s\" to \"%s\": %m",
2307
pgStat_tmpfname, pgStat_fname)));
2312
* Clear out the dead backends table
2314
hash_seq_init(&hstat, pgStatBeDead);
2315
while ((deadbe = (PgStat_StatBeDead *) hash_seq_search(&hstat)) != NULL)
2318
* Count down the destroy delay and remove entries where it
2321
if (--(deadbe->destroy) <= 0)
2323
if (hash_search(pgStatBeDead,
2324
(void *) &(deadbe->procpid),
2325
HASH_REMOVE, NULL) == NULL)
2328
(errmsg("dead-server-process hash table corrupted "
2329
"during cleanup --- abort")));
2337
* pgstat_read_statsfile() -
2339
* Reads in an existing statistics collector file and initializes the
2340
* databases hash table (whose entries point to the tables hash tables)
2341
* and the current backend table.
2345
pgstat_read_statsfile(HTAB **dbhash, Oid onlydb,
2346
PgStat_StatBeEntry **betab, int *numbackends)
2348
PgStat_StatDBEntry *dbentry;
2349
PgStat_StatDBEntry dbbuf;
2350
PgStat_StatTabEntry *tabentry;
2351
PgStat_StatTabEntry tabbuf;
2353
HTAB *tabhash = NULL;
2355
int maxbackends = 0;
2356
int havebackends = 0;
2358
MemoryContext use_mcxt;
2362
* If running in the collector we use the DynaHashCxt memory context.
2363
* If running in a backend, we use the TopTransactionContext instead,
2364
* so the caller must only know the last XactId when this call
2365
* happened to know if his tables are still valid or already gone!
2367
if (pgStatRunningInCollector)
2374
use_mcxt = TopTransactionContext;
2375
mcxt_flags = HASH_CONTEXT;
2379
* Create the DB hashtable
2381
memset(&hash_ctl, 0, sizeof(hash_ctl));
2382
hash_ctl.keysize = sizeof(Oid);
2383
hash_ctl.entrysize = sizeof(PgStat_StatDBEntry);
2384
hash_ctl.hash = tag_hash;
2385
hash_ctl.hcxt = use_mcxt;
2386
*dbhash = hash_create("Databases hash", PGSTAT_DB_HASH_SIZE, &hash_ctl,
2387
HASH_ELEM | HASH_FUNCTION | mcxt_flags);
2390
* Initialize the number of known backends to zero, just in case we do
2391
* a silent error return below.
2393
if (numbackends != NULL)
2399
* In EXEC_BACKEND case, we won't have inherited pgStat_fname from
2400
* postmaster, so compute it first time through.
2403
if (pgStat_fname[0] == '\0')
2405
Assert(DataDir != NULL);
2406
snprintf(pgStat_fname, MAXPGPATH, PGSTAT_STAT_FILENAME, DataDir);
2411
* Try to open the status file. If it doesn't exist, the backends
2412
* simply return zero for anything and the collector simply starts
2413
* from scratch with empty counters.
2415
if ((fpin = AllocateFile(pgStat_fname, PG_BINARY_R)) == NULL)
2419
* We found an existing collector stats file. Read it and put all the
2420
* hashtable entries into place.
2424
switch (fgetc(fpin))
2427
* 'D' A PgStat_StatDBEntry struct describing a database
2428
* follows. Subsequently, zero to many 'T' entries will
2429
* follow until a 'd' is encountered.
2432
if (fread(&dbbuf, 1, sizeof(dbbuf), fpin) != sizeof(dbbuf))
2434
ereport(pgStatRunningInCollector ? LOG : WARNING,
2435
(errmsg("corrupted pgstat.stat file")));
2440
* Add to the DB hash
2442
dbentry = (PgStat_StatDBEntry *) hash_search(*dbhash,
2443
(void *) &dbbuf.databaseid,
2446
if (dbentry == NULL)
2449
(errcode(ERRCODE_OUT_OF_MEMORY),
2450
errmsg("out of memory")));
2454
ereport(pgStatRunningInCollector ? LOG : WARNING,
2455
(errmsg("corrupted pgstat.stat file")));
2459
memcpy(dbentry, &dbbuf, sizeof(PgStat_StatDBEntry));
2460
dbentry->tables = NULL;
2461
dbentry->destroy = 0;
2462
dbentry->n_backends = 0;
2465
* Don't collect tables if not the requested DB
2467
if (onlydb != InvalidOid && onlydb != dbbuf.databaseid)
2470
memset(&hash_ctl, 0, sizeof(hash_ctl));
2471
hash_ctl.keysize = sizeof(Oid);
2472
hash_ctl.entrysize = sizeof(PgStat_StatTabEntry);
2473
hash_ctl.hash = tag_hash;
2474
hash_ctl.hcxt = use_mcxt;
2475
dbentry->tables = hash_create("Per-database table",
2476
PGSTAT_TAB_HASH_SIZE,
2478
HASH_ELEM | HASH_FUNCTION | mcxt_flags);
2481
* Arrange that following 'T's add entries to this
2482
* database's tables hash table.
2484
tabhash = dbentry->tables;
2488
* 'd' End of this database.
2495
* 'T' A PgStat_StatTabEntry follows.
2498
if (fread(&tabbuf, 1, sizeof(tabbuf), fpin) != sizeof(tabbuf))
2500
ereport(pgStatRunningInCollector ? LOG : WARNING,
2501
(errmsg("corrupted pgstat.stat file")));
2506
* Skip if table belongs to a not requested database.
2508
if (tabhash == NULL)
2511
tabentry = (PgStat_StatTabEntry *) hash_search(tabhash,
2512
(void *) &tabbuf.tableid,
2513
HASH_ENTER, &found);
2514
if (tabentry == NULL)
2516
(errcode(ERRCODE_OUT_OF_MEMORY),
2517
errmsg("out of memory")));
2521
ereport(pgStatRunningInCollector ? LOG : WARNING,
2522
(errmsg("corrupted pgstat.stat file")));
2526
memcpy(tabentry, &tabbuf, sizeof(tabbuf));
2530
* 'M' The maximum number of backends to expect follows.
2533
if (betab == NULL || numbackends == NULL)
2535
if (fread(&maxbackends, 1, sizeof(maxbackends), fpin) !=
2536
sizeof(maxbackends))
2538
ereport(pgStatRunningInCollector ? LOG : WARNING,
2539
(errmsg("corrupted pgstat.stat file")));
2542
if (maxbackends == 0)
2546
* Allocate space (in TopTransactionContext too) for the
2549
if (use_mcxt == NULL)
2550
*betab = (PgStat_StatBeEntry *) palloc(
2551
sizeof(PgStat_StatBeEntry) * maxbackends);
2553
*betab = (PgStat_StatBeEntry *) MemoryContextAlloc(
2555
sizeof(PgStat_StatBeEntry) * maxbackends);
2559
* 'B' A PgStat_StatBeEntry follows.
2562
if (betab == NULL || numbackends == NULL || *betab == NULL)
2566
* Read it directly into the table.
2568
if (fread(&(*betab)[havebackends], 1,
2569
sizeof(PgStat_StatBeEntry), fpin) !=
2570
sizeof(PgStat_StatBeEntry))
2572
ereport(pgStatRunningInCollector ? LOG : WARNING,
2573
(errmsg("corrupted pgstat.stat file")));
2578
* Count backends per database here.
2580
dbentry = (PgStat_StatDBEntry *) hash_search(*dbhash,
2581
(void *) &((*betab)[havebackends].databaseid),
2584
dbentry->n_backends++;
2587
if (numbackends != 0)
2588
*numbackends = havebackends;
2589
if (havebackends >= maxbackends)
2595
* 'E' The EOF marker of a complete stats file.
2601
ereport(pgStatRunningInCollector ? LOG : WARNING,
2602
(errmsg("corrupted pgstat.stat file")));
2612
* If not done for this transaction, read the statistics collector
2613
* stats file into some hash tables.
2615
* Because we store the hash tables in TopTransactionContext, the result
2616
* is good for the entire current main transaction.
2619
backend_read_statsfile(void)
2621
TransactionId topXid = GetTopTransactionId();
2623
if (!TransactionIdEquals(pgStatDBHashXact, topXid))
2625
Assert(!pgStatRunningInCollector);
2626
pgstat_read_statsfile(&pgStatDBHash, MyDatabaseId,
2627
&pgStatBeTable, &pgStatNumBackends);
2628
pgStatDBHashXact = topXid;
2634
* pgstat_recv_bestart() -
2636
* Process a backend startup message.
2640
pgstat_recv_bestart(PgStat_MsgBestart *msg, int len)
2642
pgstat_add_backend(&msg->m_hdr);
2647
* pgstat_recv_beterm() -
2649
* Process a backend termination message.
2653
pgstat_recv_beterm(PgStat_MsgBeterm *msg, int len)
2655
pgstat_sub_backend(msg->m_hdr.m_procpid);
2660
* pgstat_recv_activity() -
2662
* Remember what the backend is doing.
2666
pgstat_recv_activity(PgStat_MsgActivity *msg, int len)
2668
PgStat_StatBeEntry *entry;
2671
* Here we check explicitly for 0 return, since we don't want to
2672
* mangle the activity of an active backend by a delayed packet from a
2675
if (pgstat_add_backend(&msg->m_hdr) != 0)
2678
entry = &(pgStatBeTable[msg->m_hdr.m_backendid - 1]);
2680
StrNCpy(entry->activity, msg->m_what, PGSTAT_ACTIVITY_SIZE);
2682
entry->activity_start_sec =
2683
GetCurrentAbsoluteTimeUsec(&entry->activity_start_usec);
2688
* pgstat_recv_tabstat() -
2690
* Count what the backend has done.
2694
pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len)
2696
PgStat_TableEntry *tabmsg = &(msg->m_entry[0]);
2697
PgStat_StatDBEntry *dbentry;
2698
PgStat_StatTabEntry *tabentry;
2703
* Make sure the backend is accounted for.
2705
if (pgstat_add_backend(&msg->m_hdr) < 0)
2709
* Lookup the database in the hashtable.
2711
dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
2712
(void *) &(msg->m_hdr.m_databaseid),
2718
* If the database is marked for destroy, this is a delayed UDP packet
2719
* and not worth being counted.
2721
if (dbentry->destroy > 0)
2724
dbentry->n_xact_commit += (PgStat_Counter) (msg->m_xact_commit);
2725
dbentry->n_xact_rollback += (PgStat_Counter) (msg->m_xact_rollback);
2728
* Process all table entries in the message.
2730
for (i = 0; i < msg->m_nentries; i++)
2732
tabentry = (PgStat_StatTabEntry *) hash_search(dbentry->tables,
2733
(void *) &(tabmsg[i].t_id),
2734
HASH_ENTER, &found);
2735
if (tabentry == NULL)
2737
(errcode(ERRCODE_OUT_OF_MEMORY),
2738
errmsg("out of memory in statistics collector --- abort")));
2743
* If it's a new table entry, initialize counters to the
2744
* values we just got.
2746
tabentry->numscans = tabmsg[i].t_numscans;
2747
tabentry->tuples_returned = tabmsg[i].t_tuples_returned;
2748
tabentry->tuples_fetched = tabmsg[i].t_tuples_fetched;
2749
tabentry->tuples_inserted = tabmsg[i].t_tuples_inserted;
2750
tabentry->tuples_updated = tabmsg[i].t_tuples_updated;
2751
tabentry->tuples_deleted = tabmsg[i].t_tuples_deleted;
2752
tabentry->blocks_fetched = tabmsg[i].t_blocks_fetched;
2753
tabentry->blocks_hit = tabmsg[i].t_blocks_hit;
2755
tabentry->destroy = 0;
2760
* Otherwise add the values to the existing entry.
2762
tabentry->numscans += tabmsg[i].t_numscans;
2763
tabentry->tuples_returned += tabmsg[i].t_tuples_returned;
2764
tabentry->tuples_fetched += tabmsg[i].t_tuples_fetched;
2765
tabentry->tuples_inserted += tabmsg[i].t_tuples_inserted;
2766
tabentry->tuples_updated += tabmsg[i].t_tuples_updated;
2767
tabentry->tuples_deleted += tabmsg[i].t_tuples_deleted;
2768
tabentry->blocks_fetched += tabmsg[i].t_blocks_fetched;
2769
tabentry->blocks_hit += tabmsg[i].t_blocks_hit;
2773
* And add the block IO to the database entry.
2775
dbentry->n_blocks_fetched += tabmsg[i].t_blocks_fetched;
2776
dbentry->n_blocks_hit += tabmsg[i].t_blocks_hit;
2782
* pgstat_recv_tabpurge() -
2784
* Arrange for dead table removal.
2788
pgstat_recv_tabpurge(PgStat_MsgTabpurge *msg, int len)
2790
PgStat_StatDBEntry *dbentry;
2791
PgStat_StatTabEntry *tabentry;
2795
* Make sure the backend is accounted for.
2797
if (pgstat_add_backend(&msg->m_hdr) < 0)
2801
* Lookup the database in the hashtable.
2803
dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
2804
(void *) &(msg->m_hdr.m_databaseid),
2810
* If the database is marked for destroy, this is a delayed UDP packet
2811
* and the tables will go away at DB destruction.
2813
if (dbentry->destroy > 0)
2817
* Process all table entries in the message.
2819
for (i = 0; i < msg->m_nentries; i++)
2821
tabentry = (PgStat_StatTabEntry *) hash_search(dbentry->tables,
2822
(void *) &(msg->m_tableid[i]),
2825
tabentry->destroy = PGSTAT_DESTROY_COUNT;
2831
* pgstat_recv_dropdb() -
2833
* Arrange for dead database removal
2837
pgstat_recv_dropdb(PgStat_MsgDropdb *msg, int len)
2839
PgStat_StatDBEntry *dbentry;
2842
* Make sure the backend is accounted for.
2844
if (pgstat_add_backend(&msg->m_hdr) < 0)
2848
* Lookup the database in the hashtable.
2850
dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
2851
(void *) &(msg->m_databaseid),
2857
* Mark the database for destruction.
2859
dbentry->destroy = PGSTAT_DESTROY_COUNT;
2864
* pgstat_recv_resetcounter() -
2866
* Reset the statistics counters of the sending backend's database.
2870
pgstat_recv_resetcounter(PgStat_MsgResetcounter *msg, int len)
2873
PgStat_StatDBEntry *dbentry;
2876
* Make sure the backend is accounted for.
2878
if (pgstat_add_backend(&msg->m_hdr) < 0)
2882
* Lookup the database in the hashtable.
2884
dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
2885
(void *) &(msg->m_hdr.m_databaseid),
2891
* We simply throw away all the database's table entries by recreating
2892
* a new hash table for them.
2894
if (dbentry->tables != NULL)
2895
hash_destroy(dbentry->tables);
2897
dbentry->tables = NULL;
2898
dbentry->n_xact_commit = 0;
2899
dbentry->n_xact_rollback = 0;
2900
dbentry->n_blocks_fetched = 0;
2901
dbentry->n_blocks_hit = 0;
2902
dbentry->n_connects = 0;
2903
dbentry->destroy = 0;
2905
memset(&hash_ctl, 0, sizeof(hash_ctl));
2906
hash_ctl.keysize = sizeof(Oid);
2907
hash_ctl.entrysize = sizeof(PgStat_StatTabEntry);
2908
hash_ctl.hash = tag_hash;
2909
dbentry->tables = hash_create("Per-database table",
2910
PGSTAT_TAB_HASH_SIZE,
2912
HASH_ELEM | HASH_FUNCTION);