~vcs-imports/mammoth-replicator/trunk

« back to all changes in this revision

Viewing changes to src/backend/postmaster/pgstat.c

  • Committer: alvherre
  • Date: 2005-12-16 21:24:52 UTC
  • Revision ID: svn-v4:db760fc0-0f08-0410-9d63-cc6633f64896:trunk:1
Initial import of the REL8_0_3 sources from the Pgsql CVS repository.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/* ----------
 
2
 * pgstat.c
 
3
 *
 
4
 *      All the statistics collector stuff hacked up in one big, ugly file.
 
5
 *
 
6
 *      TODO:   - Separate collector, postmaster and backend stuff
 
7
 *                        into different files.
 
8
 *
 
9
 *                      - Add some automatic call for pgstat vacuuming.
 
10
 *
 
11
 *                      - Add a pgstat config column to pg_database, so this
 
12
 *                        entire thing can be enabled/disabled on a per db basis.
 
13
 *
 
14
 *      Copyright (c) 2001-2005, PostgreSQL Global Development Group
 
15
 *
 
16
 *      $PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.87.4.2 2005-03-31 23:21:09 tgl Exp $
 
17
 * ----------
 
18
 */
 
19
#include "postgres.h"
 
20
 
 
21
#include <unistd.h>
 
22
#include <fcntl.h>
 
23
#include <sys/param.h>
 
24
#include <sys/time.h>
 
25
#include <sys/socket.h>
 
26
#include <netdb.h>
 
27
#include <netinet/in.h>
 
28
#include <arpa/inet.h>
 
29
#include <signal.h>
 
30
#include <time.h>
 
31
 
 
32
#include "pgstat.h"
 
33
 
 
34
#include "access/heapam.h"
 
35
#include "access/xact.h"
 
36
#include "catalog/catname.h"
 
37
#include "catalog/pg_database.h"
 
38
#include "catalog/pg_shadow.h"
 
39
#include "libpq/libpq.h"
 
40
#include "libpq/pqsignal.h"
 
41
#include "mb/pg_wchar.h"
 
42
#include "miscadmin.h"
 
43
#include "postmaster/postmaster.h"
 
44
#include "storage/backendid.h"
 
45
#include "storage/fd.h"
 
46
#include "storage/ipc.h"
 
47
#include "storage/pg_shmem.h"
 
48
#include "storage/pmsignal.h"
 
49
#include "tcop/tcopprot.h"
 
50
#include "utils/hsearch.h"
 
51
#include "utils/memutils.h"
 
52
#include "utils/ps_status.h"
 
53
#include "utils/rel.h"
 
54
#include "utils/syscache.h"
 
55
 
 
56
 
 
/* ----------
 * Locations of the statistics files.  The %s is replaced with the
 * installation's $PGDATA; the temp-file pattern takes an additional
 * integer argument.
 * ----------
 */
#define PGSTAT_STAT_FILENAME	"%s/global/pgstat.stat"
#define PGSTAT_STAT_TMPFILE		"%s/global/pgstat.tmp.%d"

/* ----------
 * Timer definitions.
 * ----------
 */

/* Interval between writes of the status file; in milliseconds. */
#define PGSTAT_STAT_INTERVAL	500

/*
 * How long destroyed objects are still remembered, so that delayed UDP
 * packets referring to them have time to arrive; in milliseconds.
 */
#define PGSTAT_DESTROY_DELAY	10000

/* The destroy delay expressed as a number of status-file write intervals. */
#define PGSTAT_DESTROY_COUNT	(PGSTAT_DESTROY_DELAY / PGSTAT_STAT_INTERVAL)

/*
 * Minimum spacing between attempts to restart a failed statistics
 * collector; in seconds.
 */
#define PGSTAT_RESTART_INTERVAL 60

/* ----------
 * Amount of space reserved in pgstat_recvbuffer().
 * ----------
 */
#define PGSTAT_RECVBUFFERSZ		((int) (1024 * sizeof(PgStat_Msg)))

/* ----------
 * The initial size hints for the hash tables used in the collector.
 * ----------
 */
#define PGSTAT_DB_HASH_SIZE		16
#define PGSTAT_BE_HASH_SIZE		512
#define PGSTAT_TAB_HASH_SIZE	512
 
96
 
 
97
 
 
98
/* ----------
 
99
 * GUC parameters
 
100
 * ----------
 
101
 */
 
102
bool            pgstat_collect_startcollector = true;
 
103
bool            pgstat_collect_resetonpmstart = true;
 
104
bool            pgstat_collect_querystring = false;
 
105
bool            pgstat_collect_tuplelevel = false;
 
106
bool            pgstat_collect_blocklevel = false;
 
107
 
 
108
/* ----------
 
109
 * Local data
 
110
 * ----------
 
111
 */
 
112
NON_EXEC_STATIC int pgStatSock = -1;
 
113
NON_EXEC_STATIC int pgStatPipe[2] = {-1,-1};
 
114
static struct sockaddr_storage pgStatAddr;
 
115
static pid_t pgStatCollectorPid = 0;
 
116
 
 
117
static time_t last_pgstat_start_time;
 
118
 
 
119
static long pgStatNumMessages = 0;
 
120
 
 
121
static bool pgStatRunningInCollector = FALSE;
 
122
 
 
123
static int      pgStatTabstatAlloc = 0;
 
124
static int      pgStatTabstatUsed = 0;
 
125
static PgStat_MsgTabstat **pgStatTabstatMessages = NULL;
 
126
 
 
127
#define TABSTAT_QUANTUM         4       /* we alloc this many at a time */
 
128
 
 
129
static int      pgStatXactCommit = 0;
 
130
static int      pgStatXactRollback = 0;
 
131
 
 
132
static TransactionId pgStatDBHashXact = InvalidTransactionId;
 
133
static HTAB *pgStatDBHash = NULL;
 
134
static HTAB *pgStatBeDead = NULL;
 
135
static PgStat_StatBeEntry *pgStatBeTable = NULL;
 
136
static int      pgStatNumBackends = 0;
 
137
 
 
138
static char pgStat_fname[MAXPGPATH];
 
139
static char pgStat_tmpfname[MAXPGPATH];
 
140
 
 
141
 
 
142
/* ----------
 
143
 * Local function forward declarations
 
144
 * ----------
 
145
 */
 
146
#ifdef EXEC_BACKEND
 
147
 
 
148
typedef enum STATS_PROCESS_TYPE
 
149
{
 
150
        STAT_PROC_BUFFER,
 
151
        STAT_PROC_COLLECTOR
 
152
}       STATS_PROCESS_TYPE;
 
153
 
 
154
static pid_t pgstat_forkexec(STATS_PROCESS_TYPE procType);
 
155
static void pgstat_parseArgs(int argc, char *argv[]);
 
156
#endif
 
157
 
 
158
NON_EXEC_STATIC void PgstatBufferMain(int argc, char *argv[]);
 
159
NON_EXEC_STATIC void PgstatCollectorMain(int argc, char *argv[]);
 
160
static void pgstat_recvbuffer(void);
 
161
static void pgstat_exit(SIGNAL_ARGS);
 
162
static void pgstat_die(SIGNAL_ARGS);
 
163
static void pgstat_beshutdown_hook(int code, Datum arg);
 
164
 
 
165
static int      pgstat_add_backend(PgStat_MsgHdr *msg);
 
166
static void pgstat_sub_backend(int procpid);
 
167
static void pgstat_drop_database(Oid databaseid);
 
168
static void pgstat_write_statsfile(void);
 
169
static void pgstat_read_statsfile(HTAB **dbhash, Oid onlydb,
 
170
                                          PgStat_StatBeEntry **betab,
 
171
                                          int *numbackends);
 
172
static void backend_read_statsfile(void);
 
173
 
 
174
static void pgstat_setheader(PgStat_MsgHdr *hdr, int mtype);
 
175
static void pgstat_send(void *msg, int len);
 
176
 
 
177
static void pgstat_recv_bestart(PgStat_MsgBestart *msg, int len);
 
178
static void pgstat_recv_beterm(PgStat_MsgBeterm *msg, int len);
 
179
static void pgstat_recv_activity(PgStat_MsgActivity *msg, int len);
 
180
static void pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len);
 
181
static void pgstat_recv_tabpurge(PgStat_MsgTabpurge *msg, int len);
 
182
static void pgstat_recv_dropdb(PgStat_MsgDropdb *msg, int len);
 
183
static void pgstat_recv_resetcounter(PgStat_MsgResetcounter *msg, int len);
 
184
 
 
185
 
 
186
/* ------------------------------------------------------------
 
187
 * Public functions called from postmaster follow
 
188
 * ------------------------------------------------------------
 
189
 */
 
190
 
 
#define TESTBYTEVAL ((char) 199)

/*
 * pgstat_try_address() -
 *
 *	Try to build a working statistics socket in pgStatSock from one
 *	getaddrinfo result: create it, bind it, connect it to its own address
 *	(recorded in pgStatAddr), and push a one-byte test message through it.
 *
 *	Returns true on success.  On any failure a LOG message is emitted,
 *	the socket (if one was opened) is closed, and false is returned; in
 *	that case pgStatSock is left negative.
 */
static bool
pgstat_try_address(struct addrinfo *candidate)
{
	ACCEPT_TYPE_ARG3 addrlen;
	fd_set		readable;
	struct timeval timeout;
	char		probe;
	int			nready;

	/*
	 * Create the socket.
	 */
	pgStatSock = socket(candidate->ai_family, SOCK_DGRAM, 0);
	if (pgStatSock < 0)
	{
		ereport(LOG,
				(errcode_for_socket_access(),
				 errmsg("could not create socket for statistics collector: %m")));
		return false;
	}

	/*
	 * Bind to a kernel-assigned port on localhost, then ask the kernel
	 * which address and port we actually got.
	 */
	if (bind(pgStatSock, candidate->ai_addr, candidate->ai_addrlen) < 0)
	{
		ereport(LOG,
				(errcode_for_socket_access(),
				 errmsg("could not bind socket for statistics collector: %m")));
		goto discard_socket;
	}

	addrlen = sizeof(pgStatAddr);
	if (getsockname(pgStatSock, (struct sockaddr *) & pgStatAddr, &addrlen) < 0)
	{
		ereport(LOG,
				(errcode_for_socket_access(),
				 errmsg("could not get address of socket for statistics collector: %m")));
		goto discard_socket;
	}

	/*
	 * Connect the socket to its own address.  Senders then need not name
	 * the destination on every send, and the kernel will only deliver
	 * packets that originate from this same address.
	 */
	if (connect(pgStatSock, (struct sockaddr *) & pgStatAddr, addrlen) < 0)
	{
		ereport(LOG,
				(errcode_for_socket_access(),
				 errmsg("could not connect socket for statistics collector: %m")));
		goto discard_socket;
	}

	/*
	 * Send ourselves a single test byte.  This detects sockets that can
	 * be set up but never actually carry data (for instance because of
	 * kernel packet filtering rules).
	 */
	probe = TESTBYTEVAL;
	if (send(pgStatSock, &probe, 1, 0) != 1)
	{
		ereport(LOG,
				(errcode_for_socket_access(),
				 errmsg("could not send test message on socket for statistics collector: %m")));
		goto discard_socket;
	}

	/*
	 * Delivery may lag slightly, so wait up to half a second for the byte
	 * to become readable.  Retry the wait whenever it is interrupted by a
	 * signal (EINTR).
	 */
	do
	{
		FD_ZERO(&readable);
		FD_SET(pgStatSock, &readable);
		timeout.tv_sec = 0;
		timeout.tv_usec = 500000;
		nready = select(pgStatSock + 1, &readable, NULL, NULL, &timeout);
	} while (nready < 0 && errno == EINTR);

	if (nready < 0)
	{
		ereport(LOG,
				(errcode_for_socket_access(),
				 errmsg("select() failed in statistics collector: %m")));
		goto discard_socket;
	}
	if (nready == 0 || !FD_ISSET(pgStatSock, &readable))
	{
		/*
		 * Timeout: the failure mode considered most likely, hence the
		 * dedicated message.  errno carries nothing useful here, so it
		 * is deliberately not reported.
		 */
		ereport(LOG,
				(errcode(ERRCODE_CONNECTION_FAILURE),
				 errmsg("test message did not get through on socket for statistics collector")));
		goto discard_socket;
	}

	/* Perturb the variable so a successful recv() demonstrably rewrites it */
	probe++;

	if (recv(pgStatSock, &probe, 1, 0) != 1)
	{
		ereport(LOG,
				(errcode_for_socket_access(),
				 errmsg("could not receive test message on socket for statistics collector: %m")));
		goto discard_socket;
	}

	if (probe != TESTBYTEVAL)	/* strictly paranoia ... */
	{
		ereport(LOG,
				(errcode(ERRCODE_INTERNAL_ERROR),
				 errmsg("incorrect test message transmission on socket for statistics collector")));
		goto discard_socket;
	}

	/* Everything checked out: pgStatSock is usable */
	return true;

discard_socket:
	closesocket(pgStatSock);
	pgStatSock = -1;
	return false;
}

/* ----------
 * pgstat_init() -
 *
 *	Postmaster startup hook.  Computes the statistics file name, removes
 *	a stale file when the collector is disabled or a reset on postmaster
 *	start was requested, and sets up the loopback UDP socket used for
 *	statistics messages.
 *
 *	Failure is never fatal: if no working socket can be obtained, the
 *	collection GUC variables are switched off and the postmaster starts
 *	with statistics collection disabled.
 * ----------
 */
void
pgstat_init(void)
{
	struct addrinfo hints;
	struct addrinfo *addrs = NULL;
	struct addrinfo *addr;
	int			ret;

	/*
	 * Asking for any kind of statistics implies running the collector.
	 */
	if (pgstat_collect_querystring ||
		pgstat_collect_tuplelevel ||
		pgstat_collect_blocklevel)
		pgstat_collect_startcollector = true;

	/*
	 * Compute the status file name.  (In the EXEC_BACKEND case this only
	 * sets the value in the postmaster; the collector subprocess and any
	 * backend wanting to read the file must recompute it themselves.)
	 */
	snprintf(pgStat_fname, MAXPGPATH, PGSTAT_STAT_FILENAME, DataDir);

	/*
	 * With no collector to be started, drop any old file and stop here.
	 */
	if (!pgstat_collect_startcollector)
	{
		unlink(pgStat_fname);
		return;
	}

	/*
	 * The collector will run; discard old statistics only if asked to.
	 */
	if (pgstat_collect_resetonpmstart)
		unlink(pgStat_fname);

	/*
	 * Look up the candidate addresses for the UDP statistics socket.
	 */
	hints.ai_flags = AI_PASSIVE;
	hints.ai_family = PF_UNSPEC;
	hints.ai_socktype = SOCK_DGRAM;
	hints.ai_protocol = 0;
	hints.ai_addrlen = 0;
	hints.ai_addr = NULL;
	hints.ai_canonname = NULL;
	hints.ai_next = NULL;

	ret = getaddrinfo_all("localhost", NULL, &hints, &addrs);
	if (ret || !addrs)
	{
		ereport(LOG,
				(errmsg("could not resolve \"localhost\": %s",
						gai_strerror(ret))));
		goto startup_failed;
	}

	/*
	 * getaddrinfo_all() may hand back several addresses of which only one
	 * really works (eg, IPv6 plus IPv4 on a kernel that rejects IPv6),
	 * and the breakage may only show at bind() or even connect().  So
	 * walk the list until one candidate passes every check; failed
	 * candidates produce LOG messages but no error.
	 */
	for (addr = addrs; addr != NULL; addr = addr->ai_next)
	{
#ifdef HAVE_UNIX_SOCKETS
		/* Ignore AF_UNIX sockets, if any are returned. */
		if (addr->ai_family == AF_UNIX)
			continue;
#endif

		if (pgstat_try_address(addr))
			break;
	}

	/* Did we find a working address? */
	if (addr == NULL || pgStatSock < 0)
		goto startup_failed;

	/*
	 * Make the socket non-blocking, so that if the collector falls behind
	 * (despite the buffering process) messages are dropped rather than
	 * backends blocking while trying to send them.
	 */
	if (!pg_set_noblock(pgStatSock))
	{
		ereport(LOG,
				(errcode_for_socket_access(),
				 errmsg("could not set statistics collector socket to nonblocking mode: %m")));
		goto startup_failed;
	}

	freeaddrinfo_all(hints.ai_family, addrs);

	return;

startup_failed:
	ereport(LOG,
			(errmsg("disabling statistics collector for lack of working socket")));

	if (addrs)
		freeaddrinfo_all(hints.ai_family, addrs);

	if (pgStatSock >= 0)
		closesocket(pgStatSock);
	pgStatSock = -1;

	/* Adjust GUC variables to suppress useless activity */
	pgstat_collect_startcollector = false;
	pgstat_collect_querystring = false;
	pgstat_collect_tuplelevel = false;
	pgstat_collect_blocklevel = false;
}
 
455
 
 
456
 
 
457
#ifdef EXEC_BACKEND
 
458
 
 
459
/*
 
460
 * pgstat_forkexec() -
 
461
 *
 
462
 * Format up the arglist for, then fork and exec, statistics
 
463
 * (buffer and collector) processes
 
464
 */
 
465
static pid_t
 
466
pgstat_forkexec(STATS_PROCESS_TYPE procType)
 
467
{
 
468
        char       *av[10];
 
469
        int                     ac = 0,
 
470
                                bufc = 0,
 
471
                                i;
 
472
        char            pgstatBuf[2][32];
 
473
 
 
474
        av[ac++] = "postgres";
 
475
 
 
476
        switch (procType)
 
477
        {
 
478
                case STAT_PROC_BUFFER:
 
479
                        av[ac++] = "-forkbuf";
 
480
                        break;
 
481
 
 
482
                case STAT_PROC_COLLECTOR:
 
483
                        av[ac++] = "-forkcol";
 
484
                        break;
 
485
 
 
486
                default:
 
487
                        Assert(false);
 
488
        }
 
489
 
 
490
        av[ac++] = NULL;                        /* filled in by postmaster_forkexec */
 
491
 
 
492
        /* postgres_exec_path is not passed by write_backend_variables */
 
493
        av[ac++] = postgres_exec_path;
 
494
 
 
495
        /* Add to the arg list */
 
496
        Assert(bufc <= lengthof(pgstatBuf));
 
497
        for (i = 0; i < bufc; i++)
 
498
                av[ac++] = pgstatBuf[i];
 
499
 
 
500
        av[ac] = NULL;
 
501
        Assert(ac < lengthof(av));
 
502
 
 
503
        return postmaster_forkexec(ac, av);
 
504
}
 
505
 
 
506
 
 
507
/*
 
508
 * pgstat_parseArgs() -
 
509
 *
 
510
 * Extract data from the arglist for exec'ed statistics
 
511
 * (buffer and collector) processes
 
512
 */
 
513
static void
 
514
pgstat_parseArgs(int argc, char *argv[])
 
515
{
 
516
        Assert(argc == 4);
 
517
 
 
518
        argc = 3;
 
519
        StrNCpy(postgres_exec_path, argv[argc++], MAXPGPATH);
 
520
}
 
521
#endif   /* EXEC_BACKEND */
 
522
 
 
523
 
 
524
/* ----------
 
525
 * pgstat_start() -
 
526
 *
 
527
 *      Called from postmaster at startup or after an existing collector
 
528
 *      died.  Attempt to fire up a fresh statistics collector.
 
529
 *
 
530
 *      Returns PID of child process, or 0 if fail.
 
531
 *
 
532
 *      Note: if fail, we will be called again from the postmaster main loop.
 
533
 * ----------
 
534
 */
 
535
int
 
536
pgstat_start(void)
 
537
{
 
538
        time_t          curtime;
 
539
        pid_t           pgStatPid;
 
540
 
 
541
        /*
 
542
         * Do nothing if no collector needed
 
543
         */
 
544
        if (!pgstat_collect_startcollector)
 
545
                return 0;
 
546
 
 
547
        /*
 
548
         * Do nothing if too soon since last collector start.  This is a
 
549
         * safety valve to protect against continuous respawn attempts if the
 
550
         * collector is dying immediately at launch.  Note that since we will
 
551
         * be re-called from the postmaster main loop, we will get another
 
552
         * chance later.
 
553
         */
 
554
        curtime = time(NULL);
 
555
        if ((unsigned int) (curtime - last_pgstat_start_time) <
 
556
                (unsigned int) PGSTAT_RESTART_INTERVAL)
 
557
                return 0;
 
558
        last_pgstat_start_time = curtime;
 
559
 
 
560
        /*
 
561
         * Check that the socket is there, else pgstat_init failed.
 
562
         */
 
563
        if (pgStatSock < 0)
 
564
        {
 
565
                ereport(LOG,
 
566
                                (errmsg("statistics collector startup skipped")));
 
567
 
 
568
                /*
 
569
                 * We can only get here if someone tries to manually turn
 
570
                 * pgstat_collect_startcollector on after it had been off.
 
571
                 */
 
572
                pgstat_collect_startcollector = false;
 
573
                return 0;
 
574
        }
 
575
 
 
576
        /*
 
577
         * Okay, fork off the collector.
 
578
         */
 
579
 
 
580
        fflush(stdout);
 
581
        fflush(stderr);
 
582
 
 
583
#ifdef __BEOS__
 
584
        /* Specific beos actions before backend startup */
 
585
        beos_before_backend_startup();
 
586
#endif
 
587
 
 
588
#ifdef EXEC_BACKEND
 
589
        switch ((pgStatPid = pgstat_forkexec(STAT_PROC_BUFFER)))
 
590
#else
 
591
        switch ((pgStatPid = fork()))
 
592
#endif
 
593
        {
 
594
                case -1:
 
595
#ifdef __BEOS__
 
596
                        /* Specific beos actions */
 
597
                        beos_backend_startup_failed();
 
598
#endif
 
599
                        ereport(LOG,
 
600
                                        (errmsg("could not fork statistics buffer: %m")));
 
601
                        return 0;
 
602
 
 
603
#ifndef EXEC_BACKEND
 
604
                case 0:
 
605
                        /* in postmaster child ... */
 
606
#ifdef __BEOS__
 
607
                        /* Specific beos actions after backend startup */
 
608
                        beos_backend_startup();
 
609
#endif
 
610
                        /* Close the postmaster's sockets */
 
611
                        ClosePostmasterPorts(false);
 
612
 
 
613
                        /* Drop our connection to postmaster's shared memory, as well */
 
614
                        PGSharedMemoryDetach();
 
615
 
 
616
                        PgstatBufferMain(0, NULL);
 
617
                        break;
 
618
#endif
 
619
 
 
620
                default:
 
621
                        return (int) pgStatPid;
 
622
        }
 
623
 
 
624
        /* shouldn't get here */
 
625
        return 0;
 
626
}
 
627
 
 
628
 
 
629
/* ----------
 
630
 * pgstat_beterm() -
 
631
 *
 
632
 *      Called from postmaster to tell collector a backend terminated.
 
633
 * ----------
 
634
 */
 
635
void
 
636
pgstat_beterm(int pid)
 
637
{
 
638
        PgStat_MsgBeterm msg;
 
639
 
 
640
        if (pgStatSock < 0)
 
641
                return;
 
642
 
 
643
        MemSet(&(msg.m_hdr), 0, sizeof(msg.m_hdr));
 
644
        msg.m_hdr.m_type = PGSTAT_MTYPE_BETERM;
 
645
        msg.m_hdr.m_procpid = pid;
 
646
 
 
647
        pgstat_send(&msg, sizeof(msg));
 
648
}
 
649
 
 
650
 
 
651
/* ------------------------------------------------------------
 
652
 * Public functions used by backends follow
 
653
 * ------------------------------------------------------------
 
654
 */
 
655
 
 
656
 
 
657
/* ----------
 
658
 * pgstat_bestart() -
 
659
 *
 
660
 *      Tell the collector that this new backend is soon ready to process
 
661
 *      queries. Called from tcop/postgres.c before entering the mainloop.
 
662
 * ----------
 
663
 */
 
664
void
 
665
pgstat_bestart(void)
 
666
{
 
667
        PgStat_MsgBestart msg;
 
668
 
 
669
        if (pgStatSock < 0)
 
670
                return;
 
671
 
 
672
        pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_BESTART);
 
673
        pgstat_send(&msg, sizeof(msg));
 
674
 
 
675
        /*
 
676
         * Set up a process-exit hook to ensure we flush the last batch of
 
677
         * statistics to the collector.
 
678
         */
 
679
        on_proc_exit(pgstat_beshutdown_hook, 0);
 
680
}
 
681
 
 
682
/*
 
683
 * Flush any remaining statistics counts out to the collector at process
 
684
 * exit.   Without this, operations triggered during backend exit (such as
 
685
 * temp table deletions) won't be counted.  This is an on_proc_exit hook,
 
686
 * not on_shmem_exit, so that everything interesting must have happened
 
687
 * already.
 
688
 */
 
689
static void
 
690
pgstat_beshutdown_hook(int code, Datum arg)
 
691
{
 
692
        pgstat_report_tabstat();
 
693
}
 
694
 
 
695
 
 
696
/* ----------
 
697
 * pgstat_report_activity() -
 
698
 *
 
699
 *      Called from tcop/postgres.c to tell the collector what the backend
 
700
 *      is actually doing (usually "<IDLE>" or the start of the query to
 
701
 *      be executed).
 
702
 * ----------
 
703
 */
 
704
void
 
705
pgstat_report_activity(const char *what)
 
706
{
 
707
        PgStat_MsgActivity msg;
 
708
        int                     len;
 
709
 
 
710
        if (!pgstat_collect_querystring || pgStatSock < 0)
 
711
                return;
 
712
 
 
713
        len = strlen(what);
 
714
        len = pg_mbcliplen((const unsigned char *) what, len,
 
715
                                           PGSTAT_ACTIVITY_SIZE - 1);
 
716
 
 
717
        memcpy(msg.m_what, what, len);
 
718
        msg.m_what[len] = '\0';
 
719
        len += offsetof(PgStat_MsgActivity, m_what) +1;
 
720
 
 
721
        pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_ACTIVITY);
 
722
        pgstat_send(&msg, len);
 
723
}
 
724
 
 
725
 
 
726
/* ----------
 
727
 * pgstat_report_tabstat() -
 
728
 *
 
729
 *      Called from tcop/postgres.c to send the so far collected
 
730
 *      per table access statistics to the collector.
 
731
 * ----------
 
732
 */
 
733
void
 
734
pgstat_report_tabstat(void)
 
735
{
 
736
        int                     i;
 
737
 
 
738
        if (pgStatSock < 0 ||
 
739
                !(pgstat_collect_querystring ||
 
740
                  pgstat_collect_tuplelevel ||
 
741
                  pgstat_collect_blocklevel))
 
742
        {
 
743
                /* Not reporting stats, so just flush whatever we have */
 
744
                pgStatTabstatUsed = 0;
 
745
                return;
 
746
        }
 
747
 
 
748
        /*
 
749
         * For each message buffer used during the last query set the header
 
750
         * fields and send it out.
 
751
         */
 
752
        for (i = 0; i < pgStatTabstatUsed; i++)
 
753
        {
 
754
                PgStat_MsgTabstat *tsmsg = pgStatTabstatMessages[i];
 
755
                int                     n;
 
756
                int                     len;
 
757
 
 
758
                n = tsmsg->m_nentries;
 
759
                len = offsetof(PgStat_MsgTabstat, m_entry[0]) +
 
760
                        n * sizeof(PgStat_TableEntry);
 
761
 
 
762
                tsmsg->m_xact_commit = pgStatXactCommit;
 
763
                tsmsg->m_xact_rollback = pgStatXactRollback;
 
764
                pgStatXactCommit = 0;
 
765
                pgStatXactRollback = 0;
 
766
 
 
767
                pgstat_setheader(&tsmsg->m_hdr, PGSTAT_MTYPE_TABSTAT);
 
768
                pgstat_send(tsmsg, len);
 
769
        }
 
770
 
 
771
        pgStatTabstatUsed = 0;
 
772
}
 
773
 
 
774
 
 
775
/* ----------
 
776
 * pgstat_vacuum_tabstat() -
 
777
 *
 
778
 *      Will tell the collector about objects he can get rid of.
 
779
 * ----------
 
780
 */
 
781
int
 
782
pgstat_vacuum_tabstat(void)
 
783
{
 
784
        Relation        dbrel;
 
785
        HeapScanDesc dbscan;
 
786
        HeapTuple       dbtup;
 
787
        Oid                *dbidlist;
 
788
        int                     dbidalloc;
 
789
        int                     dbidused;
 
790
        HASH_SEQ_STATUS hstat;
 
791
        PgStat_StatDBEntry *dbentry;
 
792
        PgStat_StatTabEntry *tabentry;
 
793
        HeapTuple       reltup;
 
794
        int                     nobjects = 0;
 
795
        PgStat_MsgTabpurge msg;
 
796
        int                     len;
 
797
        int                     i;
 
798
 
 
799
        if (pgStatSock < 0)
 
800
                return 0;
 
801
 
 
802
        /*
 
803
         * If not done for this transaction, read the statistics collector
 
804
         * stats file into some hash tables.
 
805
         */
 
806
        backend_read_statsfile();
 
807
 
 
808
        /*
 
809
         * Lookup our own database entry
 
810
         */
 
811
        dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
 
812
                                                                                                 (void *) &MyDatabaseId,
 
813
                                                                                                 HASH_FIND, NULL);
 
814
        if (dbentry == NULL)
 
815
                return -1;
 
816
 
 
817
        if (dbentry->tables == NULL)
 
818
                return 0;
 
819
 
 
820
        /*
 
821
         * Initialize our messages table counter to zero
 
822
         */
 
823
        msg.m_nentries = 0;
 
824
 
 
825
        /*
 
826
         * Check for all tables if they still exist.
 
827
         */
 
828
        hash_seq_init(&hstat, dbentry->tables);
 
829
        while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&hstat)) != NULL)
 
830
        {
 
831
                /*
 
832
                 * Check if this relation is still alive by looking up it's
 
833
                 * pg_class tuple in the system catalog cache.
 
834
                 */
 
835
                reltup = SearchSysCache(RELOID,
 
836
                                                                ObjectIdGetDatum(tabentry->tableid),
 
837
                                                                0, 0, 0);
 
838
                if (HeapTupleIsValid(reltup))
 
839
                {
 
840
                        ReleaseSysCache(reltup);
 
841
                        continue;
 
842
                }
 
843
 
 
844
                /*
 
845
                 * Add this tables Oid to the message
 
846
                 */
 
847
                msg.m_tableid[msg.m_nentries++] = tabentry->tableid;
 
848
                nobjects++;
 
849
 
 
850
                /*
 
851
                 * If the message is full, send it out and reinitialize ot zero
 
852
                 */
 
853
                if (msg.m_nentries >= PGSTAT_NUM_TABPURGE)
 
854
                {
 
855
                        len = offsetof(PgStat_MsgTabpurge, m_tableid[0])
 
856
                                +msg.m_nentries * sizeof(Oid);
 
857
 
 
858
                        pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_TABPURGE);
 
859
                        pgstat_send(&msg, len);
 
860
 
 
861
                        msg.m_nentries = 0;
 
862
                }
 
863
        }
 
864
 
 
865
        /*
 
866
         * Send the rest
 
867
         */
 
868
        if (msg.m_nentries > 0)
 
869
        {
 
870
                len = offsetof(PgStat_MsgTabpurge, m_tableid[0])
 
871
                        +msg.m_nentries * sizeof(Oid);
 
872
 
 
873
                pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_TABPURGE);
 
874
                pgstat_send(&msg, len);
 
875
        }
 
876
 
 
877
        /*
 
878
         * Read pg_database and remember the Oid's of all existing databases
 
879
         */
 
880
        dbidalloc = 256;
 
881
        dbidused = 0;
 
882
        dbidlist = (Oid *) palloc(sizeof(Oid) * dbidalloc);
 
883
 
 
884
        dbrel = heap_openr(DatabaseRelationName, AccessShareLock);
 
885
        dbscan = heap_beginscan(dbrel, SnapshotNow, 0, NULL);
 
886
        while ((dbtup = heap_getnext(dbscan, ForwardScanDirection)) != NULL)
 
887
        {
 
888
                if (dbidused >= dbidalloc)
 
889
                {
 
890
                        dbidalloc *= 2;
 
891
                        dbidlist = (Oid *) repalloc((char *) dbidlist,
 
892
                                                                                sizeof(Oid) * dbidalloc);
 
893
                }
 
894
                dbidlist[dbidused++] = HeapTupleGetOid(dbtup);
 
895
        }
 
896
        heap_endscan(dbscan);
 
897
        heap_close(dbrel, AccessShareLock);
 
898
 
 
899
        /*
 
900
         * Search the database hash table for dead databases and tell the
 
901
         * collector to drop them as well.
 
902
         */
 
903
        hash_seq_init(&hstat, pgStatDBHash);
 
904
        while ((dbentry = (PgStat_StatDBEntry *) hash_seq_search(&hstat)) != NULL)
 
905
        {
 
906
                Oid                     dbid = dbentry->databaseid;
 
907
 
 
908
                for (i = 0; i < dbidused; i++)
 
909
                {
 
910
                        if (dbidlist[i] == dbid)
 
911
                        {
 
912
                                dbid = InvalidOid;
 
913
                                break;
 
914
                        }
 
915
                }
 
916
 
 
917
                if (dbid != InvalidOid)
 
918
                {
 
919
                        nobjects++;
 
920
                        pgstat_drop_database(dbid);
 
921
                }
 
922
        }
 
923
 
 
924
        /*
 
925
         * Free the dbid list.
 
926
         */
 
927
        pfree(dbidlist);
 
928
 
 
929
        /*
 
930
         * Tell the caller how many removeable objects we found
 
931
         */
 
932
        return nobjects;
 
933
}
 
934
 
 
935
 
 
936
/* ----------
 
937
 * pgstat_drop_database() -
 
938
 *
 
939
 *      Tell the collector that we just dropped a database.
 
940
 *      This is the only message that shouldn't get lost in space. Otherwise
 
941
 *      the collector will keep the statistics for the dead DB until his
 
942
 *      stats file got removed while the postmaster is down.
 
943
 * ----------
 
944
 */
 
945
static void
 
946
pgstat_drop_database(Oid databaseid)
 
947
{
 
948
        PgStat_MsgDropdb msg;
 
949
 
 
950
        if (pgStatSock < 0)
 
951
                return;
 
952
 
 
953
        msg.m_databaseid = databaseid;
 
954
 
 
955
        pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_DROPDB);
 
956
        pgstat_send(&msg, sizeof(msg));
 
957
}
 
958
 
 
959
 
 
960
/* ----------
 
961
 * pgstat_reset_counters() -
 
962
 *
 
963
 *      Tell the statistics collector to reset counters for our database.
 
964
 * ----------
 
965
 */
 
966
void
 
967
pgstat_reset_counters(void)
 
968
{
 
969
        PgStat_MsgResetcounter msg;
 
970
 
 
971
        if (pgStatSock < 0)
 
972
                return;
 
973
 
 
974
        if (!superuser())
 
975
                ereport(ERROR,
 
976
                                (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
 
977
                          errmsg("must be superuser to reset statistics counters")));
 
978
 
 
979
        pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_RESETCOUNTER);
 
980
        pgstat_send(&msg, sizeof(msg));
 
981
}
 
982
 
 
983
 
 
984
/* ----------
 
985
 * pgstat_ping() -
 
986
 *
 
987
 *      Send some junk data to the collector to increase traffic.
 
988
 * ----------
 
989
 */
 
990
void
 
991
pgstat_ping(void)
 
992
{
 
993
        PgStat_MsgDummy msg;
 
994
 
 
995
        if (pgStatSock < 0)
 
996
                return;
 
997
 
 
998
        pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_DUMMY);
 
999
        pgstat_send(&msg, sizeof(msg));
 
1000
}
 
1001
 
 
1002
/*
 
1003
 * Create or enlarge the pgStatTabstatMessages array
 
1004
 */
 
1005
static void
 
1006
more_tabstat_space(void)
 
1007
{
 
1008
        PgStat_MsgTabstat *newMessages;
 
1009
        PgStat_MsgTabstat **msgArray;
 
1010
        int                     newAlloc = pgStatTabstatAlloc + TABSTAT_QUANTUM;
 
1011
        int                     i;
 
1012
 
 
1013
        /* Create (another) quantum of message buffers */
 
1014
        newMessages = (PgStat_MsgTabstat *)
 
1015
                MemoryContextAllocZero(TopMemoryContext,
 
1016
                                                           sizeof(PgStat_MsgTabstat) * TABSTAT_QUANTUM);
 
1017
 
 
1018
        /* Create or enlarge the pointer array */
 
1019
        if (pgStatTabstatMessages == NULL)
 
1020
                msgArray = (PgStat_MsgTabstat **)
 
1021
                        MemoryContextAlloc(TopMemoryContext,
 
1022
                                                           sizeof(PgStat_MsgTabstat *) * newAlloc);
 
1023
        else
 
1024
                msgArray = (PgStat_MsgTabstat **)
 
1025
                        repalloc(pgStatTabstatMessages,
 
1026
                                         sizeof(PgStat_MsgTabstat *) * newAlloc);
 
1027
 
 
1028
        for (i = 0; i < TABSTAT_QUANTUM; i++)
 
1029
                msgArray[pgStatTabstatAlloc + i] = newMessages++;
 
1030
        pgStatTabstatMessages = msgArray;
 
1031
        pgStatTabstatAlloc = newAlloc;
 
1032
 
 
1033
        Assert(pgStatTabstatUsed < pgStatTabstatAlloc);
 
1034
}
 
1035
 
 
1036
/* ----------
 
1037
 * pgstat_initstats() -
 
1038
 *
 
1039
 *      Called from various places usually dealing with initialization
 
1040
 *      of Relation or Scan structures. The data placed into these
 
1041
 *      structures from here tell where later to count for buffer reads,
 
1042
 *      scans and tuples fetched.
 
1043
 * ----------
 
1044
 */
 
1045
void
 
1046
pgstat_initstats(PgStat_Info *stats, Relation rel)
 
1047
{
 
1048
        Oid                     rel_id = rel->rd_id;
 
1049
        PgStat_TableEntry *useent;
 
1050
        PgStat_MsgTabstat *tsmsg;
 
1051
        int                     mb;
 
1052
        int                     i;
 
1053
 
 
1054
        /*
 
1055
         * Initialize data not to count at all.
 
1056
         */
 
1057
        stats->tabentry = NULL;
 
1058
        stats->no_stats = FALSE;
 
1059
        stats->heap_scan_counted = FALSE;
 
1060
        stats->index_scan_counted = FALSE;
 
1061
 
 
1062
        if (pgStatSock < 0 ||
 
1063
                !(pgstat_collect_tuplelevel ||
 
1064
                  pgstat_collect_blocklevel))
 
1065
        {
 
1066
                stats->no_stats = TRUE;
 
1067
                return;
 
1068
        }
 
1069
 
 
1070
        /*
 
1071
         * Search the already-used message slots for this relation.
 
1072
         */
 
1073
        for (mb = 0; mb < pgStatTabstatUsed; mb++)
 
1074
        {
 
1075
                tsmsg = pgStatTabstatMessages[mb];
 
1076
 
 
1077
                for (i = tsmsg->m_nentries; --i >= 0;)
 
1078
                {
 
1079
                        if (tsmsg->m_entry[i].t_id == rel_id)
 
1080
                        {
 
1081
                                stats->tabentry = (void *) &(tsmsg->m_entry[i]);
 
1082
                                return;
 
1083
                        }
 
1084
                }
 
1085
 
 
1086
                if (tsmsg->m_nentries >= PGSTAT_NUM_TABENTRIES)
 
1087
                        continue;
 
1088
 
 
1089
                /*
 
1090
                 * Not found, but found a message buffer with an empty slot
 
1091
                 * instead. Fine, let's use this one.
 
1092
                 */
 
1093
                i = tsmsg->m_nentries++;
 
1094
                useent = &tsmsg->m_entry[i];
 
1095
                MemSet(useent, 0, sizeof(PgStat_TableEntry));
 
1096
                useent->t_id = rel_id;
 
1097
                stats->tabentry = (void *) useent;
 
1098
                return;
 
1099
        }
 
1100
 
 
1101
        /*
 
1102
         * If we ran out of message buffers, we just allocate more.
 
1103
         */
 
1104
        if (pgStatTabstatUsed >= pgStatTabstatAlloc)
 
1105
                more_tabstat_space();
 
1106
 
 
1107
        /*
 
1108
         * Use the first entry of the next message buffer.
 
1109
         */
 
1110
        mb = pgStatTabstatUsed++;
 
1111
        tsmsg = pgStatTabstatMessages[mb];
 
1112
        tsmsg->m_nentries = 1;
 
1113
        useent = &tsmsg->m_entry[0];
 
1114
        MemSet(useent, 0, sizeof(PgStat_TableEntry));
 
1115
        useent->t_id = rel_id;
 
1116
        stats->tabentry = (void *) useent;
 
1117
}
 
1118
 
 
1119
 
 
1120
/* ----------
 
1121
 * pgstat_count_xact_commit() -
 
1122
 *
 
1123
 *      Called from access/transam/xact.c to count transaction commits.
 
1124
 * ----------
 
1125
 */
 
1126
void
 
1127
pgstat_count_xact_commit(void)
 
1128
{
 
1129
        if (!(pgstat_collect_querystring ||
 
1130
                  pgstat_collect_tuplelevel ||
 
1131
                  pgstat_collect_blocklevel))
 
1132
                return;
 
1133
 
 
1134
        pgStatXactCommit++;
 
1135
 
 
1136
        /*
 
1137
         * If there was no relation activity yet, just make one existing
 
1138
         * message buffer used without slots, causing the next report to tell
 
1139
         * new xact-counters.
 
1140
         */
 
1141
        if (pgStatTabstatAlloc == 0)
 
1142
                more_tabstat_space();
 
1143
 
 
1144
        if (pgStatTabstatUsed == 0)
 
1145
        {
 
1146
                pgStatTabstatUsed++;
 
1147
                pgStatTabstatMessages[0]->m_nentries = 0;
 
1148
        }
 
1149
}
 
1150
 
 
1151
 
 
1152
/* ----------
 
1153
 * pgstat_count_xact_rollback() -
 
1154
 *
 
1155
 *      Called from access/transam/xact.c to count transaction rollbacks.
 
1156
 * ----------
 
1157
 */
 
1158
void
 
1159
pgstat_count_xact_rollback(void)
 
1160
{
 
1161
        if (!(pgstat_collect_querystring ||
 
1162
                  pgstat_collect_tuplelevel ||
 
1163
                  pgstat_collect_blocklevel))
 
1164
                return;
 
1165
 
 
1166
        pgStatXactRollback++;
 
1167
 
 
1168
        /*
 
1169
         * If there was no relation activity yet, just make one existing
 
1170
         * message buffer used without slots, causing the next report to tell
 
1171
         * new xact-counters.
 
1172
         */
 
1173
        if (pgStatTabstatAlloc == 0)
 
1174
                more_tabstat_space();
 
1175
 
 
1176
        if (pgStatTabstatUsed == 0)
 
1177
        {
 
1178
                pgStatTabstatUsed++;
 
1179
                pgStatTabstatMessages[0]->m_nentries = 0;
 
1180
        }
 
1181
}
 
1182
 
 
1183
 
 
1184
/* ----------
 
1185
 * pgstat_fetch_stat_dbentry() -
 
1186
 *
 
1187
 *      Support function for the SQL-callable pgstat* functions. Returns
 
1188
 *      the collected statistics for one database or NULL. NULL doesn't mean
 
1189
 *      that the database doesn't exist, it is just not yet known by the
 
1190
 *      collector, so the caller is better off to report ZERO instead.
 
1191
 * ----------
 
1192
 */
 
1193
PgStat_StatDBEntry *
 
1194
pgstat_fetch_stat_dbentry(Oid dbid)
 
1195
{
 
1196
        PgStat_StatDBEntry *dbentry;
 
1197
 
 
1198
        /*
 
1199
         * If not done for this transaction, read the statistics collector
 
1200
         * stats file into some hash tables.
 
1201
         */
 
1202
        backend_read_statsfile();
 
1203
 
 
1204
        /*
 
1205
         * Lookup the requested database
 
1206
         */
 
1207
        dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
 
1208
                                                                                                 (void *) &dbid,
 
1209
                                                                                                 HASH_FIND, NULL);
 
1210
        if (dbentry == NULL)
 
1211
                return NULL;
 
1212
 
 
1213
        return dbentry;
 
1214
}
 
1215
 
 
1216
 
 
1217
/* ----------
 
1218
 * pgstat_fetch_stat_tabentry() -
 
1219
 *
 
1220
 *      Support function for the SQL-callable pgstat* functions. Returns
 
1221
 *      the collected statistics for one table or NULL. NULL doesn't mean
 
1222
 *      that the table doesn't exist, it is just not yet known by the
 
1223
 *      collector, so the caller is better off to report ZERO instead.
 
1224
 * ----------
 
1225
 */
 
1226
PgStat_StatTabEntry *
 
1227
pgstat_fetch_stat_tabentry(Oid relid)
 
1228
{
 
1229
        PgStat_StatDBEntry *dbentry;
 
1230
        PgStat_StatTabEntry *tabentry;
 
1231
 
 
1232
        /*
 
1233
         * If not done for this transaction, read the statistics collector
 
1234
         * stats file into some hash tables.
 
1235
         */
 
1236
        backend_read_statsfile();
 
1237
 
 
1238
        /*
 
1239
         * Lookup our database.
 
1240
         */
 
1241
        dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
 
1242
                                                                                                 (void *) &MyDatabaseId,
 
1243
                                                                                                 HASH_FIND, NULL);
 
1244
        if (dbentry == NULL)
 
1245
                return NULL;
 
1246
 
 
1247
        /*
 
1248
         * Now inside the DB's table hash table lookup the requested one.
 
1249
         */
 
1250
        if (dbentry->tables == NULL)
 
1251
                return NULL;
 
1252
        tabentry = (PgStat_StatTabEntry *) hash_search(dbentry->tables,
 
1253
                                                                                                   (void *) &relid,
 
1254
                                                                                                   HASH_FIND, NULL);
 
1255
        if (tabentry == NULL)
 
1256
                return NULL;
 
1257
 
 
1258
        return tabentry;
 
1259
}
 
1260
 
 
1261
 
 
1262
/* ----------
 
1263
 * pgstat_fetch_stat_beentry() -
 
1264
 *
 
1265
 *      Support function for the SQL-callable pgstat* functions. Returns
 
1266
 *      the actual activity slot of one active backend. The caller is
 
1267
 *      responsible for a check if the actual user is permitted to see
 
1268
 *      that info (especially the querystring).
 
1269
 * ----------
 
1270
 */
 
1271
PgStat_StatBeEntry *
 
1272
pgstat_fetch_stat_beentry(int beid)
 
1273
{
 
1274
        backend_read_statsfile();
 
1275
 
 
1276
        if (beid < 1 || beid > pgStatNumBackends)
 
1277
                return NULL;
 
1278
 
 
1279
        return &pgStatBeTable[beid - 1];
 
1280
}
 
1281
 
 
1282
 
 
1283
/* ----------
 
1284
 * pgstat_fetch_stat_numbackends() -
 
1285
 *
 
1286
 *      Support function for the SQL-callable pgstat* functions. Returns
 
1287
 *      the maximum current backend id.
 
1288
 * ----------
 
1289
 */
 
1290
int
 
1291
pgstat_fetch_stat_numbackends(void)
 
1292
{
 
1293
        backend_read_statsfile();
 
1294
 
 
1295
        return pgStatNumBackends;
 
1296
}
 
1297
 
 
1298
 
 
1299
 
 
1300
/* ------------------------------------------------------------
 
1301
 * Local support functions follow
 
1302
 * ------------------------------------------------------------
 
1303
 */
 
1304
 
 
1305
 
 
1306
/* ----------
 
1307
 * pgstat_setheader() -
 
1308
 *
 
1309
 *              Set common header fields in a statistics message
 
1310
 * ----------
 
1311
 */
 
1312
static void
 
1313
pgstat_setheader(PgStat_MsgHdr *hdr, int mtype)
 
1314
{
 
1315
        hdr->m_type = mtype;
 
1316
        hdr->m_backendid = MyBackendId;
 
1317
        hdr->m_procpid = MyProcPid;
 
1318
        hdr->m_databaseid = MyDatabaseId;
 
1319
        hdr->m_userid = GetSessionUserId();
 
1320
}
 
1321
 
 
1322
 
 
1323
/* ----------
 
1324
 * pgstat_send() -
 
1325
 *
 
1326
 *              Send out one statistics message to the collector
 
1327
 * ----------
 
1328
 */
 
1329
static void
 
1330
pgstat_send(void *msg, int len)
 
1331
{
 
1332
        if (pgStatSock < 0)
 
1333
                return;
 
1334
 
 
1335
        ((PgStat_MsgHdr *) msg)->m_size = len;
 
1336
 
 
1337
        send(pgStatSock, msg, len, 0);
 
1338
        /* We deliberately ignore any error from send() */
 
1339
}
 
1340
 
 
1341
 
 
1342
/* ----------
 
1343
 * PgstatBufferMain() -
 
1344
 *
 
1345
 *      Start up the statistics buffer process.  This is the body of the
 
1346
 *      postmaster child process.
 
1347
 *
 
1348
 *      The argc/argv parameters are valid only in EXEC_BACKEND case.
 
1349
 * ----------
 
1350
 */
 
1351
NON_EXEC_STATIC void
 
1352
PgstatBufferMain(int argc, char *argv[])
 
1353
{
 
1354
        IsUnderPostmaster = true;       /* we are a postmaster subprocess now */
 
1355
 
 
1356
        MyProcPid = getpid();           /* reset MyProcPid */
 
1357
 
 
1358
        /* Lose the postmaster's on-exit routines */
 
1359
        on_exit_reset();
 
1360
 
 
1361
        /*
 
1362
         * Ignore all signals usually bound to some action in the postmaster,
 
1363
         * except for SIGCHLD and SIGQUIT --- see pgstat_recvbuffer.
 
1364
         */
 
1365
        pqsignal(SIGHUP, SIG_IGN);
 
1366
        pqsignal(SIGINT, SIG_IGN);
 
1367
        pqsignal(SIGTERM, SIG_IGN);
 
1368
        pqsignal(SIGQUIT, pgstat_exit);
 
1369
        pqsignal(SIGALRM, SIG_IGN);
 
1370
        pqsignal(SIGPIPE, SIG_IGN);
 
1371
        pqsignal(SIGUSR1, SIG_IGN);
 
1372
        pqsignal(SIGUSR2, SIG_IGN);
 
1373
        pqsignal(SIGCHLD, pgstat_die);
 
1374
        pqsignal(SIGTTIN, SIG_DFL);
 
1375
        pqsignal(SIGTTOU, SIG_DFL);
 
1376
        pqsignal(SIGCONT, SIG_DFL);
 
1377
        pqsignal(SIGWINCH, SIG_DFL);
 
1378
        /* unblock will happen in pgstat_recvbuffer */
 
1379
 
 
1380
#ifdef EXEC_BACKEND
 
1381
        pgstat_parseArgs(argc, argv);
 
1382
#endif
 
1383
 
 
1384
        /*
 
1385
         * Start a buffering process to read from the socket, so we have a
 
1386
         * little more time to process incoming messages.
 
1387
         *
 
1388
         * NOTE: the process structure is: postmaster is parent of buffer process
 
1389
         * is parent of collector process.      This way, the buffer can detect
 
1390
         * collector failure via SIGCHLD, whereas otherwise it wouldn't notice
 
1391
         * collector failure until it tried to write on the pipe.  That would
 
1392
         * mean that after the postmaster started a new collector, we'd have
 
1393
         * two buffer processes competing to read from the UDP socket --- not
 
1394
         * good.
 
1395
         */
 
1396
        if (pgpipe(pgStatPipe) < 0)
 
1397
                ereport(ERROR,
 
1398
                                (errcode_for_socket_access(),
 
1399
                         errmsg("could not create pipe for statistics buffer: %m")));
 
1400
 
 
1401
        /* child becomes collector process */
 
1402
#ifdef EXEC_BACKEND
 
1403
        pgStatCollectorPid = pgstat_forkexec(STAT_PROC_COLLECTOR);
 
1404
#else
 
1405
        pgStatCollectorPid = fork();
 
1406
#endif
 
1407
        switch (pgStatCollectorPid)
 
1408
        {
 
1409
                case -1:
 
1410
                        ereport(ERROR,
 
1411
                                        (errmsg("could not fork statistics collector: %m")));
 
1412
 
 
1413
#ifndef EXEC_BACKEND
 
1414
                case 0:
 
1415
                        /* child becomes collector process */
 
1416
                        PgstatCollectorMain(0, NULL);
 
1417
                        break;
 
1418
#endif
 
1419
 
 
1420
                default:
 
1421
                        /* parent becomes buffer process */
 
1422
                        closesocket(pgStatPipe[0]);
 
1423
                        pgstat_recvbuffer();
 
1424
        }
 
1425
        exit(0);
 
1426
}
 
1427
 
 
1428
 
 
1429
/* ----------
 
1430
 * PgstatCollectorMain() -
 
1431
 *
 
1432
 *      Start up the statistics collector itself.  This is the body of the
 
1433
 *      postmaster grandchild process.
 
1434
 *
 
1435
 *      The argc/argv parameters are valid only in EXEC_BACKEND case.
 
1436
 * ----------
 
1437
 */
 
1438
NON_EXEC_STATIC void
 
1439
PgstatCollectorMain(int argc, char *argv[])
 
1440
{
 
1441
        PgStat_Msg      msg;
 
1442
        fd_set          rfds;
 
1443
        int                     readPipe;
 
1444
        int                     nready;
 
1445
        int                     len = 0;
 
1446
        struct timeval timeout;
 
1447
        struct timeval next_statwrite;
 
1448
        bool            need_statwrite;
 
1449
        HASHCTL         hash_ctl;
 
1450
 
 
1451
        MyProcPid = getpid();           /* reset MyProcPid */
 
1452
 
 
1453
        /*
 
1454
         * Reset signal handling.  With the exception of restoring default
 
1455
         * SIGCHLD and SIGQUIT handling, this is a no-op in the
 
1456
         * non-EXEC_BACKEND case because we'll have inherited these settings
 
1457
         * from the buffer process; but it's not a no-op for EXEC_BACKEND.
 
1458
         */
 
1459
        pqsignal(SIGHUP, SIG_IGN);
 
1460
        pqsignal(SIGINT, SIG_IGN);
 
1461
        pqsignal(SIGTERM, SIG_IGN);
 
1462
#ifndef WIN32
 
1463
        pqsignal(SIGQUIT, SIG_IGN);
 
1464
#else
 
1465
        /* kluge to allow buffer process to kill collector; FIXME */
 
1466
        pqsignal(SIGQUIT, pgstat_exit);
 
1467
#endif
 
1468
        pqsignal(SIGALRM, SIG_IGN);
 
1469
        pqsignal(SIGPIPE, SIG_IGN);
 
1470
        pqsignal(SIGUSR1, SIG_IGN);
 
1471
        pqsignal(SIGUSR2, SIG_IGN);
 
1472
        pqsignal(SIGCHLD, SIG_DFL);
 
1473
        pqsignal(SIGTTIN, SIG_DFL);
 
1474
        pqsignal(SIGTTOU, SIG_DFL);
 
1475
        pqsignal(SIGCONT, SIG_DFL);
 
1476
        pqsignal(SIGWINCH, SIG_DFL);
 
1477
        PG_SETMASK(&UnBlockSig);
 
1478
 
 
1479
#ifdef EXEC_BACKEND
 
1480
        pgstat_parseArgs(argc, argv);
 
1481
#endif
 
1482
 
 
1483
        /* Close unwanted files */
 
1484
        closesocket(pgStatPipe[1]);
 
1485
        closesocket(pgStatSock);
 
1486
 
 
1487
        /*
 
1488
         * Identify myself via ps
 
1489
         */
 
1490
        init_ps_display("stats collector process", "", "");
 
1491
        set_ps_display("");
 
1492
 
 
1493
        /*
 
1494
         * Initialize filenames needed for status reports.
 
1495
         */
 
1496
        snprintf(pgStat_fname, MAXPGPATH, PGSTAT_STAT_FILENAME, DataDir);
 
1497
        /* tmpfname need only be set correctly in this process */
 
1498
        snprintf(pgStat_tmpfname, MAXPGPATH, PGSTAT_STAT_TMPFILE,
 
1499
                         DataDir, (int)getpid());
 
1500
 
 
1501
        /*
 
1502
         * Arrange to write the initial status file right away
 
1503
         */
 
1504
        gettimeofday(&next_statwrite, NULL);
 
1505
        need_statwrite = TRUE;
 
1506
 
 
1507
        /*
 
1508
         * Read in an existing statistics stats file or initialize the stats
 
1509
         * to zero.
 
1510
         */
 
1511
        pgStatRunningInCollector = TRUE;
 
1512
        pgstat_read_statsfile(&pgStatDBHash, InvalidOid, NULL, NULL);
 
1513
 
 
1514
        /*
 
1515
         * Create the dead backend hashtable
 
1516
         */
 
1517
        memset(&hash_ctl, 0, sizeof(hash_ctl));
 
1518
        hash_ctl.keysize = sizeof(int);
 
1519
        hash_ctl.entrysize = sizeof(PgStat_StatBeDead);
 
1520
        hash_ctl.hash = tag_hash;
 
1521
        pgStatBeDead = hash_create("Dead Backends", PGSTAT_BE_HASH_SIZE,
 
1522
                                                           &hash_ctl, HASH_ELEM | HASH_FUNCTION);
 
1523
 
 
1524
        /*
 
1525
         * Create the known backends table
 
1526
         */
 
1527
        pgStatBeTable = (PgStat_StatBeEntry *) palloc0(
 
1528
                                                           sizeof(PgStat_StatBeEntry) * MaxBackends);
 
1529
 
 
1530
        readPipe = pgStatPipe[0];
 
1531
 
 
1532
        /*
 
1533
         * Process incoming messages and handle all the reporting stuff until
 
1534
         * there are no more messages.
 
1535
         */
 
1536
        for (;;)
 
1537
        {
 
1538
                /*
 
1539
                 * If we need to write the status file again (there have been
 
1540
                 * changes in the statistics since we wrote it last) calculate the
 
1541
                 * timeout until we have to do so.
 
1542
                 */
 
1543
                if (need_statwrite)
 
1544
                {
 
1545
                        struct timeval now;
 
1546
 
 
1547
                        gettimeofday(&now, NULL);
 
1548
                        /* avoid assuming that tv_sec is signed */
 
1549
                        if (now.tv_sec > next_statwrite.tv_sec ||
 
1550
                                (now.tv_sec == next_statwrite.tv_sec &&
 
1551
                                 now.tv_usec >= next_statwrite.tv_usec))
 
1552
                        {
 
1553
                                timeout.tv_sec = 0;
 
1554
                                timeout.tv_usec = 0;
 
1555
                        }
 
1556
                        else
 
1557
                        {
 
1558
                                timeout.tv_sec = next_statwrite.tv_sec - now.tv_sec;
 
1559
                                timeout.tv_usec = next_statwrite.tv_usec - now.tv_usec;
 
1560
                                if (timeout.tv_usec < 0)
 
1561
                                {
 
1562
                                        timeout.tv_sec--;
 
1563
                                        timeout.tv_usec += 1000000;
 
1564
                                }
 
1565
                        }
 
1566
                }
 
1567
 
 
1568
                /*
 
1569
                 * Setup the descriptor set for select(2)
 
1570
                 */
 
1571
                FD_ZERO(&rfds);
 
1572
                FD_SET(readPipe, &rfds);
 
1573
 
 
1574
                /*
 
1575
                 * Now wait for something to do.
 
1576
                 */
 
1577
                nready = select(readPipe + 1, &rfds, NULL, NULL,
 
1578
                                                (need_statwrite) ? &timeout : NULL);
 
1579
                if (nready < 0)
 
1580
                {
 
1581
                        if (errno == EINTR)
 
1582
                                continue;
 
1583
                        ereport(ERROR,
 
1584
                                        (errcode_for_socket_access(),
 
1585
                                 errmsg("select() failed in statistics collector: %m")));
 
1586
                }
 
1587
 
 
1588
                /*
 
1589
                 * If there are no descriptors ready, our timeout for writing the
 
1590
                 * stats file happened.
 
1591
                 */
 
1592
                if (nready == 0)
 
1593
                {
 
1594
                        pgstat_write_statsfile();
 
1595
                        need_statwrite = FALSE;
 
1596
 
 
1597
                        continue;
 
1598
                }
 
1599
 
 
1600
                /*
 
1601
                 * Check if there is a new statistics message to collect.
 
1602
                 */
 
1603
                if (FD_ISSET(readPipe, &rfds))
 
1604
                {
 
1605
                        /*
 
1606
                         * We may need to issue multiple read calls in case the buffer
 
1607
                         * process didn't write the message in a single write, which
 
1608
                         * is possible since it dumps its buffer bytewise. In any
 
1609
                         * case, we'd need two reads since we don't know the message
 
1610
                         * length initially.
 
1611
                         */
 
1612
                        int                     nread = 0;
 
1613
                        int                     targetlen = sizeof(PgStat_MsgHdr);              /* initial */
 
1614
                        bool            pipeEOF = false;
 
1615
 
 
1616
                        while (nread < targetlen)
 
1617
                        {
 
1618
                                len = piperead(readPipe, ((char *) &msg) + nread,
 
1619
                                                           targetlen - nread);
 
1620
                                if (len < 0)
 
1621
                                {
 
1622
                                        if (errno == EINTR)
 
1623
                                                continue;
 
1624
                                        ereport(ERROR,
 
1625
                                                        (errcode_for_socket_access(),
 
1626
                                                         errmsg("could not read from statistics collector pipe: %m")));
 
1627
                                }
 
1628
                                if (len == 0)   /* EOF on the pipe! */
 
1629
                                {
 
1630
                                        pipeEOF = true;
 
1631
                                        break;
 
1632
                                }
 
1633
                                nread += len;
 
1634
                                if (nread == sizeof(PgStat_MsgHdr))
 
1635
                                {
 
1636
                                        /* we have the header, compute actual msg length */
 
1637
                                        targetlen = msg.msg_hdr.m_size;
 
1638
                                        if (targetlen < (int) sizeof(PgStat_MsgHdr) ||
 
1639
                                                targetlen > (int) sizeof(msg))
 
1640
                                        {
 
1641
                                                /*
 
1642
                                                 * Bogus message length implies that we got out of
 
1643
                                                 * sync with the buffer process somehow. Abort so
 
1644
                                                 * that we can restart both processes.
 
1645
                                                 */
 
1646
                                                ereport(ERROR,
 
1647
                                                  (errmsg("invalid statistics message length")));
 
1648
                                        }
 
1649
                                }
 
1650
                        }
 
1651
 
 
1652
                        /*
 
1653
                         * EOF on the pipe implies that the buffer process exited.
 
1654
                         * Fall out of outer loop.
 
1655
                         */
 
1656
                        if (pipeEOF)
 
1657
                                break;
 
1658
 
 
1659
                        /*
 
1660
                         * Distribute the message to the specific function handling
 
1661
                         * it.
 
1662
                         */
 
1663
                        switch (msg.msg_hdr.m_type)
 
1664
                        {
 
1665
                                case PGSTAT_MTYPE_DUMMY:
 
1666
                                        break;
 
1667
 
 
1668
                                case PGSTAT_MTYPE_BESTART:
 
1669
                                        pgstat_recv_bestart((PgStat_MsgBestart *) &msg, nread);
 
1670
                                        break;
 
1671
 
 
1672
                                case PGSTAT_MTYPE_BETERM:
 
1673
                                        pgstat_recv_beterm((PgStat_MsgBeterm *) &msg, nread);
 
1674
                                        break;
 
1675
 
 
1676
                                case PGSTAT_MTYPE_TABSTAT:
 
1677
                                        pgstat_recv_tabstat((PgStat_MsgTabstat *) &msg, nread);
 
1678
                                        break;
 
1679
 
 
1680
                                case PGSTAT_MTYPE_TABPURGE:
 
1681
                                        pgstat_recv_tabpurge((PgStat_MsgTabpurge *) &msg, nread);
 
1682
                                        break;
 
1683
 
 
1684
                                case PGSTAT_MTYPE_ACTIVITY:
 
1685
                                        pgstat_recv_activity((PgStat_MsgActivity *) &msg, nread);
 
1686
                                        break;
 
1687
 
 
1688
                                case PGSTAT_MTYPE_DROPDB:
 
1689
                                        pgstat_recv_dropdb((PgStat_MsgDropdb *) &msg, nread);
 
1690
                                        break;
 
1691
 
 
1692
                                case PGSTAT_MTYPE_RESETCOUNTER:
 
1693
                                        pgstat_recv_resetcounter((PgStat_MsgResetcounter *) &msg,
 
1694
                                                                                         nread);
 
1695
                                        break;
 
1696
 
 
1697
                                default:
 
1698
                                        break;
 
1699
                        }
 
1700
 
 
1701
                        /*
 
1702
                         * Globally count messages.
 
1703
                         */
 
1704
                        pgStatNumMessages++;
 
1705
 
 
1706
                        /*
 
1707
                         * If this is the first message after we wrote the stats file
 
1708
                         * the last time, setup the timeout that it'd be written.
 
1709
                         */
 
1710
                        if (!need_statwrite)
 
1711
                        {
 
1712
                                gettimeofday(&next_statwrite, NULL);
 
1713
                                next_statwrite.tv_usec += ((PGSTAT_STAT_INTERVAL) * 1000);
 
1714
                                next_statwrite.tv_sec += (next_statwrite.tv_usec / 1000000);
 
1715
                                next_statwrite.tv_usec %= 1000000;
 
1716
                                need_statwrite = TRUE;
 
1717
                        }
 
1718
                }
 
1719
 
 
1720
                /*
 
1721
                 * Note that we do NOT check for postmaster exit inside the loop;
 
1722
                 * only EOF on the buffer pipe causes us to fall out.  This
 
1723
                 * ensures we don't exit prematurely if there are still a few
 
1724
                 * messages in the buffer or pipe at postmaster shutdown.
 
1725
                 */
 
1726
        }
 
1727
 
 
1728
        /*
 
1729
         * Okay, we saw EOF on the buffer pipe, so there are no more messages
 
1730
         * to process.  If the buffer process quit because of postmaster
 
1731
         * shutdown, we want to save the final stats to reuse at next startup.
 
1732
         * But if the buffer process failed, it seems best not to (there may
 
1733
         * even now be a new collector firing up, and we don't want it to read
 
1734
         * a partially-rewritten stats file).
 
1735
         */
 
1736
        if (!PostmasterIsAlive(false))
 
1737
                pgstat_write_statsfile();
 
1738
}
 
1739
 
 
1740
 
 
1741
/* ----------
 
1742
 * pgstat_recvbuffer() -
 
1743
 *
 
1744
 *      This is the body of the separate buffering process. Its only
 
1745
 *      purpose is to receive messages from the UDP socket as fast as
 
1746
 *      possible and forward them over a pipe into the collector itself.
 
1747
 *      If the collector is slow to absorb messages, they are buffered here.
 
1748
 * ----------
 
1749
 */
 
1750
static void
 
1751
pgstat_recvbuffer(void)
 
1752
{
 
1753
        fd_set          rfds;
 
1754
        fd_set          wfds;
 
1755
        struct timeval timeout;
 
1756
        int                     writePipe = pgStatPipe[1];
 
1757
        int                     maxfd;
 
1758
        int                     nready;
 
1759
        int                     len;
 
1760
        int                     xfr;
 
1761
        int                     frm;
 
1762
        PgStat_Msg      input_buffer;
 
1763
        char       *msgbuffer;
 
1764
        int                     msg_send = 0;   /* next send index in buffer */
 
1765
        int                     msg_recv = 0;   /* next receive index */
 
1766
        int                     msg_have = 0;   /* number of bytes stored */
 
1767
        bool            overflow = false;
 
1768
 
 
1769
        /*
 
1770
         * Identify myself via ps
 
1771
         */
 
1772
        init_ps_display("stats buffer process", "", "");
 
1773
        set_ps_display("");
 
1774
 
 
1775
        /*
 
1776
         * We want to die if our child collector process does.  There are two
 
1777
         * ways we might notice that it has died: receive SIGCHLD, or get a
 
1778
         * write failure on the pipe leading to the child.      We can set SIGPIPE
 
1779
         * to kill us here.  Our SIGCHLD handler was already set up before we
 
1780
         * forked (must do it that way, else it's a race condition).
 
1781
         */
 
1782
        pqsignal(SIGPIPE, SIG_DFL);
 
1783
        PG_SETMASK(&UnBlockSig);
 
1784
 
 
1785
        /*
 
1786
         * Set the write pipe to nonblock mode, so that we cannot block when
 
1787
         * the collector falls behind.
 
1788
         */
 
1789
        if (!pg_set_noblock(writePipe))
 
1790
                ereport(ERROR,
 
1791
                                (errcode_for_socket_access(),
 
1792
                                 errmsg("could not set statistics collector pipe to nonblocking mode: %m")));
 
1793
 
 
1794
        /*
 
1795
         * Allocate the message buffer
 
1796
         */
 
1797
        msgbuffer = (char *) palloc(PGSTAT_RECVBUFFERSZ);
 
1798
 
 
1799
        /*
 
1800
         * Loop forever
 
1801
         */
 
1802
        for (;;)
 
1803
        {
 
1804
                FD_ZERO(&rfds);
 
1805
                FD_ZERO(&wfds);
 
1806
                maxfd = -1;
 
1807
 
 
1808
                /*
 
1809
                 * As long as we have buffer space we add the socket to the read
 
1810
                 * descriptor set.
 
1811
                 */
 
1812
                if (msg_have <= (int) (PGSTAT_RECVBUFFERSZ - sizeof(PgStat_Msg)))
 
1813
                {
 
1814
                        FD_SET(pgStatSock, &rfds);
 
1815
                        maxfd = pgStatSock;
 
1816
                        overflow = false;
 
1817
                }
 
1818
                else
 
1819
                {
 
1820
                        if (!overflow)
 
1821
                        {
 
1822
                                ereport(LOG,
 
1823
                                                (errmsg("statistics buffer is full")));
 
1824
                                overflow = true;
 
1825
                        }
 
1826
                }
 
1827
 
 
1828
                /*
 
1829
                 * If we have messages to write out, we add the pipe to the write
 
1830
                 * descriptor set.
 
1831
                 */
 
1832
                if (msg_have > 0)
 
1833
                {
 
1834
                        FD_SET(writePipe, &wfds);
 
1835
                        if (writePipe > maxfd)
 
1836
                                maxfd = writePipe;
 
1837
                }
 
1838
 
 
1839
                /*
 
1840
                 * Wait for some work to do; but not for more than 10 seconds.
 
1841
                 * (This determines how quickly we will shut down after an
 
1842
                 * ungraceful postmaster termination; so it needn't be very fast.)
 
1843
                 */
 
1844
                timeout.tv_sec = 10;
 
1845
                timeout.tv_usec = 0;
 
1846
 
 
1847
                nready = select(maxfd + 1, &rfds, &wfds, NULL, &timeout);
 
1848
                if (nready < 0)
 
1849
                {
 
1850
                        if (errno == EINTR)
 
1851
                                continue;
 
1852
                        ereport(ERROR,
 
1853
                                        (errcode_for_socket_access(),
 
1854
                                         errmsg("select() failed in statistics buffer: %m")));
 
1855
                }
 
1856
 
 
1857
                /*
 
1858
                 * If there is a message on the socket, read it and check for
 
1859
                 * validity.
 
1860
                 */
 
1861
                if (FD_ISSET(pgStatSock, &rfds))
 
1862
                {
 
1863
                        len = recv(pgStatSock, (char *) &input_buffer,
 
1864
                                           sizeof(PgStat_Msg), 0);
 
1865
                        if (len < 0)
 
1866
                                ereport(ERROR,
 
1867
                                                (errcode_for_socket_access(),
 
1868
                                           errmsg("could not read statistics message: %m")));
 
1869
 
 
1870
                        /*
 
1871
                         * We ignore messages that are smaller than our common header
 
1872
                         */
 
1873
                        if (len < sizeof(PgStat_MsgHdr))
 
1874
                                continue;
 
1875
 
 
1876
                        /*
 
1877
                         * The received length must match the length in the header
 
1878
                         */
 
1879
                        if (input_buffer.msg_hdr.m_size != len)
 
1880
                                continue;
 
1881
 
 
1882
                        /*
 
1883
                         * O.K. - we accept this message.  Copy it to the circular
 
1884
                         * msgbuffer.
 
1885
                         */
 
1886
                        frm = 0;
 
1887
                        while (len > 0)
 
1888
                        {
 
1889
                                xfr = PGSTAT_RECVBUFFERSZ - msg_recv;
 
1890
                                if (xfr > len)
 
1891
                                        xfr = len;
 
1892
                                Assert(xfr > 0);
 
1893
                                memcpy(msgbuffer + msg_recv,
 
1894
                                           ((char *) &input_buffer) + frm,
 
1895
                                           xfr);
 
1896
                                msg_recv += xfr;
 
1897
                                if (msg_recv == PGSTAT_RECVBUFFERSZ)
 
1898
                                        msg_recv = 0;
 
1899
                                msg_have += xfr;
 
1900
                                frm += xfr;
 
1901
                                len -= xfr;
 
1902
                        }
 
1903
                }
 
1904
 
 
1905
                /*
 
1906
                 * If the collector is ready to receive, write some data into his
 
1907
                 * pipe.  We may or may not be able to write all that we have.
 
1908
                 *
 
1909
                 * NOTE: if what we have is less than PIPE_BUF bytes but more than
 
1910
                 * the space available in the pipe buffer, most kernels will
 
1911
                 * refuse to write any of it, and will return EAGAIN.  This means
 
1912
                 * we will busy-loop until the situation changes (either because
 
1913
                 * the collector caught up, or because more data arrives so that
 
1914
                 * we have more than PIPE_BUF bytes buffered).  This is not good,
 
1915
                 * but is there any way around it?      We have no way to tell when
 
1916
                 * the collector has caught up...
 
1917
                 */
 
1918
                if (FD_ISSET(writePipe, &wfds))
 
1919
                {
 
1920
                        xfr = PGSTAT_RECVBUFFERSZ - msg_send;
 
1921
                        if (xfr > msg_have)
 
1922
                                xfr = msg_have;
 
1923
                        Assert(xfr > 0);
 
1924
                        len = pipewrite(writePipe, msgbuffer + msg_send, xfr);
 
1925
                        if (len < 0)
 
1926
                        {
 
1927
                                if (errno == EINTR || errno == EAGAIN)
 
1928
                                        continue;       /* not enough space in pipe */
 
1929
                                ereport(ERROR,
 
1930
                                                (errcode_for_socket_access(),
 
1931
                                                 errmsg("could not write to statistics collector pipe: %m")));
 
1932
                        }
 
1933
                        /* NB: len < xfr is okay */
 
1934
                        msg_send += len;
 
1935
                        if (msg_send == PGSTAT_RECVBUFFERSZ)
 
1936
                                msg_send = 0;
 
1937
                        msg_have -= len;
 
1938
                }
 
1939
 
 
1940
                /*
 
1941
                 * Make sure we forwarded all messages before we check for
 
1942
                 * postmaster termination.
 
1943
                 */
 
1944
                if (msg_have != 0 || FD_ISSET(pgStatSock, &rfds))
 
1945
                        continue;
 
1946
 
 
1947
                /*
 
1948
                 * If the postmaster has terminated, we die too.  (This is no
 
1949
                 * longer the normal exit path, however.)
 
1950
                 */
 
1951
                if (!PostmasterIsAlive(true))
 
1952
                        exit(0);
 
1953
        }
 
1954
}
 
1955
 
 
1956
/*
 * pgstat_exit() -
 *
 *	SIGQUIT signal handler for buffer process.  Exits immediately with
 *	status 0; any messages still buffered are dropped, trading a clean
 *	drain for a fast exit.
 */
static void
pgstat_exit(SIGNAL_ARGS)
{
#ifdef WIN32
	/*
	 * When running in the bufferer, take the collector down too.  On some
	 * broken win32 systems it does not shut down by itself because of
	 * socket inheritance issues.  XXX fixing the socket inheritance would
	 * be the better cure.
	 */
	if (pgStatCollectorPid > 0)
	{
		kill(pgStatCollectorPid, SIGQUIT);
	}
#endif

	exit(0);
}
 
1977
 
 
1978
/*
 * pgstat_die() -
 *
 *	SIGCHLD signal handler for buffer process: terminate at once with
 *	exit status 1.
 */
static void
pgstat_die(SIGNAL_ARGS)
{
	exit(1);
}
 
1984
 
 
1985
 
 
1986
/* ----------
 
1987
 * pgstat_add_backend() -
 
1988
 *
 
1989
 *      Support function to keep our backend list up to date.
 
1990
 * ----------
 
1991
 */
 
1992
static int
 
1993
pgstat_add_backend(PgStat_MsgHdr *msg)
 
1994
{
 
1995
        PgStat_StatDBEntry *dbentry;
 
1996
        PgStat_StatBeEntry *beentry;
 
1997
        PgStat_StatBeDead *deadbe;
 
1998
        bool            found;
 
1999
 
 
2000
        /*
 
2001
         * Check that the backend ID is valid
 
2002
         */
 
2003
        if (msg->m_backendid < 1 || msg->m_backendid > MaxBackends)
 
2004
        {
 
2005
                ereport(LOG,
 
2006
                         (errmsg("invalid server process ID %d", msg->m_backendid)));
 
2007
                return -1;
 
2008
        }
 
2009
 
 
2010
        /*
 
2011
         * Get the slot for this backendid.
 
2012
         */
 
2013
        beentry = &pgStatBeTable[msg->m_backendid - 1];
 
2014
        if (beentry->databaseid != InvalidOid)
 
2015
        {
 
2016
                /*
 
2017
                 * If the slot contains the PID of this backend, everything is
 
2018
                 * fine and we got nothing to do.
 
2019
                 */
 
2020
                if (beentry->procpid == msg->m_procpid)
 
2021
                        return 0;
 
2022
        }
 
2023
 
 
2024
        /*
 
2025
         * Lookup if this backend is known to be dead. This can be caused due
 
2026
         * to messages arriving in the wrong order - i.e. Postmaster's BETERM
 
2027
         * message might have arrived before we received all the backends
 
2028
         * stats messages, or even a new backend with the same backendid was
 
2029
         * faster in sending his BESTART.
 
2030
         *
 
2031
         * If the backend is known to be dead, we ignore this add.
 
2032
         */
 
2033
        deadbe = (PgStat_StatBeDead *) hash_search(pgStatBeDead,
 
2034
                                                                                           (void *) &(msg->m_procpid),
 
2035
                                                                                           HASH_FIND, NULL);
 
2036
        if (deadbe)
 
2037
                return 1;
 
2038
 
 
2039
        /*
 
2040
         * Backend isn't known to be dead. If it's slot is currently used, we
 
2041
         * have to kick out the old backend.
 
2042
         */
 
2043
        if (beentry->databaseid != InvalidOid)
 
2044
                pgstat_sub_backend(beentry->procpid);
 
2045
 
 
2046
        /*
 
2047
         * Put this new backend into the slot.
 
2048
         */
 
2049
        beentry->databaseid = msg->m_databaseid;
 
2050
        beentry->procpid = msg->m_procpid;
 
2051
        beentry->userid = msg->m_userid;
 
2052
        beentry->activity_start_sec = 0;
 
2053
        beentry->activity_start_usec = 0;
 
2054
        MemSet(beentry->activity, 0, PGSTAT_ACTIVITY_SIZE);
 
2055
 
 
2056
        /*
 
2057
         * Lookup or create the database entry for this backends DB.
 
2058
         */
 
2059
        dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
 
2060
                                                                                   (void *) &(msg->m_databaseid),
 
2061
                                                                                                 HASH_ENTER, &found);
 
2062
        if (dbentry == NULL)
 
2063
                ereport(ERROR,
 
2064
                                (errcode(ERRCODE_OUT_OF_MEMORY),
 
2065
                         errmsg("out of memory in statistics collector --- abort")));
 
2066
 
 
2067
        /*
 
2068
         * If not found, initialize the new one.
 
2069
         */
 
2070
        if (!found)
 
2071
        {
 
2072
                HASHCTL         hash_ctl;
 
2073
 
 
2074
                dbentry->tables = NULL;
 
2075
                dbentry->n_xact_commit = 0;
 
2076
                dbentry->n_xact_rollback = 0;
 
2077
                dbentry->n_blocks_fetched = 0;
 
2078
                dbentry->n_blocks_hit = 0;
 
2079
                dbentry->n_connects = 0;
 
2080
                dbentry->destroy = 0;
 
2081
 
 
2082
                memset(&hash_ctl, 0, sizeof(hash_ctl));
 
2083
                hash_ctl.keysize = sizeof(Oid);
 
2084
                hash_ctl.entrysize = sizeof(PgStat_StatTabEntry);
 
2085
                hash_ctl.hash = tag_hash;
 
2086
                dbentry->tables = hash_create("Per-database table",
 
2087
                                                                          PGSTAT_TAB_HASH_SIZE,
 
2088
                                                                          &hash_ctl,
 
2089
                                                                          HASH_ELEM | HASH_FUNCTION);
 
2090
        }
 
2091
 
 
2092
        /*
 
2093
         * Count number of connects to the database
 
2094
         */
 
2095
        dbentry->n_connects++;
 
2096
 
 
2097
        return 0;
 
2098
}
 
2099
 
 
2100
 
 
2101
/* ----------
 
2102
 * pgstat_sub_backend() -
 
2103
 *
 
2104
 *      Remove a backend from the actual backends list.
 
2105
 * ----------
 
2106
 */
 
2107
static void
 
2108
pgstat_sub_backend(int procpid)
 
2109
{
 
2110
        int                     i;
 
2111
        PgStat_StatBeDead *deadbe;
 
2112
        bool            found;
 
2113
 
 
2114
        /*
 
2115
         * Search in the known-backends table for the slot containing this
 
2116
         * PID.
 
2117
         */
 
2118
        for (i = 0; i < MaxBackends; i++)
 
2119
        {
 
2120
                if (pgStatBeTable[i].databaseid != InvalidOid &&
 
2121
                        pgStatBeTable[i].procpid == procpid)
 
2122
                {
 
2123
                        /*
 
2124
                         * That's him. Add an entry to the known to be dead backends.
 
2125
                         * Due to possible misorder in the arrival of UDP packets it's
 
2126
                         * possible that even if we know the backend is dead, there
 
2127
                         * could still be messages queued that arrive later. Those
 
2128
                         * messages must not cause our number of backends statistics
 
2129
                         * to get screwed up, so we remember for a couple of seconds
 
2130
                         * that this PID is dead and ignore them (only the counting of
 
2131
                         * backends, not the table access stats they sent).
 
2132
                         */
 
2133
                        deadbe = (PgStat_StatBeDead *) hash_search(pgStatBeDead,
 
2134
                                                                                                           (void *) &procpid,
 
2135
                                                                                                           HASH_ENTER,
 
2136
                                                                                                           &found);
 
2137
                        if (deadbe == NULL)
 
2138
                                ereport(ERROR,
 
2139
                                                (errcode(ERRCODE_OUT_OF_MEMORY),
 
2140
                                                 errmsg("out of memory in statistics collector --- abort")));
 
2141
 
 
2142
                        if (!found)
 
2143
                        {
 
2144
                                deadbe->backendid = i + 1;
 
2145
                                deadbe->destroy = PGSTAT_DESTROY_COUNT;
 
2146
                        }
 
2147
 
 
2148
                        /*
 
2149
                         * Declare the backend slot empty.
 
2150
                         */
 
2151
                        pgStatBeTable[i].databaseid = InvalidOid;
 
2152
                        return;
 
2153
                }
 
2154
        }
 
2155
 
 
2156
        /*
 
2157
         * No big problem if not found. This can happen if UDP messages arrive
 
2158
         * out of order here.
 
2159
         */
 
2160
}
 
2161
 
 
2162
 
 
2163
/* ----------
 
2164
 * pgstat_write_statsfile() -
 
2165
 *
 
2166
 *      Tell the news.
 
2167
 * ----------
 
2168
 */
 
2169
static void
 
2170
pgstat_write_statsfile(void)
 
2171
{
 
2172
        HASH_SEQ_STATUS hstat;
 
2173
        HASH_SEQ_STATUS tstat;
 
2174
        PgStat_StatDBEntry *dbentry;
 
2175
        PgStat_StatTabEntry *tabentry;
 
2176
        PgStat_StatBeDead *deadbe;
 
2177
        FILE       *fpout;
 
2178
        int                     i;
 
2179
 
 
2180
        /*
 
2181
         * Open the statistics temp file to write out the current values.
 
2182
         */
 
2183
        fpout = fopen(pgStat_tmpfname, PG_BINARY_W);
 
2184
        if (fpout == NULL)
 
2185
        {
 
2186
                ereport(LOG,
 
2187
                                (errcode_for_file_access(),
 
2188
                        errmsg("could not open temporary statistics file \"%s\": %m",
 
2189
                                   pgStat_tmpfname)));
 
2190
                return;
 
2191
        }
 
2192
 
 
2193
        /*
 
2194
         * Walk through the database table.
 
2195
         */
 
2196
        hash_seq_init(&hstat, pgStatDBHash);
 
2197
        while ((dbentry = (PgStat_StatDBEntry *) hash_seq_search(&hstat)) != NULL)
 
2198
        {
 
2199
                /*
 
2200
                 * If this database is marked destroyed, count down and do so if
 
2201
                 * it reaches 0.
 
2202
                 */
 
2203
                if (dbentry->destroy > 0)
 
2204
                {
 
2205
                        if (--(dbentry->destroy) == 0)
 
2206
                        {
 
2207
                                if (dbentry->tables != NULL)
 
2208
                                        hash_destroy(dbentry->tables);
 
2209
 
 
2210
                                if (hash_search(pgStatDBHash,
 
2211
                                                                (void *) &(dbentry->databaseid),
 
2212
                                                                HASH_REMOVE, NULL) == NULL)
 
2213
                                        ereport(ERROR,
 
2214
                                                        (errmsg("database hash table corrupted "
 
2215
                                                                        "during cleanup --- abort")));
 
2216
                        }
 
2217
 
 
2218
                        /*
 
2219
                         * Don't include statistics for it.
 
2220
                         */
 
2221
                        continue;
 
2222
                }
 
2223
 
 
2224
                /*
 
2225
                 * Write out the DB line including the number of life backends.
 
2226
                 */
 
2227
                fputc('D', fpout);
 
2228
                fwrite(dbentry, sizeof(PgStat_StatDBEntry), 1, fpout);
 
2229
 
 
2230
                /*
 
2231
                 * Walk through the databases access stats per table.
 
2232
                 */
 
2233
                hash_seq_init(&tstat, dbentry->tables);
 
2234
                while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&tstat)) != NULL)
 
2235
                {
 
2236
                        /*
 
2237
                         * If table entry marked for destruction, same as above for
 
2238
                         * the database entry.
 
2239
                         */
 
2240
                        if (tabentry->destroy > 0)
 
2241
                        {
 
2242
                                if (--(tabentry->destroy) == 0)
 
2243
                                {
 
2244
                                        if (hash_search(dbentry->tables,
 
2245
                                                                        (void *) &(tabentry->tableid),
 
2246
                                                                        HASH_REMOVE, NULL) == NULL)
 
2247
                                        {
 
2248
                                                ereport(ERROR,
 
2249
                                                                (errmsg("tables hash table for "
 
2250
                                                                                "database %u corrupted during "
 
2251
                                                                                "cleanup --- abort",
 
2252
                                                                                dbentry->databaseid)));
 
2253
                                        }
 
2254
                                }
 
2255
                                continue;
 
2256
                        }
 
2257
 
 
2258
                        /*
 
2259
                         * At least we think this is still a life table. Print it's
 
2260
                         * access stats.
 
2261
                         */
 
2262
                        fputc('T', fpout);
 
2263
                        fwrite(tabentry, sizeof(PgStat_StatTabEntry), 1, fpout);
 
2264
                }
 
2265
 
 
2266
                /*
 
2267
                 * Mark the end of this DB
 
2268
                 */
 
2269
                fputc('d', fpout);
 
2270
        }
 
2271
 
 
2272
        /*
 
2273
         * Write out the known running backends to the stats file.
 
2274
         */
 
2275
        i = MaxBackends;
 
2276
        fputc('M', fpout);
 
2277
        fwrite(&i, sizeof(i), 1, fpout);
 
2278
 
 
2279
        for (i = 0; i < MaxBackends; i++)
 
2280
        {
 
2281
                if (pgStatBeTable[i].databaseid != InvalidOid)
 
2282
                {
 
2283
                        fputc('B', fpout);
 
2284
                        fwrite(&pgStatBeTable[i], sizeof(PgStat_StatBeEntry), 1, fpout);
 
2285
                }
 
2286
        }
 
2287
 
 
2288
        /*
 
2289
         * No more output to be done. Close the temp file and replace the old
 
2290
         * pgstat.stat with it.
 
2291
         */
 
2292
        fputc('E', fpout);
 
2293
        if (fclose(fpout) < 0)
 
2294
        {
 
2295
                ereport(LOG,
 
2296
                                (errcode_for_file_access(),
 
2297
                   errmsg("could not close temporary statistics file \"%s\": %m",
 
2298
                                  pgStat_tmpfname)));
 
2299
        }
 
2300
        else
 
2301
        {
 
2302
                if (rename(pgStat_tmpfname, pgStat_fname) < 0)
 
2303
                {
 
2304
                        ereport(LOG,
 
2305
                                        (errcode_for_file_access(),
 
2306
                                         errmsg("could not rename temporary statistics file \"%s\" to \"%s\": %m",
 
2307
                                                        pgStat_tmpfname, pgStat_fname)));
 
2308
                }
 
2309
        }
 
2310
 
 
2311
        /*
 
2312
         * Clear out the dead backends table
 
2313
         */
 
2314
        hash_seq_init(&hstat, pgStatBeDead);
 
2315
        while ((deadbe = (PgStat_StatBeDead *) hash_seq_search(&hstat)) != NULL)
 
2316
        {
 
2317
                /*
 
2318
                 * Count down the destroy delay and remove entries where it
 
2319
                 * reaches 0.
 
2320
                 */
 
2321
                if (--(deadbe->destroy) <= 0)
 
2322
                {
 
2323
                        if (hash_search(pgStatBeDead,
 
2324
                                                        (void *) &(deadbe->procpid),
 
2325
                                                        HASH_REMOVE, NULL) == NULL)
 
2326
                        {
 
2327
                                ereport(ERROR,
 
2328
                                          (errmsg("dead-server-process hash table corrupted "
 
2329
                                                          "during cleanup --- abort")));
 
2330
                        }
 
2331
                }
 
2332
        }
 
2333
}
 
2334
 
 
2335
 
 
2336
/* ----------
 
2337
 * pgstat_read_statsfile() -
 
2338
 *
 
2339
 *      Reads in an existing statistics collector file and initializes the
 
2340
 *      databases hash table (whose entries point to the tables hash tables)
 
2341
 *      and the current backend table.
 
2342
 * ----------
 
2343
 */
 
2344
static void
 
2345
pgstat_read_statsfile(HTAB **dbhash, Oid onlydb,
 
2346
                                          PgStat_StatBeEntry **betab, int *numbackends)
 
2347
{
 
2348
        PgStat_StatDBEntry *dbentry;
 
2349
        PgStat_StatDBEntry dbbuf;
 
2350
        PgStat_StatTabEntry *tabentry;
 
2351
        PgStat_StatTabEntry tabbuf;
 
2352
        HASHCTL         hash_ctl;
 
2353
        HTAB       *tabhash = NULL;
 
2354
        FILE       *fpin;
 
2355
        int                     maxbackends = 0;
 
2356
        int                     havebackends = 0;
 
2357
        bool            found;
 
2358
        MemoryContext use_mcxt;
 
2359
        int                     mcxt_flags;
 
2360
 
 
2361
        /*
 
2362
         * If running in the collector we use the DynaHashCxt memory context.
 
2363
         * If running in a backend, we use the TopTransactionContext instead,
 
2364
         * so the caller must only know the last XactId when this call
 
2365
         * happened to know if his tables are still valid or already gone!
 
2366
         */
 
2367
        if (pgStatRunningInCollector)
 
2368
        {
 
2369
                use_mcxt = NULL;
 
2370
                mcxt_flags = 0;
 
2371
        }
 
2372
        else
 
2373
        {
 
2374
                use_mcxt = TopTransactionContext;
 
2375
                mcxt_flags = HASH_CONTEXT;
 
2376
        }
 
2377
 
 
2378
        /*
 
2379
         * Create the DB hashtable
 
2380
         */
 
2381
        memset(&hash_ctl, 0, sizeof(hash_ctl));
 
2382
        hash_ctl.keysize = sizeof(Oid);
 
2383
        hash_ctl.entrysize = sizeof(PgStat_StatDBEntry);
 
2384
        hash_ctl.hash = tag_hash;
 
2385
        hash_ctl.hcxt = use_mcxt;
 
2386
        *dbhash = hash_create("Databases hash", PGSTAT_DB_HASH_SIZE, &hash_ctl,
 
2387
                                                  HASH_ELEM | HASH_FUNCTION | mcxt_flags);
 
2388
 
 
2389
        /*
 
2390
         * Initialize the number of known backends to zero, just in case we do
 
2391
         * a silent error return below.
 
2392
         */
 
2393
        if (numbackends != NULL)
 
2394
                *numbackends = 0;
 
2395
        if (betab != NULL)
 
2396
                *betab = NULL;
 
2397
 
 
2398
        /*
 
2399
         * In EXEC_BACKEND case, we won't have inherited pgStat_fname from
 
2400
         * postmaster, so compute it first time through.
 
2401
         */
 
2402
#ifdef EXEC_BACKEND
 
2403
        if (pgStat_fname[0] == '\0')
 
2404
        {
 
2405
                Assert(DataDir != NULL);
 
2406
                snprintf(pgStat_fname, MAXPGPATH, PGSTAT_STAT_FILENAME, DataDir);
 
2407
        }
 
2408
#endif
 
2409
 
 
2410
        /*
 
2411
         * Try to open the status file. If it doesn't exist, the backends
 
2412
         * simply return zero for anything and the collector simply starts
 
2413
         * from scratch with empty counters.
 
2414
         */
 
2415
        if ((fpin = AllocateFile(pgStat_fname, PG_BINARY_R)) == NULL)
 
2416
                return;
 
2417
 
 
2418
        /*
 
2419
         * We found an existing collector stats file. Read it and put all the
 
2420
         * hashtable entries into place.
 
2421
         */
 
2422
        for (;;)
 
2423
        {
 
2424
                switch (fgetc(fpin))
 
2425
                {
 
2426
                                /*
 
2427
                                 * 'D'  A PgStat_StatDBEntry struct describing a database
 
2428
                                 * follows. Subsequently, zero to many 'T' entries will
 
2429
                                 * follow until a 'd' is encountered.
 
2430
                                 */
 
2431
                        case 'D':
 
2432
                                if (fread(&dbbuf, 1, sizeof(dbbuf), fpin) != sizeof(dbbuf))
 
2433
                                {
 
2434
                                        ereport(pgStatRunningInCollector ? LOG : WARNING,
 
2435
                                                        (errmsg("corrupted pgstat.stat file")));
 
2436
                                        goto done;
 
2437
                                }
 
2438
 
 
2439
                                /*
 
2440
                                 * Add to the DB hash
 
2441
                                 */
 
2442
                                dbentry = (PgStat_StatDBEntry *) hash_search(*dbhash,
 
2443
                                                                                          (void *) &dbbuf.databaseid,
 
2444
                                                                                                                         HASH_ENTER,
 
2445
                                                                                                                         &found);
 
2446
                                if (dbentry == NULL)
 
2447
                                {
 
2448
                                        ereport(ERROR,
 
2449
                                                        (errcode(ERRCODE_OUT_OF_MEMORY),
 
2450
                                                         errmsg("out of memory")));
 
2451
                                }
 
2452
                                if (found)
 
2453
                                {
 
2454
                                        ereport(pgStatRunningInCollector ? LOG : WARNING,
 
2455
                                                        (errmsg("corrupted pgstat.stat file")));
 
2456
                                        goto done;
 
2457
                                }
 
2458
 
 
2459
                                memcpy(dbentry, &dbbuf, sizeof(PgStat_StatDBEntry));
 
2460
                                dbentry->tables = NULL;
 
2461
                                dbentry->destroy = 0;
 
2462
                                dbentry->n_backends = 0;
 
2463
 
 
2464
                                /*
 
2465
                                 * Don't collect tables if not the requested DB
 
2466
                                 */
 
2467
                                if (onlydb != InvalidOid && onlydb != dbbuf.databaseid)
 
2468
                                        break;
 
2469
 
 
2470
                                memset(&hash_ctl, 0, sizeof(hash_ctl));
 
2471
                                hash_ctl.keysize = sizeof(Oid);
 
2472
                                hash_ctl.entrysize = sizeof(PgStat_StatTabEntry);
 
2473
                                hash_ctl.hash = tag_hash;
 
2474
                                hash_ctl.hcxt = use_mcxt;
 
2475
                                dbentry->tables = hash_create("Per-database table",
 
2476
                                                                                          PGSTAT_TAB_HASH_SIZE,
 
2477
                                                                                          &hash_ctl,
 
2478
                                                                                          HASH_ELEM | HASH_FUNCTION | mcxt_flags);
 
2479
 
 
2480
                                /*
 
2481
                                 * Arrange that following 'T's add entries to this
 
2482
                                 * databases tables hash table.
 
2483
                                 */
 
2484
                                tabhash = dbentry->tables;
 
2485
                                break;
 
2486
 
 
2487
                                /*
 
2488
                                 * 'd'  End of this database.
 
2489
                                 */
 
2490
                        case 'd':
 
2491
                                tabhash = NULL;
 
2492
                                break;
 
2493
 
 
2494
                                /*
 
2495
                                 * 'T'  A PgStat_StatTabEntry follows.
 
2496
                                 */
 
2497
                        case 'T':
 
2498
                                if (fread(&tabbuf, 1, sizeof(tabbuf), fpin) != sizeof(tabbuf))
 
2499
                                {
 
2500
                                        ereport(pgStatRunningInCollector ? LOG : WARNING,
 
2501
                                                        (errmsg("corrupted pgstat.stat file")));
 
2502
                                        goto done;
 
2503
                                }
 
2504
 
 
2505
                                /*
 
2506
                                 * Skip if table belongs to a not requested database.
 
2507
                                 */
 
2508
                                if (tabhash == NULL)
 
2509
                                        break;
 
2510
 
 
2511
                                tabentry = (PgStat_StatTabEntry *) hash_search(tabhash,
 
2512
                                                                                                (void *) &tabbuf.tableid,
 
2513
                                                                                                         HASH_ENTER, &found);
 
2514
                                if (tabentry == NULL)
 
2515
                                        ereport(ERROR,
 
2516
                                                        (errcode(ERRCODE_OUT_OF_MEMORY),
 
2517
                                                         errmsg("out of memory")));
 
2518
 
 
2519
                                if (found)
 
2520
                                {
 
2521
                                        ereport(pgStatRunningInCollector ? LOG : WARNING,
 
2522
                                                        (errmsg("corrupted pgstat.stat file")));
 
2523
                                        goto done;
 
2524
                                }
 
2525
 
 
2526
                                memcpy(tabentry, &tabbuf, sizeof(tabbuf));
 
2527
                                break;
 
2528
 
 
2529
                                /*
 
2530
                                 * 'M'  The maximum number of backends to expect follows.
 
2531
                                 */
 
2532
                        case 'M':
 
2533
                                if (betab == NULL || numbackends == NULL)
 
2534
                                        goto done;
 
2535
                                if (fread(&maxbackends, 1, sizeof(maxbackends), fpin) !=
 
2536
                                        sizeof(maxbackends))
 
2537
                                {
 
2538
                                        ereport(pgStatRunningInCollector ? LOG : WARNING,
 
2539
                                                        (errmsg("corrupted pgstat.stat file")));
 
2540
                                        goto done;
 
2541
                                }
 
2542
                                if (maxbackends == 0)
 
2543
                                        goto done;
 
2544
 
 
2545
                                /*
 
2546
                                 * Allocate space (in TopTransactionContext too) for the
 
2547
                                 * backend table.
 
2548
                                 */
 
2549
                                if (use_mcxt == NULL)
 
2550
                                        *betab = (PgStat_StatBeEntry *) palloc(
 
2551
                                                           sizeof(PgStat_StatBeEntry) * maxbackends);
 
2552
                                else
 
2553
                                        *betab = (PgStat_StatBeEntry *) MemoryContextAlloc(
 
2554
                                                                                                                                use_mcxt,
 
2555
                                                           sizeof(PgStat_StatBeEntry) * maxbackends);
 
2556
                                break;
 
2557
 
 
2558
                                /*
 
2559
                                 * 'B'  A PgStat_StatBeEntry follows.
 
2560
                                 */
 
2561
                        case 'B':
 
2562
                                if (betab == NULL || numbackends == NULL || *betab == NULL)
 
2563
                                        goto done;
 
2564
 
 
2565
                                /*
 
2566
                                 * Read it directly into the table.
 
2567
                                 */
 
2568
                                if (fread(&(*betab)[havebackends], 1,
 
2569
                                                  sizeof(PgStat_StatBeEntry), fpin) !=
 
2570
                                        sizeof(PgStat_StatBeEntry))
 
2571
                                {
 
2572
                                        ereport(pgStatRunningInCollector ? LOG : WARNING,
 
2573
                                                        (errmsg("corrupted pgstat.stat file")));
 
2574
                                        goto done;
 
2575
                                }
 
2576
 
 
2577
                                /*
 
2578
                                 * Count backends per database here.
 
2579
                                 */
 
2580
                                dbentry = (PgStat_StatDBEntry *) hash_search(*dbhash,
 
2581
                                                   (void *) &((*betab)[havebackends].databaseid),
 
2582
                                                                                                                HASH_FIND, NULL);
 
2583
                                if (dbentry)
 
2584
                                        dbentry->n_backends++;
 
2585
 
 
2586
                                havebackends++;
 
2587
                                if (numbackends != 0)
 
2588
                                        *numbackends = havebackends;
 
2589
                                if (havebackends >= maxbackends)
 
2590
                                        goto done;
 
2591
 
 
2592
                                break;
 
2593
 
 
2594
                                /*
 
2595
                                 * 'E'  The EOF marker of a complete stats file.
 
2596
                                 */
 
2597
                        case 'E':
 
2598
                                goto done;
 
2599
 
 
2600
                        default:
 
2601
                                ereport(pgStatRunningInCollector ? LOG : WARNING,
 
2602
                                                (errmsg("corrupted pgstat.stat file")));
 
2603
                                goto done;
 
2604
                }
 
2605
        }
 
2606
 
 
2607
done:
 
2608
        FreeFile(fpin);
 
2609
}
 
2610
 
 
2611
/*
 
2612
 * If not done for this transaction, read the statistics collector
 
2613
 * stats file into some hash tables.
 
2614
 *
 
2615
 * Because we store the hash tables in TopTransactionContext, the result
 
2616
 * is good for the entire current main transaction.
 
2617
 */
 
2618
static void
 
2619
backend_read_statsfile(void)
 
2620
{
 
2621
        TransactionId topXid = GetTopTransactionId();
 
2622
 
 
2623
        if (!TransactionIdEquals(pgStatDBHashXact, topXid))
 
2624
        {
 
2625
                Assert(!pgStatRunningInCollector);
 
2626
                pgstat_read_statsfile(&pgStatDBHash, MyDatabaseId,
 
2627
                                                          &pgStatBeTable, &pgStatNumBackends);
 
2628
                pgStatDBHashXact = topXid;
 
2629
        }
 
2630
}
 
2631
 
 
2632
 
 
2633
/* ----------
 
2634
 * pgstat_recv_bestart() -
 
2635
 *
 
2636
 *      Process a backend startup message.
 
2637
 * ----------
 
2638
 */
 
2639
static void
 
2640
pgstat_recv_bestart(PgStat_MsgBestart *msg, int len)
 
2641
{
 
2642
        pgstat_add_backend(&msg->m_hdr);
 
2643
}
 
2644
 
 
2645
 
 
2646
/* ----------
 
2647
 * pgstat_recv_beterm() -
 
2648
 *
 
2649
 *      Process a backend termination message.
 
2650
 * ----------
 
2651
 */
 
2652
static void
 
2653
pgstat_recv_beterm(PgStat_MsgBeterm *msg, int len)
 
2654
{
 
2655
        pgstat_sub_backend(msg->m_hdr.m_procpid);
 
2656
}
 
2657
 
 
2658
 
 
2659
/* ----------
 
2660
 * pgstat_recv_activity() -
 
2661
 *
 
2662
 *      Remember what the backend is doing.
 
2663
 * ----------
 
2664
 */
 
2665
static void
 
2666
pgstat_recv_activity(PgStat_MsgActivity *msg, int len)
 
2667
{
 
2668
        PgStat_StatBeEntry *entry;
 
2669
 
 
2670
        /*
 
2671
         * Here we check explicitly for 0 return, since we don't want to
 
2672
         * mangle the activity of an active backend by a delayed packet from a
 
2673
         * dead one.
 
2674
         */
 
2675
        if (pgstat_add_backend(&msg->m_hdr) != 0)
 
2676
                return;
 
2677
 
 
2678
        entry = &(pgStatBeTable[msg->m_hdr.m_backendid - 1]);
 
2679
 
 
2680
        StrNCpy(entry->activity, msg->m_what, PGSTAT_ACTIVITY_SIZE);
 
2681
 
 
2682
        entry->activity_start_sec =
 
2683
                GetCurrentAbsoluteTimeUsec(&entry->activity_start_usec);
 
2684
}
 
2685
 
 
2686
 
 
2687
/* ----------
 
2688
 * pgstat_recv_tabstat() -
 
2689
 *
 
2690
 *      Count what the backend has done.
 
2691
 * ----------
 
2692
 */
 
2693
static void
 
2694
pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len)
 
2695
{
 
2696
        PgStat_TableEntry *tabmsg = &(msg->m_entry[0]);
 
2697
        PgStat_StatDBEntry *dbentry;
 
2698
        PgStat_StatTabEntry *tabentry;
 
2699
        int                     i;
 
2700
        bool            found;
 
2701
 
 
2702
        /*
 
2703
         * Make sure the backend is counted for.
 
2704
         */
 
2705
        if (pgstat_add_backend(&msg->m_hdr) < 0)
 
2706
                return;
 
2707
 
 
2708
        /*
 
2709
         * Lookup the database in the hashtable.
 
2710
         */
 
2711
        dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
 
2712
                                                                         (void *) &(msg->m_hdr.m_databaseid),
 
2713
                                                                                                 HASH_FIND, NULL);
 
2714
        if (!dbentry)
 
2715
                return;
 
2716
 
 
2717
        /*
 
2718
         * If the database is marked for destroy, this is a delayed UDP packet
 
2719
         * and not worth being counted.
 
2720
         */
 
2721
        if (dbentry->destroy > 0)
 
2722
                return;
 
2723
 
 
2724
        dbentry->n_xact_commit += (PgStat_Counter) (msg->m_xact_commit);
 
2725
        dbentry->n_xact_rollback += (PgStat_Counter) (msg->m_xact_rollback);
 
2726
 
 
2727
        /*
 
2728
         * Process all table entries in the message.
 
2729
         */
 
2730
        for (i = 0; i < msg->m_nentries; i++)
 
2731
        {
 
2732
                tabentry = (PgStat_StatTabEntry *) hash_search(dbentry->tables,
 
2733
                                                                                          (void *) &(tabmsg[i].t_id),
 
2734
                                                                                                         HASH_ENTER, &found);
 
2735
                if (tabentry == NULL)
 
2736
                        ereport(ERROR,
 
2737
                                        (errcode(ERRCODE_OUT_OF_MEMORY),
 
2738
                         errmsg("out of memory in statistics collector --- abort")));
 
2739
 
 
2740
                if (!found)
 
2741
                {
 
2742
                        /*
 
2743
                         * If it's a new table entry, initialize counters to the
 
2744
                         * values we just got.
 
2745
                         */
 
2746
                        tabentry->numscans = tabmsg[i].t_numscans;
 
2747
                        tabentry->tuples_returned = tabmsg[i].t_tuples_returned;
 
2748
                        tabentry->tuples_fetched = tabmsg[i].t_tuples_fetched;
 
2749
                        tabentry->tuples_inserted = tabmsg[i].t_tuples_inserted;
 
2750
                        tabentry->tuples_updated = tabmsg[i].t_tuples_updated;
 
2751
                        tabentry->tuples_deleted = tabmsg[i].t_tuples_deleted;
 
2752
                        tabentry->blocks_fetched = tabmsg[i].t_blocks_fetched;
 
2753
                        tabentry->blocks_hit = tabmsg[i].t_blocks_hit;
 
2754
 
 
2755
                        tabentry->destroy = 0;
 
2756
                }
 
2757
                else
 
2758
                {
 
2759
                        /*
 
2760
                         * Otherwise add the values to the existing entry.
 
2761
                         */
 
2762
                        tabentry->numscans += tabmsg[i].t_numscans;
 
2763
                        tabentry->tuples_returned += tabmsg[i].t_tuples_returned;
 
2764
                        tabentry->tuples_fetched += tabmsg[i].t_tuples_fetched;
 
2765
                        tabentry->tuples_inserted += tabmsg[i].t_tuples_inserted;
 
2766
                        tabentry->tuples_updated += tabmsg[i].t_tuples_updated;
 
2767
                        tabentry->tuples_deleted += tabmsg[i].t_tuples_deleted;
 
2768
                        tabentry->blocks_fetched += tabmsg[i].t_blocks_fetched;
 
2769
                        tabentry->blocks_hit += tabmsg[i].t_blocks_hit;
 
2770
                }
 
2771
 
 
2772
                /*
 
2773
                 * And add the block IO to the database entry.
 
2774
                 */
 
2775
                dbentry->n_blocks_fetched += tabmsg[i].t_blocks_fetched;
 
2776
                dbentry->n_blocks_hit += tabmsg[i].t_blocks_hit;
 
2777
        }
 
2778
}
 
2779
 
 
2780
 
 
2781
/* ----------
 
2782
 * pgstat_recv_tabpurge() -
 
2783
 *
 
2784
 *      Arrange for dead table removal.
 
2785
 * ----------
 
2786
 */
 
2787
static void
 
2788
pgstat_recv_tabpurge(PgStat_MsgTabpurge *msg, int len)
 
2789
{
 
2790
        PgStat_StatDBEntry *dbentry;
 
2791
        PgStat_StatTabEntry *tabentry;
 
2792
        int                     i;
 
2793
 
 
2794
        /*
 
2795
         * Make sure the backend is counted for.
 
2796
         */
 
2797
        if (pgstat_add_backend(&msg->m_hdr) < 0)
 
2798
                return;
 
2799
 
 
2800
        /*
 
2801
         * Lookup the database in the hashtable.
 
2802
         */
 
2803
        dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
 
2804
                                                                         (void *) &(msg->m_hdr.m_databaseid),
 
2805
                                                                                                 HASH_FIND, NULL);
 
2806
        if (!dbentry)
 
2807
                return;
 
2808
 
 
2809
        /*
 
2810
         * If the database is marked for destroy, this is a delayed UDP packet
 
2811
         * and the tables will go away at DB destruction.
 
2812
         */
 
2813
        if (dbentry->destroy > 0)
 
2814
                return;
 
2815
 
 
2816
        /*
 
2817
         * Process all table entries in the message.
 
2818
         */
 
2819
        for (i = 0; i < msg->m_nentries; i++)
 
2820
        {
 
2821
                tabentry = (PgStat_StatTabEntry *) hash_search(dbentry->tables,
 
2822
                                                                                   (void *) &(msg->m_tableid[i]),
 
2823
                                                                                                           HASH_FIND, NULL);
 
2824
                if (tabentry)
 
2825
                        tabentry->destroy = PGSTAT_DESTROY_COUNT;
 
2826
        }
 
2827
}
 
2828
 
 
2829
 
 
2830
/* ----------
 
2831
 * pgstat_recv_dropdb() -
 
2832
 *
 
2833
 *      Arrange for dead database removal
 
2834
 * ----------
 
2835
 */
 
2836
static void
 
2837
pgstat_recv_dropdb(PgStat_MsgDropdb *msg, int len)
 
2838
{
 
2839
        PgStat_StatDBEntry *dbentry;
 
2840
 
 
2841
        /*
 
2842
         * Make sure the backend is counted for.
 
2843
         */
 
2844
        if (pgstat_add_backend(&msg->m_hdr) < 0)
 
2845
                return;
 
2846
 
 
2847
        /*
 
2848
         * Lookup the database in the hashtable.
 
2849
         */
 
2850
        dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
 
2851
                                                                                   (void *) &(msg->m_databaseid),
 
2852
                                                                                                 HASH_FIND, NULL);
 
2853
        if (!dbentry)
 
2854
                return;
 
2855
 
 
2856
        /*
 
2857
         * Mark the database for destruction.
 
2858
         */
 
2859
        dbentry->destroy = PGSTAT_DESTROY_COUNT;
 
2860
}
 
2861
 
 
2862
 
 
2863
/* ----------
 
2864
 * pgstat_recv_resetcounter() -
 
2865
 *
 
2866
 *      Reset the statistics of the database named in the message header.
 
2867
 * ----------
 
2868
 */
 
2869
static void
 
2870
pgstat_recv_resetcounter(PgStat_MsgResetcounter *msg, int len)
 
2871
{
 
2872
        HASHCTL         hash_ctl;
 
2873
        PgStat_StatDBEntry *dbentry;
 
2874
 
 
2875
        /*
 
2876
         * Make sure the backend is counted for.
 
2877
         */
 
2878
        if (pgstat_add_backend(&msg->m_hdr) < 0)
 
2879
                return;
 
2880
 
 
2881
        /*
 
2882
         * Lookup the database in the hashtable.
 
2883
         */
 
2884
        dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
 
2885
                                                                         (void *) &(msg->m_hdr.m_databaseid),
 
2886
                                                                                                 HASH_FIND, NULL);
 
2887
        if (!dbentry)
 
2888
                return;
 
2889
 
 
2890
        /*
 
2891
         * We simply throw away all the databases table entries by recreating
 
2892
         * a new hash table for them.
 
2893
         */
 
2894
        if (dbentry->tables != NULL)
 
2895
                hash_destroy(dbentry->tables);
 
2896
 
 
2897
        dbentry->tables = NULL;
 
2898
        dbentry->n_xact_commit = 0;
 
2899
        dbentry->n_xact_rollback = 0;
 
2900
        dbentry->n_blocks_fetched = 0;
 
2901
        dbentry->n_blocks_hit = 0;
 
2902
        dbentry->n_connects = 0;
 
2903
        dbentry->destroy = 0;
 
2904
 
 
2905
        memset(&hash_ctl, 0, sizeof(hash_ctl));
 
2906
        hash_ctl.keysize = sizeof(Oid);
 
2907
        hash_ctl.entrysize = sizeof(PgStat_StatTabEntry);
 
2908
        hash_ctl.hash = tag_hash;
 
2909
        dbentry->tables = hash_create("Per-database table",
 
2910
                                                                  PGSTAT_TAB_HASH_SIZE,
 
2911
                                                                  &hash_ctl,
 
2912
                                                                  HASH_ELEM | HASH_FUNCTION);
 
2913
}