1
/*-------------------------------------------------------------------------
5
* PostgreSQL WAL archiver
7
* All functions relating to archiver are included here
9
* - All functions executed by archiver process
11
* - archiver is forked from postmaster, and the two
12
* processes then communicate using signals. All functions
13
* executed by postmaster are included in this file.
15
* Initial author: Simon Riggs simon@2ndquadrant.com
17
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
18
* Portions Copyright (c) 1994, Regents of the University of California
24
*-------------------------------------------------------------------------
35
#include "access/xlog_internal.h"
36
#include "libpq/pqsignal.h"
37
#include "miscadmin.h"
38
#include "postmaster/fork_process.h"
39
#include "postmaster/pgarch.h"
40
#include "postmaster/postmaster.h"
41
#include "storage/fd.h"
42
#include "storage/ipc.h"
43
#include "storage/pg_shmem.h"
44
#include "storage/pmsignal.h"
45
#include "utils/guc.h"
46
#include "utils/ps_status.h"
53
#define PGARCH_AUTOWAKE_INTERVAL 60 /* How often to force a poll of the
54
* archive status directory; in seconds. */
56
#define PGARCH_RESTART_INTERVAL 10 /* How often to attempt to restart a
57
* failed archiver; in seconds. */
60
* Archiver control info.
62
* We expect that archivable files within pg_xlog will have names between
63
* MIN_XFN_CHARS and MAX_XFN_CHARS in length, consisting only of characters
64
* appearing in VALID_XFN_CHARS. The status files in archive_status have
65
* corresponding names with ".ready" or ".done" appended.
68
#define MIN_XFN_CHARS 16
69
#define MAX_XFN_CHARS 40
70
#define VALID_XFN_CHARS "0123456789ABCDEF.history.backup"
72
#define NUM_ARCHIVE_RETRIES 3
79
static time_t last_pgarch_start_time;
80
static time_t last_sigterm_time = 0;
83
* Flags set by interrupt handlers for later service in the main loop.
85
static volatile sig_atomic_t got_SIGHUP = false;
86
static volatile sig_atomic_t got_SIGTERM = false;
87
static volatile sig_atomic_t wakened = false;
88
static volatile sig_atomic_t ready_to_stop = false;
91
* Local function forward declarations
95
static pid_t pgarch_forkexec(void);
98
NON_EXEC_STATIC void PgArchiverMain(int argc, char *argv[]);
99
static void pgarch_exit(SIGNAL_ARGS);
100
static void ArchSigHupHandler(SIGNAL_ARGS);
101
static void ArchSigTermHandler(SIGNAL_ARGS);
102
static void pgarch_waken(SIGNAL_ARGS);
103
static void pgarch_waken_stop(SIGNAL_ARGS);
104
static void pgarch_MainLoop(void);
105
static void pgarch_ArchiverCopyLoop(void);
106
static bool pgarch_archiveXlog(char *xlog);
107
static bool pgarch_readyXlog(char *xlog);
108
static void pgarch_archiveDone(char *xlog);
111
/* ------------------------------------------------------------
112
* Public functions called from postmaster follow
113
* ------------------------------------------------------------
119
* Called from postmaster at startup or after an existing archiver
120
* died. Attempt to fire up a fresh archiver process.
122
* Returns PID of child process, or 0 on failure.
124
* Note: on failure, we will be called again from the postmaster main loop.
133
* Do nothing if no archiver needed
135
if (!XLogArchivingActive())
139
* Do nothing if too soon since last archiver start. This is a safety
140
* valve to protect against continuous respawn attempts if the archiver is
141
* dying immediately at launch. Note that since we will be re-called from
142
* the postmaster main loop, we will get another chance later.
144
curtime = time(NULL);
145
if ((unsigned int) (curtime - last_pgarch_start_time) <
146
(unsigned int) PGARCH_RESTART_INTERVAL)
148
last_pgarch_start_time = curtime;
151
switch ((pgArchPid = pgarch_forkexec()))
153
switch ((pgArchPid = fork_process()))
158
(errmsg("could not fork archiver: %m")));
163
/* in postmaster child ... */
164
/* Close the postmaster's sockets */
165
ClosePostmasterPorts(false);
167
/* Lose the postmaster's on-exit routines */
170
/* Drop our connection to postmaster's shared memory, as well */
171
PGSharedMemoryDetach();
173
PgArchiverMain(0, NULL);
178
return (int) pgArchPid;
181
/* shouldn't get here */
185
/* ------------------------------------------------------------
186
* Local functions called by archiver follow
187
* ------------------------------------------------------------
194
* pgarch_forkexec() -
196
* Format up the arglist for, then fork and exec, archive process
199
pgarch_forkexec(void)
204
av[ac++] = "postgres";
206
av[ac++] = "--forkarch";
208
av[ac++] = NULL; /* filled in by postmaster_forkexec */
211
Assert(ac < lengthof(av));
213
return postmaster_forkexec(ac, av);
215
#endif /* EXEC_BACKEND */
221
* The argc/argv parameters are valid only in EXEC_BACKEND case. However,
222
* since we don't use 'em, it hardly matters...
225
PgArchiverMain(int argc, char *argv[])
227
IsUnderPostmaster = true; /* we are a postmaster subprocess now */
229
MyProcPid = getpid(); /* reset MyProcPid */
231
MyStartTime = time(NULL); /* record Start Time for logging */
234
* If possible, make this process a group leader, so that the postmaster
235
* can signal any child processes too.
239
elog(FATAL, "setsid() failed: %m");
243
* Ignore all signals usually bound to some action in the postmaster,
244
* except for SIGHUP, SIGTERM, SIGUSR1, SIGUSR2, and SIGQUIT.
246
pqsignal(SIGHUP, ArchSigHupHandler);
247
pqsignal(SIGINT, SIG_IGN);
248
pqsignal(SIGTERM, ArchSigTermHandler);
249
pqsignal(SIGQUIT, pgarch_exit);
250
pqsignal(SIGALRM, SIG_IGN);
251
pqsignal(SIGPIPE, SIG_IGN);
252
pqsignal(SIGUSR1, pgarch_waken);
253
pqsignal(SIGUSR2, pgarch_waken_stop);
254
pqsignal(SIGCHLD, SIG_DFL);
255
pqsignal(SIGTTIN, SIG_DFL);
256
pqsignal(SIGTTOU, SIG_DFL);
257
pqsignal(SIGCONT, SIG_DFL);
258
pqsignal(SIGWINCH, SIG_DFL);
259
PG_SETMASK(&UnBlockSig);
262
* Identify myself via ps
264
init_ps_display("archiver process", "", "", "");
271
/* SIGQUIT signal handler for archiver process */
273
pgarch_exit(SIGNAL_ARGS)
275
/* SIGQUIT means curl up and die ... */
279
/* SIGHUP signal handler for archiver process */
281
ArchSigHupHandler(SIGNAL_ARGS)
283
/* set flag to re-read config file at next convenient time */
287
/* SIGTERM signal handler for archiver process */
289
ArchSigTermHandler(SIGNAL_ARGS)
292
* The postmaster never sends us SIGTERM, so we assume that this means
293
* that init is trying to shut down the whole system. If we hang around
294
* too long we'll get SIGKILL'd. Set flag to prevent starting any more archive commands.
300
/* SIGUSR1 signal handler for archiver process */
302
pgarch_waken(SIGNAL_ARGS)
304
/* set flag that there is work to be done */
308
/* SIGUSR2 signal handler for archiver process */
310
pgarch_waken_stop(SIGNAL_ARGS)
312
/* set flag to do a final cycle and shut down afterwards */
313
ready_to_stop = true;
319
* Main loop for archiver
322
pgarch_MainLoop(void)
324
time_t last_copy_time = 0;
328
* We run the copy loop immediately upon entry, in case there are
329
* unarchived files left over from a previous database run (or maybe the
330
* archiver died unexpectedly). After that we wait for a signal or
331
* timeout before doing more.
337
/* When we get SIGUSR2, we do one more archive cycle, then exit */
338
time_to_stop = ready_to_stop;
340
/* Check for config update */
344
ProcessConfigFile(PGC_SIGHUP);
348
* If we've gotten SIGTERM, we normally just sit and do nothing until
349
* SIGUSR2 arrives. However, that means a random SIGTERM would
350
* disable archiving indefinitely, which doesn't seem like a good
351
* idea. If more than 60 seconds pass since SIGTERM, exit anyway,
352
* so that the postmaster can start a new archiver if needed.
356
time_t curtime = time(NULL);
358
if (last_sigterm_time == 0)
359
last_sigterm_time = curtime;
360
else if ((unsigned int) (curtime - last_sigterm_time) >=
365
/* Do what we're here for */
366
if (wakened || time_to_stop)
369
pgarch_ArchiverCopyLoop();
370
last_copy_time = time(NULL);
374
* There shouldn't be anything for the archiver to do except to wait
375
* for a signal ... however, the archiver exists to protect our data,
376
* so she wakes up occasionally to allow herself to be proactive.
378
* On some platforms, signals won't interrupt the sleep. To ensure we
379
* respond reasonably promptly when someone signals us, break down the
380
* sleep into 1-second increments, and check for interrupts after each one.
383
while (!(wakened || ready_to_stop || got_SIGHUP ||
384
!PostmasterIsAlive(true)))
389
curtime = time(NULL);
390
if ((unsigned int) (curtime - last_copy_time) >=
391
(unsigned int) PGARCH_AUTOWAKE_INTERVAL)
396
* The archiver quits either when the postmaster dies (not expected)
397
* or after completing one more archiving cycle after receiving SIGUSR2.
400
} while (PostmasterIsAlive(true) && !time_to_stop);
404
* pgarch_ArchiverCopyLoop
406
* Archives all outstanding xlogs then returns
409
pgarch_ArchiverCopyLoop(void)
411
char xlog[MAX_XFN_CHARS + 1];
413
if (!XLogArchiveCommandSet())
416
(errmsg("archive_mode enabled, yet archive_command is not set")));
417
/* can't do anything if no command ... */
422
* loop through all xlogs with archive_status of .ready and archive
423
* them...mostly we expect this to be a single file, though it is possible
424
* some backend will add files onto the list of those that need archiving
425
* while we are still copying earlier archives
427
while (pgarch_readyXlog(xlog))
434
* Do not initiate any more archive commands after receiving
435
* SIGTERM, nor after the postmaster has died unexpectedly.
436
* The first condition is to try to keep from having init
437
* SIGKILL the command, and the second is to avoid conflicts
438
* with another archiver spawned by a newer postmaster.
440
if (got_SIGTERM || !PostmasterIsAlive(true))
443
if (pgarch_archiveXlog(xlog))
446
pgarch_archiveDone(xlog);
447
break; /* out of inner retry loop */
451
if (++failures >= NUM_ARCHIVE_RETRIES)
454
(errmsg("transaction log file \"%s\" could not be archived: too many failures",
456
return; /* give up archiving for now */
458
pg_usleep(1000000L); /* wait a bit before retrying */
467
* Invokes system(3) to copy one archive file to wherever it should go
469
* Returns true if successful
472
pgarch_archiveXlog(char *xlog)
474
char xlogarchcmd[MAXPGPATH];
475
char pathname[MAXPGPATH];
476
char activitymsg[MAXFNAMELEN + 16];
482
snprintf(pathname, MAXPGPATH, XLOGDIR "/%s", xlog);
485
* construct the command to be executed
488
endp = xlogarchcmd + MAXPGPATH - 1;
491
for (sp = XLogArchiveCommand; *sp; sp++)
498
/* %p: relative path of source file */
500
strlcpy(dp, pathname, endp - dp);
501
make_native_path(dp);
505
/* %f: filename of source file */
507
strlcpy(dp, xlog, endp - dp);
511
/* convert %% to a single % */
517
/* otherwise treat the % as not special */
532
(errmsg_internal("executing archive command \"%s\"",
535
/* Report archive activity in PS display */
536
snprintf(activitymsg, sizeof(activitymsg), "archiving %s", xlog);
537
set_ps_display(activitymsg, false);
539
rc = system(xlogarchcmd);
543
* If either the shell itself, or a called command, died on a signal,
544
* abort the archiver. We do this because system() ignores SIGINT and
545
* SIGQUIT while waiting; so a signal is very likely something that
546
* should have interrupted us too. If we overreact it's no big deal,
547
* the postmaster will just start the archiver again.
549
* Per the Single Unix Spec, shells report exit status > 128 when a
550
* called command died on a signal.
552
int lev = (WIFSIGNALED(rc) || WEXITSTATUS(rc) > 128) ? FATAL : LOG;
557
(errmsg("archive command failed with exit code %d",
559
errdetail("The failed archive command was: %s",
562
else if (WIFSIGNALED(rc))
566
(errmsg("archive command was terminated by exception 0x%X",
568
errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."),
569
errdetail("The failed archive command was: %s",
571
#elif defined(HAVE_DECL_SYS_SIGLIST) && HAVE_DECL_SYS_SIGLIST
573
(errmsg("archive command was terminated by signal %d: %s",
575
WTERMSIG(rc) < NSIG ? sys_siglist[WTERMSIG(rc)] : "(unknown)"),
576
errdetail("The failed archive command was: %s",
580
(errmsg("archive command was terminated by signal %d",
582
errdetail("The failed archive command was: %s",
589
(errmsg("archive command exited with unrecognized status %d",
591
errdetail("The failed archive command was: %s",
595
snprintf(activitymsg, sizeof(activitymsg), "failed on %s", xlog);
596
set_ps_display(activitymsg, false);
601
(errmsg("archived transaction log file \"%s\"", xlog)));
603
snprintf(activitymsg, sizeof(activitymsg), "last was %s", xlog);
604
set_ps_display(activitymsg, false);
612
* Return name of the oldest xlog file that has not yet been archived.
613
* No notification is set that file archiving is now in progress, so
614
* this would need to be extended if multiple concurrent archival
615
* tasks were created. If a failure occurs, we will completely
616
* re-copy the file at the next available opportunity.
618
* It is important that we return the oldest, so that we archive xlogs
619
* in order that they were written, for two reasons:
620
* 1) to maintain the sequential chain of xlogs required for recovery
621
* 2) because the oldest ones will sooner become candidates for
622
* recycling at time of checkpoint
624
* NOTE: the "oldest" comparison will presently consider all segments of
625
* a timeline with a smaller ID to be older than all segments of a timeline
626
* with a larger ID; the net result being that past timelines are given
627
* higher priority for archiving. This seems okay, or at least not
628
* obviously worth changing.
631
pgarch_readyXlog(char *xlog)
634
* open xlog status directory and read through list of xlogs that have the
635
* .ready suffix, looking for earliest file. It is possible to optimise
636
* this code, though only a single file is expected on the vast majority of calls.
639
char XLogArchiveStatusDir[MAXPGPATH];
640
char newxlog[MAX_XFN_CHARS + 6 + 1];
645
snprintf(XLogArchiveStatusDir, MAXPGPATH, XLOGDIR "/archive_status");
646
rldir = AllocateDir(XLogArchiveStatusDir);
649
(errcode_for_file_access(),
650
errmsg("could not open archive status directory \"%s\": %m",
651
XLogArchiveStatusDir)));
653
while ((rlde = ReadDir(rldir, XLogArchiveStatusDir)) != NULL)
655
int basenamelen = (int) strlen(rlde->d_name) - 6;
657
if (basenamelen >= MIN_XFN_CHARS &&
658
basenamelen <= MAX_XFN_CHARS &&
659
strspn(rlde->d_name, VALID_XFN_CHARS) >= basenamelen &&
660
strcmp(rlde->d_name + basenamelen, ".ready") == 0)
664
strcpy(newxlog, rlde->d_name);
669
if (strcmp(rlde->d_name, newxlog) < 0)
670
strcpy(newxlog, rlde->d_name);
678
/* truncate off the .ready */
679
newxlog[strlen(newxlog) - 6] = '\0';
680
strcpy(xlog, newxlog);
688
* Emit notification that an xlog file has been successfully archived.
689
* We do this by renaming the status file from NNN.ready to NNN.done.
690
* Eventually, a checkpoint process will notice this and delete both the
691
* NNN.done file and the xlog file itself.
694
pgarch_archiveDone(char *xlog)
696
char rlogready[MAXPGPATH];
697
char rlogdone[MAXPGPATH];
699
StatusFilePath(rlogready, xlog, ".ready");
700
StatusFilePath(rlogdone, xlog, ".done");
701
if (rename(rlogready, rlogdone) < 0)
703
(errcode_for_file_access(),
704
errmsg("could not rename file \"%s\" to \"%s\": %m",
705
rlogready, rlogdone)));